diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 1e185956bd30e..4544a922def1a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4446,34 +4446,9 @@ static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT,
                      VL);
 }
 
-// Is this a shuffle that extracts either the even or odd elements of a vector?
-// That is, specifically, either (a) or (b) in the options below.
-// Single operand shuffle is easy:
-// a) t35: v8i8 = vector_shuffle<0,2,4,6,u,u,u,u> t34, undef
-// b) t35: v8i8 = vector_shuffle<1,3,5,7,u,u,u,u> t34, undef
-// Double operand shuffle:
-// t34: v8i8 = extract_subvector t11, Constant:i64<0>
-// t33: v8i8 = extract_subvector t11, Constant:i64<8>
-// a) t35: v8i8 = vector_shuffle<0,2,4,6,8,10,12,14> t34, t33
-// b) t35: v8i8 = vector_shuffle<1,3,5,7,9,11,13,15> t34, t33
-static SDValue isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
-                                     SDValue V2, ArrayRef<int> Mask,
-                                     const RISCVSubtarget &Subtarget) {
-  // Need to be able to widen the vector.
-  if (VT.getScalarSizeInBits() >= Subtarget.getELen())
-    return SDValue();
-
-  // First index must be the first even or odd element from V1.
-  if (Mask[0] != 0 && Mask[0] != 1)
-    return SDValue();
-
-  // The others must increase by 2 each time.
-  for (unsigned i = 1; i != Mask.size(); ++i)
-    if (Mask[i] != -1 && Mask[i] != Mask[0] + (int)i * 2)
-      return SDValue();
-
-  if (1 == count_if(Mask, [](int Idx) { return Idx != -1; }))
-    return SDValue();
+// Can this shuffle be performed on exactly one (possibly larger) input?
+static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1,
+                                   SDValue V2) {
 
   if (V2.isUndef() &&
       RISCVTargetLowering::getLMUL(ContainerVT) != RISCVII::VLMUL::LMUL_8)
@@ -4490,12 +4465,13 @@ static SDValue isDeinterleaveShuffle(MVT VT, MVT ContainerVT, SDValue V1,
     return SDValue();
 
   // Src needs to have twice the number of elements.
-  if (Src.getValueType().getVectorNumElements() != (Mask.size() * 2))
+  unsigned NumElts = VT.getVectorNumElements();
+  if (Src.getValueType().getVectorNumElements() != (NumElts * 2))
     return SDValue();
 
   // The extracts must extract the two halves of the source.
   if (V1.getConstantOperandVal(1) != 0 ||
-      V2.getConstantOperandVal(1) != Mask.size())
+      V2.getConstantOperandVal(1) != NumElts)
     return SDValue();
 
   return Src;
 }
@@ -4612,36 +4588,29 @@ static int isElementRotate(int &LoSrc, int &HiSrc, ArrayRef<int> Mask) {
   return Rotation;
 }
 
-// Lower a deinterleave shuffle to vnsrl.
-// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (EvenElts == true)
-//                          -> [p, q, r, s] (EvenElts == false)
-// VT is the type of the vector to return, <[vscale x ]n x ty>
-// Src is the vector to deinterleave of type <[vscale x ]n*2 x ty>
-static SDValue getDeinterleaveViaVNSRL(const SDLoc &DL, MVT VT, SDValue Src,
-                                       bool EvenElts, SelectionDAG &DAG) {
-  // The result is a vector of type <[vscale x ]n x ty>. The source is a vector of
-  // type <[vscale x ]n*2 x ty> (For the single source case, the high half is undef)
-  if (Src.getValueType() == VT) {
-    EVT WideVT = VT.getDoubleNumVectorElementsVT();
-    Src = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, DAG.getUNDEF(WideVT),
-                      Src, DAG.getVectorIdxConstant(0, DL));
-  }
-
-  // Bitcast the source vector from <[vscale x ]n*2 x ty> -> <[vscale x ]n x i(2*sizeof(ty))>
-  // This also converts FP to int.
+// Lower a deinterleave shuffle to SRL and TRUNC. Factor must be
+// 2, 4, or 8, and the integer type Factor-times larger than VT's
+// element type must be a legal element type.
+// [a, p, b, q, c, r, d, s] -> [a, b, c, d] (Factor=2, Index=0)
+//                          -> [p, q, r, s] (Factor=2, Index=1)
+static SDValue getDeinterleaveShiftAndTrunc(const SDLoc &DL, MVT VT,
+                                            SDValue Src, unsigned Factor,
+                                            unsigned Index, SelectionDAG &DAG) {
   unsigned EltBits = VT.getScalarSizeInBits();
-  MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * 2),
-                                   VT.getVectorElementCount());
+  ElementCount SrcEC = Src.getValueType().getVectorElementCount();
+  MVT WideSrcVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits * Factor),
+                                   SrcEC.divideCoefficientBy(Factor));
+  MVT ResVT = MVT::getVectorVT(MVT::getIntegerVT(EltBits),
+                               SrcEC.divideCoefficientBy(Factor));
   Src = DAG.getBitcast(WideSrcVT, Src);
-  MVT IntVT = VT.changeVectorElementTypeToInteger();
-
-  // If we want even elements, then the shift amount is 0. Otherwise, shift by
-  // the original element size.
-  unsigned Shift = EvenElts ? 0 : EltBits;
+  unsigned Shift = Index * EltBits;
   SDValue Res = DAG.getNode(ISD::SRL, DL, WideSrcVT, Src,
                             DAG.getConstant(Shift, DL, WideSrcVT));
-  Res = DAG.getNode(ISD::TRUNCATE, DL, IntVT, Res);
+  Res = DAG.getNode(ISD::TRUNCATE, DL, ResVT, Res);
+  MVT IntVT = VT.changeVectorElementTypeToInteger();
+  Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, IntVT, DAG.getUNDEF(IntVT), Res,
+                    DAG.getVectorIdxConstant(0, DL));
   return DAG.getBitcast(VT, Res);
 }
 
@@ -5332,11 +5301,24 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
   if (ShuffleVectorInst::isReverseMask(Mask, NumElts) && V2.isUndef())
     return DAG.getNode(ISD::VECTOR_REVERSE, DL, VT, V1);
 
-  // If this is a deinterleave and we can widen the vector, then we can use
-  // vnsrl to deinterleave.
-  if (SDValue Src =
-          isDeinterleaveShuffle(VT, ContainerVT, V1, V2, Mask, Subtarget))
-    return getDeinterleaveViaVNSRL(DL, VT, Src, Mask[0] == 0, DAG);
+  // If this is a deinterleave(2,4,8) and we can widen the vector, then we can
+  // use shift and truncate to perform the shuffle.
+  // TODO: For Factor=6, we can perform the first step of the deinterleave via
+  // shift-and-trunc reducing total cost for everything except an mf8 result.
+  // TODO: For Factor=4,8, we can do the same when the ratio isn't high enough
+  // to do the entire operation.
+  if (VT.getScalarSizeInBits() < Subtarget.getELen()) {
+    const unsigned MaxFactor = Subtarget.getELen() / VT.getScalarSizeInBits();
+    assert(MaxFactor == 2 || MaxFactor == 4 || MaxFactor == 8);
+    for (unsigned Factor = 2; Factor <= MaxFactor; Factor <<= 1) {
+      unsigned Index = 0;
+      if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) &&
+          1 < count_if(Mask, [](int Idx) { return Idx != -1; })) {
+        if (SDValue Src = getSingleShuffleSrc(VT, ContainerVT, V1, V2))
+          return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG);
+      }
+    }
+  }
 
   if (SDValue V =
           lowerVECTOR_SHUFFLEAsVSlideup(DL, VT, V1, V2, Mask, Subtarget, DAG))
@@ -10739,8 +10721,8 @@ SDValue RISCVTargetLowering::lowerVECTOR_DEINTERLEAVE(SDValue Op,
 
   // We can deinterleave through vnsrl.wi if the element type is smaller than
   // ELEN
   if (VecVT.getScalarSizeInBits() < Subtarget.getELen()) {
-    SDValue Even = getDeinterleaveViaVNSRL(DL, VecVT, Concat, true, DAG);
-    SDValue Odd = getDeinterleaveViaVNSRL(DL, VecVT, Concat, false, DAG);
+    SDValue Even = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 0, DAG);
+    SDValue Odd = getDeinterleaveShiftAndTrunc(DL, VecVT, Concat, 2, 1, DAG);
     return DAG.getMergeValues({Even, Odd}, DL);
   }
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
index ae5dbfa4bf30b..ede25d2c9bb07 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-deinterleave-load.ll
@@ -24,19 +24,20 @@ define {<16 x i1>, <16 x i1>} @vector_deinterleave_load_v16i1_v32i1(ptr %p) {
 ; CHECK-NEXT:    vadd.vi v12, v11, -16
 ; CHECK-NEXT:    vsetivli zero, 2, e8, mf4, ta, ma
 ; CHECK-NEXT:    vslidedown.vi v0, v8, 2
-; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT:    vadd.vi v11, v11, -15
 ; CHECK-NEXT:    vmerge.vim v13, v10, 1, v0
 ; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vmerge.vim v14, v10, 1, v0
-; CHECK-NEXT:    vnsrl.wi v8, v14, 0
+; CHECK-NEXT:    vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v10, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 8
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vrgather.vv v8, v13, v12, v0.t
-; CHECK-NEXT:    vnsrl.wi v12, v14, 8
-; CHECK-NEXT:    vmsne.vi v10, v8, 0
-; CHECK-NEXT:    vrgather.vv v12, v13, v11, v0.t
-; CHECK-NEXT:    vmsne.vi v8, v12, 0
-; CHECK-NEXT:    vmv.v.v v0, v10
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT:    vrgather.vv v10, v13, v12, v0.t
+; CHECK-NEXT:    vrgather.vv v8, v13, v11, v0.t
+; CHECK-NEXT:    vmsne.vi v0, v10, 0
+; CHECK-NEXT:    vmsne.vi v8, v8, 0
 ; CHECK-NEXT:    ret
   %vec = load <32 x i1>, ptr %p
   %retval = call {<16 x i1>, <16 x i1>} @llvm.vector.deinterleave2.v32i1(<32 x i1> %vec)
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
index 21417fe8deefb..5d307211ead6e 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll
@@ -721,24 +721,12 @@ define <8 x i32> @shuffle_v8i32_2(<8 x i32> %x, <8 x i32> %y) {
 define <8 x i8> @shuffle_v64i8_v8i8(<64 x i8> %wide.vec) {
 ; CHECK-LABEL: shuffle_v64i8_v8i8:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    lui a0, 4112
-; CHECK-NEXT:    li a1, 240
-; CHECK-NEXT:    vsetivli zero, 1, e32, m1, ta, ma
-; CHECK-NEXT:    vmv.s.x v0, a1
-; CHECK-NEXT:    li a1, 32
-; CHECK-NEXT:    addi a0, a0, 257
-; CHECK-NEXT:    vmv.s.x v14, a0
-; CHECK-NEXT:    lui a0, 98561
-; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT:    vcompress.vm v12, v8, v14
-; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT:    vslidedown.vx v8, v8, a1
-; CHECK-NEXT:    addi a0, a0, -2048
 ; CHECK-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
-; CHECK-NEXT:    vmv.v.x v10, a0
-; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, mu
-; CHECK-NEXT:    vrgather.vv v12, v8, v10, v0.t
-; CHECK-NEXT:    vmv1r.v v8, v12
+; CHECK-NEXT:    vnsrl.wi v12, v8, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v12, 0
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    ret
   %s = shufflevector <64 x i8> %wide.vec, <64 x i8> poison, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 40, i32 48, i32 56>
   ret <8 x i8> %s
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll
index 9d2c722334b08..66f95b7077672 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-changes-length.ll
@@ -104,7 +104,7 @@ define <4 x i32> @v4i32_v16i32(<16 x i32>) {
 ; RV32-NEXT:    vmv.v.i v0, 10
 ; RV32-NEXT:    vsetivli zero, 2, e16, m1, tu, ma
 ; RV32-NEXT:    vslideup.vi v14, v12, 1
-; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV32-NEXT:    vnsrl.wx v12, v8, a0
 ; RV32-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
 ; RV32-NEXT:    vslidedown.vi v8, v8, 8
@@ -116,9 +116,8 @@ define <4 x i32> @v4i32_v16i32(<16 x i32>) {
 ; RV64-LABEL: v4i32_v16i32:
 ; RV64:       # %bb.0:
 ; RV64-NEXT:    li a0, 32
-; RV64-NEXT:    vsetivli zero, 1, e8, mf8, ta, ma
+; RV64-NEXT:    vsetivli zero, 4, e32, m1, ta, ma
 ; RV64-NEXT:    vmv.v.i v0, 10
-; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
 ; RV64-NEXT:    vnsrl.wx v12, v8, a0
 ; RV64-NEXT:    vsetivli zero, 8, e32, m4, ta, ma
 ; RV64-NEXT:    vslidedown.vi v8, v8, 8
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll
index 6450174d44ca8..08fd4fb85ff3f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-deinterleave.ll
@@ -67,22 +67,12 @@ define void @deinterleave4_0_i8(ptr %in, ptr %out) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    li a0, -1
-; CHECK-NEXT:    vmv.v.i v0, 12
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v9, v8, 4
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vwaddu.vv v10, v8, v9
-; CHECK-NEXT:    vwmaccu.vx v10, a0, v9
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vid.v v9
-; CHECK-NEXT:    vsll.vi v9, v9, 2
-; CHECK-NEXT:    vadd.vi v9, v9, -8
-; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v8, v8, 8
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT:    vrgather.vv v10, v8, v9, v0.t
-; CHECK-NEXT:    vse8.v v10, (a1)
+; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
 entry:
   %0 = load <16 x i8>, ptr %in, align 1
@@ -96,20 +86,11 @@ define void @deinterleave4_8_i8(ptr %in, ptr %out) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    li a0, -1
-; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v9, v8, 8
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf2, ta, ma
-; CHECK-NEXT:    vslidedown.vi v10, v9, 4
-; CHECK-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
-; CHECK-NEXT:    vwaddu.vv v11, v9, v10
-; CHECK-NEXT:    vwmaccu.vx v11, a0, v10
-; CHECK-NEXT:    li a0, 34
-; CHECK-NEXT:    vmv.v.i v0, 12
-; CHECK-NEXT:    vmv.s.x v9, a0
+; CHECK-NEXT:    vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 8
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf4, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
-; CHECK-NEXT:    vcompress.vm v10, v8, v9
-; CHECK-NEXT:    vmerge.vvm v8, v10, v11, v0
 ; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
 entry:
@@ -268,10 +249,12 @@ define void @deinterleave8_0_i8(ptr %in, ptr %out) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v9, v8, 8
-; CHECK-NEXT:    vsetivli zero, 2, e8, mf2, tu, ma
-; CHECK-NEXT:    vslideup.vi v8, v9, 1
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
 ; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
 ; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
@@ -287,12 +270,14 @@ define void @deinterleave8_8_i8(ptr %in, ptr %out) {
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, ma
 ; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vmv.v.i v0, -3
-; CHECK-NEXT:    vsetivli zero, 8, e8, m1, ta, ma
-; CHECK-NEXT:    vslidedown.vi v9, v8, 8
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, mu
-; CHECK-NEXT:    vrgather.vi v9, v8, 1, v0.t
-; CHECK-NEXT:    vse8.v v9, (a1)
+; CHECK-NEXT:    vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 8
+; CHECK-NEXT:    vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vsetvli zero, zero, e8, mf8, ta, ma
+; CHECK-NEXT:    vnsrl.wi v8, v8, 0
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vse8.v v8, (a1)
 ; CHECK-NEXT:    ret
 entry:
   %0 = load <16 x i8>, ptr %in, align 1
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll
index 312520ae28374..3aa16070470ea 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll
@@ -441,13 +441,25 @@ entry:
 }
 
 define void @vnsrl_0_i8_single_src(ptr %in, ptr %out) {
-; CHECK-LABEL: vnsrl_0_i8_single_src:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
-; CHECK-NEXT:    vse8.v v8, (a1)
-; CHECK-NEXT:    ret
+; V-LABEL: vnsrl_0_i8_single_src:
+; V:       # %bb.0: # %entry
+; V-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
+; V-NEXT:    vle8.v v8, (a0)
+; V-NEXT:    vsetivli zero, 4, e8, mf8, ta, ma
+; V-NEXT:    vnsrl.wi v8, v8, 0
+; V-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
+; V-NEXT:    vse8.v v8, (a1)
+; V-NEXT:    ret
+;
+; ZVE32F-LABEL: vnsrl_0_i8_single_src:
+; ZVE32F:       # %bb.0: # %entry
+; ZVE32F-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
+; ZVE32F-NEXT:    vle8.v v8, (a0)
+; ZVE32F-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; ZVE32F-NEXT:    vnsrl.wi v8, v8, 0
+; ZVE32F-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
+; ZVE32F-NEXT:    vse8.v v8, (a1)
+; ZVE32F-NEXT:    ret
 entry:
   %0 = load <8 x i8>, ptr %in, align 1
   %shuffle.i5 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 poison, i32 poison, i32 poison, i32 poison>
@@ -456,13 +468,25 @@ entry:
 }
 
 define void @vnsrl_0_i8_single_src2(ptr %in, ptr %out) {
-; CHECK-LABEL: vnsrl_0_i8_single_src2:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
-; CHECK-NEXT:    vle8.v v8, (a0)
-; CHECK-NEXT:    vnsrl.wi v8, v8, 0
-; CHECK-NEXT:    vse8.v v8, (a1)
-; CHECK-NEXT:    ret
+; V-LABEL: vnsrl_0_i8_single_src2:
+; V:       # %bb.0: # %entry
+; V-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
+; V-NEXT:    vle8.v v8, (a0)
+; V-NEXT:    vsetivli zero, 4, e8, mf8, ta, ma
+; V-NEXT:    vnsrl.wi v8, v8, 0
+; V-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
+; V-NEXT:    vse8.v v8, (a1)
+; V-NEXT:    ret
+;
+; ZVE32F-LABEL: vnsrl_0_i8_single_src2:
+; ZVE32F:       # %bb.0: # %entry
+; ZVE32F-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
+; ZVE32F-NEXT:    vle8.v v8, (a0)
+; ZVE32F-NEXT:    vsetivli zero, 4, e8, mf4, ta, ma
+; ZVE32F-NEXT:    vnsrl.wi v8, v8, 0
+; ZVE32F-NEXT:    vsetivli zero, 8, e8, mf4, ta, ma
+; ZVE32F-NEXT:    vse8.v v8, (a1)
+; ZVE32F-NEXT:    ret
 entry:
   %0 = load <8 x i8>, ptr %in, align 1
   %shuffle.i5 = shufflevector <8 x i8> %0, <8 x i8> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
index 41cf886c3ab75..6de846b2582da 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vector-deinterleave-fixed.ll
@@ -19,18 +19,19 @@ define {<16 x i1>, <16 x i1>} @vector_deinterleave_v16i1_v32i1(<32 x i1> %vec) {
 ; CHECK-NEXT:    vsetvli zero, zero, e16, m2, ta, ma
 ; CHECK-NEXT:    vmv.s.x v9, a0
 ; CHECK-NEXT:    vmv1r.v v0, v8
-; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, mu
-; CHECK-NEXT:    vmerge.vim v14, v10, 1, v0
-; CHECK-NEXT:    vadd.vi v8, v12, -16
+; CHECK-NEXT:    vsetvli zero, zero, e8, m1, ta, ma
+; CHECK-NEXT:    vmerge.vim v8, v10, 1, v0
+; CHECK-NEXT:    vadd.vi v10, v12, -16
 ; CHECK-NEXT:    vadd.vi v12, v12, -15
-; CHECK-NEXT:    vnsrl.wi v10, v14, 0
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vnsrl.wi v13, v8, 0
+; CHECK-NEXT:    vnsrl.wi v8, v8, 8
 ; CHECK-NEXT:    vmv1r.v v0, v9
-; CHECK-NEXT:    vrgather.vv v10, v11, v8, v0.t
-; CHECK-NEXT:    vnsrl.wi v8, v14, 8
-; CHECK-NEXT:    vmsne.vi v10, v10, 0
+; CHECK-NEXT:    vsetivli zero, 16, e8, m1, ta, mu
+; CHECK-NEXT:    vrgather.vv v13, v11, v10, v0.t
 ; CHECK-NEXT:    vrgather.vv v8, v11, v12, v0.t
+; CHECK-NEXT:    vmsne.vi v0, v13, 0
 ; CHECK-NEXT:    vmsne.vi v8, v8, 0
-; CHECK-NEXT:    vmv.v.v v0, v10
 ; CHECK-NEXT:    ret
   %retval = call {<16 x i1>, <16 x i1>} @llvm.vector.deinterleave2.v32i1(<32 x i1> %vec)
   ret {<16 x i1>, <16 x i1>} %retval
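
Note on the lowering above, for intuition (not part of the patch). The shift-and-truncate trick can be modeled in scalar code: read each group of Factor elements as one wide integer (the bitcast to WideSrcVT), shift right by Index * EltBits (the ISD::SRL), and keep only the low element's worth of bits (the ISD::TRUNCATE). The sketch below assumes a little-endian layout, matching RISC-V's in-register element order; deinterleave() is a hypothetical helper written for this note, not LLVM API:

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <vector>

// Scalar model of the SRL + TRUNCATE lowering for 8-bit elements: view each
// group of Factor bytes as one (8 * Factor)-bit integer, shift it right by
// Index * 8, and truncate back to 8 bits. Assumes Factor <= 8 and a
// little-endian host.
static std::vector<uint8_t> deinterleave(const std::vector<uint8_t> &Src,
                                         unsigned Factor, unsigned Index) {
  std::vector<uint8_t> Res;
  for (size_t I = 0; I + Factor <= Src.size(); I += Factor) {
    uint64_t Wide = 0;
    std::memcpy(&Wide, &Src[I], Factor);         // the "bitcast" to WideSrcVT
    Res.push_back(uint8_t(Wide >> (Index * 8))); // SRL by Index*EltBits, TRUNC
  }
  return Res;
}

int main() {
  // Mirrors the comment in the patch: deinterleaving [a,p,b,q,c,r,d,s] with
  // Factor=2 yields [a,b,c,d] for Index=0 and [p,q,r,s] for Index=1.
  std::vector<uint8_t> V = {'a', 'p', 'b', 'q', 'c', 'r', 'd', 's'};
  for (uint8_t C : deinterleave(V, 2, 1))
    std::putchar(C); // prints "pqrs"
  std::putchar('\n');
  return 0;
}

In the generated code the single wide shift plus truncate is legalized into a chain of vnsrl.wi narrowing steps, which is why the Factor=8 tests above check three vnsrl.wi instructions.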
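
The call site in lowerVECTOR_SHUFFLE keys off ShuffleVectorInst::isDeInterleaveMaskOfFactor. A sketch of that predicate's assumed contract (this restates the expected semantics for illustration and is not the LLVM implementation): every defined mask entry must satisfy Mask[i] == Index + i * Factor for one common Index in [0, Factor).

#include <cstddef>
#include <vector>

// Hypothetical restatement of the mask predicate: the defined entries must
// pick lane Index out of every consecutive group of Factor source elements;
// entries of -1 are "don't care".
static bool isDeInterleaveMaskOfFactorSketch(const std::vector<int> &Mask,
                                             unsigned Factor, unsigned &Index) {
  for (unsigned Idx = 0; Idx < Factor; ++Idx) {
    bool Matches = true;
    for (size_t I = 0; I < Mask.size(); ++I) {
      if (Mask[I] != -1 && Mask[I] != int(Idx + I * Factor)) {
        Matches = false;
        break;
      }
    }
    if (Matches) {
      Index = Idx;
      return true;
    }
  }
  return false;
}

A mask with at most one defined element would trivially match under this contract, which is why the patch additionally requires 1 < count_if(Mask, ...): such degenerate shuffles are cheaper as a single element extract or slide than as a shift-and-truncate.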