diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index da04880348af6..122d717b0ef1b 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -4498,11 +4498,9 @@ static SDValue lowerScalarInsert(SDValue Scalar, SDValue VL, MVT VT, } // Can this shuffle be performed on exactly one (possibly larger) input? -static SDValue getSingleShuffleSrc(MVT VT, MVT ContainerVT, SDValue V1, - SDValue V2) { +static SDValue getSingleShuffleSrc(MVT VT, SDValue V1, SDValue V2) { - if (V2.isUndef() && - RISCVTargetLowering::getLMUL(ContainerVT) != RISCVII::VLMUL::LMUL_8) + if (V2.isUndef()) return V1; // Both input must be extracts. @@ -5577,7 +5575,7 @@ static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG, unsigned Index = 0; if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index) && 1 < count_if(Mask, [](int Idx) { return Idx != -1; })) { - if (SDValue Src = getSingleShuffleSrc(VT, ContainerVT, V1, V2)) + if (SDValue Src = getSingleShuffleSrc(VT, V1, V2)) return getDeinterleaveShiftAndTrunc(DL, VT, Src, Factor, Index, DAG); } } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll index 3c28e978842b9..4e5ef9c002f1a 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shufflevector-vnsrl.ll @@ -517,15 +517,15 @@ entry: ret void } -; Can't match the m8 result type as the source would have to be m16 which -; isn't a legal type. +; FIXME: We could use a smaller vl for the vnsrl since some elts are undefined. define void @vnsrl_0_i32_single_src_m8(ptr %in, ptr %out) { ; V-LABEL: vnsrl_0_i32_single_src_m8: ; V: # %bb.0: # %entry ; V-NEXT: li a2, 64 ; V-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; V-NEXT: vle32.v v8, (a0) -; V-NEXT: vsetivli zero, 16, e32, m2, ta, ma +; V-NEXT: li a0, 32 +; V-NEXT: vsetvli zero, a0, e32, m4, ta, ma ; V-NEXT: vnsrl.wi v16, v8, 0 ; V-NEXT: vsetvli zero, a2, e32, m8, ta, ma ; V-NEXT: vse32.v v16, (a1) @@ -551,3 +551,36 @@ entry: store <64 x i32> %shuffle.i5, ptr %out, align 4 ret void } + +define void @vnsrl_0_i32_single_src_m8_2(ptr %in, ptr %out) { +; V-LABEL: vnsrl_0_i32_single_src_m8_2: +; V: # %bb.0: # %entry +; V-NEXT: li a2, 64 +; V-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; V-NEXT: vle32.v v8, (a0) +; V-NEXT: li a0, 32 +; V-NEXT: vsetvli zero, a0, e32, m4, ta, ma +; V-NEXT: vnsrl.wi v16, v8, 0 +; V-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; V-NEXT: vse32.v v16, (a1) +; V-NEXT: ret +; +; ZVE32F-LABEL: vnsrl_0_i32_single_src_m8_2: +; ZVE32F: # %bb.0: # %entry +; ZVE32F-NEXT: li a2, 64 +; ZVE32F-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; ZVE32F-NEXT: vle32.v v8, (a0) +; ZVE32F-NEXT: lui a0, 349525 +; ZVE32F-NEXT: addi a0, a0, 1365 +; ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; ZVE32F-NEXT: vmv.v.x v16, a0 +; ZVE32F-NEXT: vsetvli zero, a2, e32, m8, ta, ma +; ZVE32F-NEXT: vcompress.vm v24, v8, v16 +; ZVE32F-NEXT: vse32.v v24, (a1) +; ZVE32F-NEXT: ret +entry: + %0 = load <64 x i32>, ptr %in, align 4 + %shuffle.i5 = shufflevector <64 x i32> %0, <64 x i32> poison, <64 x i32> + store <64 x i32> %shuffle.i5, ptr %out, align 4 + ret void +}