diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 05e656ac81702..9b87dc3959a07 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -3456,6 +3456,27 @@ static SDValue lowerBuildVectorOfConstants(SDValue Op, SelectionDAG &DAG, if (SDValue Res = lowerBuildVectorViaDominantValues(Op, DAG, Subtarget)) return Res; + // If the number of signbits allows, see if we can lower as a <N x i8>. + // We restrict this to N <= 4 to ensure the resulting narrow vector is + // 32 bits or smaller and can thus be materialized cheaply from scalar. + // The main motivation for this is the constant index vector required + // by vrgather.vv. This covers all index vectors up to size 4. + // TODO: We really should be costing the smaller vector. There are + // profitable cases this misses. + const unsigned ScalarSize = + Op.getSimpleValueType().getScalarSizeInBits(); + if (ScalarSize > 8 && NumElts <= 4) { + unsigned SignBits = DAG.ComputeNumSignBits(Op); + if (ScalarSize - SignBits < 8) { + SDValue Source = + DAG.getNode(ISD::TRUNCATE, DL, VT.changeVectorElementType(MVT::i8), Op); + Source = convertToScalableVector(ContainerVT.changeVectorElementType(MVT::i8), + Source, DAG, Subtarget); + SDValue Res = DAG.getNode(RISCVISD::VSEXT_VL, DL, ContainerVT, Source, Mask, VL); + return convertFromScalableVector(VT, Res, DAG, Subtarget); + } + } + // For constant vectors, use generic constant pool lowering. Otherwise, // we'd have to materialize constants in GPRs just to move them into the // vector. diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll index 8ea9c15e86208..decd6ae099754 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-extract.ll @@ -806,18 +806,19 @@ define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) { ; RV32NOM: # %bb.0: ; RV32NOM-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV32NOM-NEXT: vmv.v.i v9, 0 -; RV32NOM-NEXT: li a0, -1 -; RV32NOM-NEXT: vslide1down.vx v9, v9, a0 ; RV32NOM-NEXT: lui a0, %hi(.LCPI42_0) ; RV32NOM-NEXT: addi a0, a0, %lo(.LCPI42_0) ; RV32NOM-NEXT: vle32.v v10, (a0) -; RV32NOM-NEXT: lui a0, %hi(.LCPI42_1) -; RV32NOM-NEXT: addi a0, a0, %lo(.LCPI42_1) -; RV32NOM-NEXT: vle32.v v11, (a0) +; RV32NOM-NEXT: li a0, -1 +; RV32NOM-NEXT: vslide1down.vx v9, v9, a0 ; RV32NOM-NEXT: vand.vv v9, v8, v9 ; RV32NOM-NEXT: vmulh.vv v8, v8, v10 ; RV32NOM-NEXT: vadd.vv v8, v8, v9 -; RV32NOM-NEXT: vsra.vv v9, v8, v11 +; RV32NOM-NEXT: lui a0, 12320 +; RV32NOM-NEXT: addi a0, a0, 257 +; RV32NOM-NEXT: vmv.s.x v9, a0 +; RV32NOM-NEXT: vsext.vf4 v10, v9 +; RV32NOM-NEXT: vsra.vv v9, v8, v10 ; RV32NOM-NEXT: vsrl.vi v8, v8, 31 ; RV32NOM-NEXT: vadd.vv v8, v9, v8 ; RV32NOM-NEXT: vslidedown.vi v8, v8, 2 @@ -841,18 +842,19 @@ define i32 @extractelt_sdiv_v4i32(<4 x i32> %x) { ; RV64NOM: # %bb.0: ; RV64NOM-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; RV64NOM-NEXT: vmv.v.i v9, 0 -; RV64NOM-NEXT: li a0, -1 -; RV64NOM-NEXT: vslide1down.vx v9, v9, a0 ; RV64NOM-NEXT: lui a0, %hi(.LCPI42_0) ; RV64NOM-NEXT: addi a0, a0, %lo(.LCPI42_0) ; RV64NOM-NEXT: vle32.v v10, (a0) -; RV64NOM-NEXT: lui a0, %hi(.LCPI42_1) -; RV64NOM-NEXT: addi a0, a0, %lo(.LCPI42_1) -; RV64NOM-NEXT: vle32.v v11, (a0) +; RV64NOM-NEXT: li a0, -1 +; RV64NOM-NEXT: vslide1down.vx v9, v9, a0 ; RV64NOM-NEXT: vand.vv v9, v8, v9 ; RV64NOM-NEXT: vmulh.vv v8, v8, v10 ; RV64NOM-NEXT: vadd.vv v8, v8, v9 -; RV64NOM-NEXT: vsra.vv v8, v8, v11 +; 
RV64NOM-NEXT: lui a0, 12320 +; RV64NOM-NEXT: addiw a0, a0, 257 +; RV64NOM-NEXT: vmv.s.x v9, a0 +; RV64NOM-NEXT: vsext.vf4 v10, v9 +; RV64NOM-NEXT: vsra.vv v8, v8, v10 ; RV64NOM-NEXT: vsrl.vi v9, v8, 31 ; RV64NOM-NEXT: vadd.vv v8, v8, v9 ; RV64NOM-NEXT: vslidedown.vi v8, v8, 2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll index cc6abfd8f2645..6fa9cddde622c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-interleave.ll @@ -435,40 +435,48 @@ define <4 x float> @unary_interleave_v4f32(<4 x float> %x) { define <4 x double> @unary_interleave_v4f64(<4 x double> %x) { ; RV32-V128-LABEL: unary_interleave_v4f64: ; RV32-V128: # %bb.0: -; RV32-V128-NEXT: lui a0, %hi(.LCPI13_0) -; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI13_0) -; RV32-V128-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-V128-NEXT: vle16.v v12, (a0) +; RV32-V128-NEXT: lui a0, 12304 +; RV32-V128-NEXT: addi a0, a0, 512 +; RV32-V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-V128-NEXT: vmv.s.x v10, a0 +; RV32-V128-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV32-V128-NEXT: vsext.vf2 v12, v10 +; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; RV32-V128-NEXT: vrgatherei16.vv v10, v8, v12 ; RV32-V128-NEXT: vmv.v.v v8, v10 ; RV32-V128-NEXT: ret ; ; RV64-V128-LABEL: unary_interleave_v4f64: ; RV64-V128: # %bb.0: -; RV64-V128-NEXT: lui a0, %hi(.LCPI13_0) -; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI13_0) +; RV64-V128-NEXT: lui a0, 12304 +; RV64-V128-NEXT: addiw a0, a0, 512 ; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-V128-NEXT: vle64.v v12, (a0) +; RV64-V128-NEXT: vmv.s.x v10, a0 +; RV64-V128-NEXT: vsext.vf8 v12, v10 ; RV64-V128-NEXT: vrgather.vv v10, v8, v12 ; RV64-V128-NEXT: vmv.v.v v8, v10 ; RV64-V128-NEXT: ret ; ; RV32-V512-LABEL: unary_interleave_v4f64: ; RV32-V512: # %bb.0: -; RV32-V512-NEXT: lui a0, %hi(.LCPI13_0) -; RV32-V512-NEXT: addi a0, a0, %lo(.LCPI13_0) -; RV32-V512-NEXT: vsetivli zero, 4, e64, m1, ta, ma -; RV32-V512-NEXT: vle16.v v10, (a0) +; RV32-V512-NEXT: lui a0, 12304 +; RV32-V512-NEXT: addi a0, a0, 512 +; RV32-V512-NEXT: vsetivli zero, 4, e32, mf2, ta, ma +; RV32-V512-NEXT: vmv.s.x v9, a0 +; RV32-V512-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV32-V512-NEXT: vsext.vf2 v10, v9 +; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV32-V512-NEXT: vrgatherei16.vv v9, v8, v10 ; RV32-V512-NEXT: vmv.v.v v8, v9 ; RV32-V512-NEXT: ret ; ; RV64-V512-LABEL: unary_interleave_v4f64: ; RV64-V512: # %bb.0: -; RV64-V512-NEXT: lui a0, %hi(.LCPI13_0) -; RV64-V512-NEXT: addi a0, a0, %lo(.LCPI13_0) +; RV64-V512-NEXT: lui a0, 12304 +; RV64-V512-NEXT: addiw a0, a0, 512 ; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, ma -; RV64-V512-NEXT: vle64.v v10, (a0) +; RV64-V512-NEXT: vmv.s.x v9, a0 +; RV64-V512-NEXT: vsext.vf8 v10, v9 ; RV64-V512-NEXT: vrgather.vv v9, v8, v10 ; RV64-V512-NEXT: vmv.v.v v8, v9 ; RV64-V512-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll index 9fe1eb7f7ed2b..8d66248a1e57d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fp-shuffles.ll @@ -57,20 +57,24 @@ define <4 x double> @shuffle_vf_v4f64(<4 x double> %x) { define <4 x double> @vrgather_permute_shuffle_vu_v4f64(<4 x double> %x) { ; RV32-LABEL: vrgather_permute_shuffle_vu_v4f64: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI4_0) 
-; RV32-NEXT: addi a0, a0, %lo(.LCPI4_0) -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vle16.v v12, (a0) +; RV32-NEXT: lui a0, 4096 +; RV32-NEXT: addi a0, a0, 513 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vmv.s.x v10, a0 +; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV32-NEXT: vsext.vf2 v12, v10 +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; RV32-NEXT: vrgatherei16.vv v10, v8, v12 ; RV32-NEXT: vmv.v.v v8, v10 ; RV32-NEXT: ret ; ; RV64-LABEL: vrgather_permute_shuffle_vu_v4f64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI4_0) -; RV64-NEXT: addi a0, a0, %lo(.LCPI4_0) +; RV64-NEXT: lui a0, 4096 +; RV64-NEXT: addiw a0, a0, 513 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vle64.v v12, (a0) +; RV64-NEXT: vmv.s.x v10, a0 +; RV64-NEXT: vsext.vf8 v12, v10 ; RV64-NEXT: vrgather.vv v10, v8, v12 ; RV64-NEXT: vmv.v.v v8, v10 ; RV64-NEXT: ret @@ -81,20 +85,24 @@ define <4 x double> @vrgather_permute_shuffle_vu_v4f64(<4 x double> %x) { define <4 x double> @vrgather_permute_shuffle_uv_v4f64(<4 x double> %x) { ; RV32-LABEL: vrgather_permute_shuffle_uv_v4f64: ; RV32: # %bb.0: -; RV32-NEXT: lui a0, %hi(.LCPI5_0) -; RV32-NEXT: addi a0, a0, %lo(.LCPI5_0) -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: vle16.v v12, (a0) +; RV32-NEXT: lui a0, 4096 +; RV32-NEXT: addi a0, a0, 513 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vmv.s.x v10, a0 +; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV32-NEXT: vsext.vf2 v12, v10 +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; RV32-NEXT: vrgatherei16.vv v10, v8, v12 ; RV32-NEXT: vmv.v.v v8, v10 ; RV32-NEXT: ret ; ; RV64-LABEL: vrgather_permute_shuffle_uv_v4f64: ; RV64: # %bb.0: -; RV64-NEXT: lui a0, %hi(.LCPI5_0) -; RV64-NEXT: addi a0, a0, %lo(.LCPI5_0) +; RV64-NEXT: lui a0, 4096 +; RV64-NEXT: addiw a0, a0, 513 ; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: vle64.v v12, (a0) +; RV64-NEXT: vmv.s.x v10, a0 +; RV64-NEXT: vsext.vf8 v12, v10 ; RV64-NEXT: vrgather.vv v10, v8, v12 ; RV64-NEXT: vmv.v.v v8, v10 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll index 790ebe82a1e4c..f9a64498afacc 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-interleave.ll @@ -668,40 +668,48 @@ define <4 x i32> @unary_interleave_v4i32(<4 x i32> %x) { define <4 x i64> @unary_interleave_v4i64(<4 x i64> %x) { ; RV32-V128-LABEL: unary_interleave_v4i64: ; RV32-V128: # %bb.0: -; RV32-V128-NEXT: lui a0, %hi(.LCPI22_0) -; RV32-V128-NEXT: addi a0, a0, %lo(.LCPI22_0) -; RV32-V128-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-V128-NEXT: vle16.v v12, (a0) +; RV32-V128-NEXT: lui a0, 12304 +; RV32-V128-NEXT: addi a0, a0, 512 +; RV32-V128-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-V128-NEXT: vmv.s.x v10, a0 +; RV32-V128-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV32-V128-NEXT: vsext.vf2 v12, v10 +; RV32-V128-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; RV32-V128-NEXT: vrgatherei16.vv v10, v8, v12 ; RV32-V128-NEXT: vmv.v.v v8, v10 ; RV32-V128-NEXT: ret ; ; RV64-V128-LABEL: unary_interleave_v4i64: ; RV64-V128: # %bb.0: -; RV64-V128-NEXT: lui a0, %hi(.LCPI22_0) -; RV64-V128-NEXT: addi a0, a0, %lo(.LCPI22_0) +; RV64-V128-NEXT: lui a0, 12304 +; RV64-V128-NEXT: addiw a0, a0, 512 ; RV64-V128-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-V128-NEXT: vle64.v v12, (a0) +; RV64-V128-NEXT: vmv.s.x v10, a0 +; RV64-V128-NEXT: 
vsext.vf8 v12, v10 ; RV64-V128-NEXT: vrgather.vv v10, v8, v12 ; RV64-V128-NEXT: vmv.v.v v8, v10 ; RV64-V128-NEXT: ret ; ; RV32-V512-LABEL: unary_interleave_v4i64: ; RV32-V512: # %bb.0: -; RV32-V512-NEXT: lui a0, %hi(.LCPI22_0) -; RV32-V512-NEXT: addi a0, a0, %lo(.LCPI22_0) -; RV32-V512-NEXT: vsetivli zero, 4, e64, m1, ta, ma -; RV32-V512-NEXT: vle16.v v10, (a0) +; RV32-V512-NEXT: lui a0, 12304 +; RV32-V512-NEXT: addi a0, a0, 512 +; RV32-V512-NEXT: vsetivli zero, 4, e32, mf2, ta, ma +; RV32-V512-NEXT: vmv.s.x v9, a0 +; RV32-V512-NEXT: vsetvli zero, zero, e16, mf4, ta, ma +; RV32-V512-NEXT: vsext.vf2 v10, v9 +; RV32-V512-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV32-V512-NEXT: vrgatherei16.vv v9, v8, v10 ; RV32-V512-NEXT: vmv.v.v v8, v9 ; RV32-V512-NEXT: ret ; ; RV64-V512-LABEL: unary_interleave_v4i64: ; RV64-V512: # %bb.0: -; RV64-V512-NEXT: lui a0, %hi(.LCPI22_0) -; RV64-V512-NEXT: addi a0, a0, %lo(.LCPI22_0) +; RV64-V512-NEXT: lui a0, 12304 +; RV64-V512-NEXT: addiw a0, a0, 512 ; RV64-V512-NEXT: vsetivli zero, 4, e64, m1, ta, ma -; RV64-V512-NEXT: vle64.v v10, (a0) +; RV64-V512-NEXT: vmv.s.x v9, a0 +; RV64-V512-NEXT: vsext.vf8 v10, v9 ; RV64-V512-NEXT: vrgather.vv v9, v8, v10 ; RV64-V512-NEXT: vmv.v.v v8, v9 ; RV64-V512-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll index 3f2b2d3c2a818..b4f3a0fb197a5 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int-shuffles.ll @@ -51,29 +51,57 @@ define <4 x i16> @shuffle_vx_v4i16(<4 x i16> %x) { } define <4 x i16> @vrgather_permute_shuffle_vu_v4i16(<4 x i16> %x) { -; CHECK-LABEL: vrgather_permute_shuffle_vu_v4i16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI4_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI4_0) -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vle16.v v10, (a0) -; CHECK-NEXT: vrgather.vv v9, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret +; RV32-LABEL: vrgather_permute_shuffle_vu_v4i16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, 4096 +; RV32-NEXT: addi a0, a0, 513 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vmv.s.x v9, a0 +; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV32-NEXT: vsext.vf2 v10, v9 +; RV32-NEXT: vrgather.vv v9, v8, v10 +; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: vrgather_permute_shuffle_vu_v4i16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, 4096 +; RV64-NEXT: addiw a0, a0, 513 +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vmv.s.x v9, a0 +; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64-NEXT: vsext.vf2 v10, v9 +; RV64-NEXT: vrgather.vv v9, v8, v10 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret %s = shufflevector <4 x i16> %x, <4 x i16> poison, <4 x i32> ret <4 x i16> %s } define <4 x i16> @vrgather_permute_shuffle_uv_v4i16(<4 x i16> %x) { -; CHECK-LABEL: vrgather_permute_shuffle_uv_v4i16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a0, %hi(.LCPI5_0) -; CHECK-NEXT: addi a0, a0, %lo(.LCPI5_0) -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vle16.v v10, (a0) -; CHECK-NEXT: vrgather.vv v9, v8, v10 -; CHECK-NEXT: vmv1r.v v8, v9 -; CHECK-NEXT: ret +; RV32-LABEL: vrgather_permute_shuffle_uv_v4i16: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, 4096 +; RV32-NEXT: addi a0, a0, 513 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vmv.s.x v9, a0 +; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV32-NEXT: vsext.vf2 v10, v9 +; RV32-NEXT: vrgather.vv v9, v8, v10 
+; RV32-NEXT: vmv1r.v v8, v9 +; RV32-NEXT: ret +; +; RV64-LABEL: vrgather_permute_shuffle_uv_v4i16: +; RV64: # %bb.0: +; RV64-NEXT: lui a0, 4096 +; RV64-NEXT: addiw a0, a0, 513 +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vmv.s.x v9, a0 +; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64-NEXT: vsext.vf2 v10, v9 +; RV64-NEXT: vrgather.vv v9, v8, v10 +; RV64-NEXT: vmv1r.v v8, v9 +; RV64-NEXT: ret %s = shufflevector <4 x i16> poison, <4 x i16> %x, <4 x i32> ret <4 x i16> %s } diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll index 68309001f445b..dbf7dfbcab49c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-int.ll @@ -1238,27 +1238,53 @@ define void @mulhu_v8i16(ptr %x) { } define void @mulhu_v6i16(ptr %x) { -; CHECK-LABEL: mulhu_v6i16: -; CHECK: # %bb.0: -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vle16.v v8, (a0) -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vid.v v9 -; CHECK-NEXT: vadd.vi v9, v9, 12 -; CHECK-NEXT: vsetivli zero, 2, e16, m1, ta, ma -; CHECK-NEXT: vslidedown.vi v10, v8, 4 -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma -; CHECK-NEXT: vdivu.vv v9, v10, v9 -; CHECK-NEXT: lui a1, %hi(.LCPI67_0) -; CHECK-NEXT: addi a1, a1, %lo(.LCPI67_0) -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vle16.v v10, (a1) -; CHECK-NEXT: vdivu.vv v8, v8, v10 -; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma -; CHECK-NEXT: vslideup.vi v8, v9, 4 -; CHECK-NEXT: vsetivli zero, 6, e16, m1, ta, ma -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; RV32-LABEL: mulhu_v6i16: +; RV32: # %bb.0: +; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; RV32-NEXT: vle16.v v8, (a0) +; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV32-NEXT: vid.v v9 +; RV32-NEXT: vadd.vi v9, v9, 12 +; RV32-NEXT: vsetivli zero, 2, e16, m1, ta, ma +; RV32-NEXT: vslidedown.vi v10, v8, 4 +; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV32-NEXT: vdivu.vv v9, v10, v9 +; RV32-NEXT: lui a1, 45217 +; RV32-NEXT: addi a1, a1, -1785 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vmv.s.x v10, a1 +; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV32-NEXT: vsext.vf2 v11, v10 +; RV32-NEXT: vdivu.vv v8, v8, v11 +; RV32-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV32-NEXT: vslideup.vi v8, v9, 4 +; RV32-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; RV32-NEXT: vse16.v v8, (a0) +; RV32-NEXT: ret +; +; RV64-LABEL: mulhu_v6i16: +; RV64: # %bb.0: +; RV64-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; RV64-NEXT: vle16.v v8, (a0) +; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV64-NEXT: vid.v v9 +; RV64-NEXT: vadd.vi v9, v9, 12 +; RV64-NEXT: vsetivli zero, 2, e16, m1, ta, ma +; RV64-NEXT: vslidedown.vi v10, v8, 4 +; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV64-NEXT: vdivu.vv v9, v10, v9 +; RV64-NEXT: lui a1, 45217 +; RV64-NEXT: addiw a1, a1, -1785 +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vmv.s.x v10, a1 +; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64-NEXT: vsext.vf2 v11, v10 +; RV64-NEXT: vdivu.vv v8, v8, v11 +; RV64-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; RV64-NEXT: vslideup.vi v8, v9, 4 +; RV64-NEXT: vsetivli zero, 6, e16, m1, ta, ma +; RV64-NEXT: vse16.v v8, (a0) +; RV64-NEXT: ret %a = load <6 x i16>, ptr %x %b = udiv <6 x i16> %a, store <6 x i16> %b, ptr %x @@ -1306,12 +1332,13 @@ define void @mulhu_v2i64(ptr %x) { ; RV32-NEXT: vle32.v v9, (a1) ; RV32-NEXT: vsetivli zero, 2, e64, m1, 
ta, ma ; RV32-NEXT: vmulhu.vv v8, v8, v9 -; RV32-NEXT: lui a1, %hi(.LCPI69_1) -; RV32-NEXT: addi a1, a1, %lo(.LCPI69_1) +; RV32-NEXT: lui a1, 32 +; RV32-NEXT: addi a1, a1, 1 +; RV32-NEXT: vmv.s.x v9, a1 ; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32-NEXT: vle32.v v9, (a1) +; RV32-NEXT: vsext.vf4 v10, v9 ; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; RV32-NEXT: vsrl.vv v8, v8, v9 +; RV32-NEXT: vsrl.vv v8, v8, v10 ; RV32-NEXT: vse64.v v8, (a0) ; RV32-NEXT: ret ; @@ -5214,16 +5241,17 @@ define void @mulhu_v8i32(ptr %x) { ; LMULMAX1-RV64-LABEL: mulhu_v8i32: ; LMULMAX1-RV64: # %bb.0: ; LMULMAX1-RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; LMULMAX1-RV64-NEXT: vle32.v v8, (a0) ; LMULMAX1-RV64-NEXT: addi a1, a0, 16 -; LMULMAX1-RV64-NEXT: vle32.v v8, (a1) -; LMULMAX1-RV64-NEXT: lui a2, %hi(.LCPI183_0) -; LMULMAX1-RV64-NEXT: addi a2, a2, %lo(.LCPI183_0) -; LMULMAX1-RV64-NEXT: vle32.v v9, (a2) -; LMULMAX1-RV64-NEXT: vle32.v v10, (a0) -; LMULMAX1-RV64-NEXT: vdivu.vv v8, v8, v9 -; LMULMAX1-RV64-NEXT: vdivu.vv v9, v10, v9 -; LMULMAX1-RV64-NEXT: vse32.v v9, (a0) -; LMULMAX1-RV64-NEXT: vse32.v v8, (a1) +; LMULMAX1-RV64-NEXT: vle32.v v9, (a1) +; LMULMAX1-RV64-NEXT: lui a2, 36976 +; LMULMAX1-RV64-NEXT: addiw a2, a2, 1541 +; LMULMAX1-RV64-NEXT: vmv.s.x v10, a2 +; LMULMAX1-RV64-NEXT: vsext.vf4 v11, v10 +; LMULMAX1-RV64-NEXT: vdivu.vv v9, v9, v11 +; LMULMAX1-RV64-NEXT: vdivu.vv v8, v8, v11 +; LMULMAX1-RV64-NEXT: vse32.v v8, (a0) +; LMULMAX1-RV64-NEXT: vse32.v v9, (a1) ; LMULMAX1-RV64-NEXT: ret %a = load <8 x i32>, ptr %x %b = udiv <8 x i32> %a, @@ -5276,13 +5304,14 @@ define void @mulhu_v4i64(ptr %x) { ; LMULMAX2-RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; LMULMAX2-RV64-NEXT: vle64.v v10, (a1) ; LMULMAX2-RV64-NEXT: vmulhu.vv v10, v8, v10 -; LMULMAX2-RV64-NEXT: lui a1, %hi(.LCPI184_1) -; LMULMAX2-RV64-NEXT: addi a1, a1, %lo(.LCPI184_1) -; LMULMAX2-RV64-NEXT: vle64.v v14, (a1) ; LMULMAX2-RV64-NEXT: vsub.vv v8, v8, v10 ; LMULMAX2-RV64-NEXT: vmulhu.vv v8, v8, v12 ; LMULMAX2-RV64-NEXT: vadd.vv v8, v8, v10 -; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v14 +; LMULMAX2-RV64-NEXT: lui a1, 12320 +; LMULMAX2-RV64-NEXT: addiw a1, a1, 513 +; LMULMAX2-RV64-NEXT: vmv.s.x v10, a1 +; LMULMAX2-RV64-NEXT: vsext.vf8 v12, v10 +; LMULMAX2-RV64-NEXT: vsrl.vv v8, v8, v12 ; LMULMAX2-RV64-NEXT: vse64.v v8, (a0) ; LMULMAX2-RV64-NEXT: ret ; @@ -5292,18 +5321,20 @@ define void @mulhu_v4i64(ptr %x) { ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a1) -; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI184_0) -; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI184_0) +; LMULMAX1-RV32-NEXT: lui a2, 144 +; LMULMAX1-RV32-NEXT: addi a2, a2, 7 +; LMULMAX1-RV32-NEXT: vmv.s.x v10, a2 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v10, (a2) +; LMULMAX1-RV32-NEXT: vsext.vf4 v11, v10 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vdivu.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI184_1) -; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI184_1) +; LMULMAX1-RV32-NEXT: vdivu.vv v9, v9, v11 +; LMULMAX1-RV32-NEXT: lui a2, 80 +; LMULMAX1-RV32-NEXT: addi a2, a2, 3 +; LMULMAX1-RV32-NEXT: vmv.s.x v10, a2 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v10, (a2) +; LMULMAX1-RV32-NEXT: vsext.vf4 v11, v10 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vdivu.vv v8, v8, v10 +; LMULMAX1-RV32-NEXT: vdivu.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) ; 
LMULMAX1-RV32-NEXT: vse64.v v9, (a1) ; LMULMAX1-RV32-NEXT: ret @@ -5660,13 +5691,14 @@ define void @mulhs_v4i64(ptr %x) { ; LMULMAX1-RV32-NEXT: vle64.v v8, (a0) ; LMULMAX1-RV32-NEXT: addi a1, a0, 16 ; LMULMAX1-RV32-NEXT: vle64.v v9, (a1) -; LMULMAX1-RV32-NEXT: lui a2, %hi(.LCPI188_0) -; LMULMAX1-RV32-NEXT: addi a2, a2, %lo(.LCPI188_0) +; LMULMAX1-RV32-NEXT: lui a2, 1048528 +; LMULMAX1-RV32-NEXT: addi a2, a2, 3 +; LMULMAX1-RV32-NEXT: vmv.s.x v10, a2 ; LMULMAX1-RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; LMULMAX1-RV32-NEXT: vle32.v v10, (a2) +; LMULMAX1-RV32-NEXT: vsext.vf4 v11, v10 ; LMULMAX1-RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma -; LMULMAX1-RV32-NEXT: vdiv.vv v9, v9, v10 -; LMULMAX1-RV32-NEXT: vdiv.vv v8, v8, v10 +; LMULMAX1-RV32-NEXT: vdiv.vv v9, v9, v11 +; LMULMAX1-RV32-NEXT: vdiv.vv v8, v8, v11 ; LMULMAX1-RV32-NEXT: vse64.v v8, (a0) ; LMULMAX1-RV32-NEXT: vse64.v v9, (a1) ; LMULMAX1-RV32-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index dc52e69e5364d..cb3ee899dde7d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -13130,10 +13130,12 @@ define <4 x i32> @mgather_unit_stride_load_with_offset(ptr %base) { ; ; RV64V-LABEL: mgather_unit_stride_load_with_offset: ; RV64V: # %bb.0: -; RV64V-NEXT: lui a1, %hi(.LCPI103_0) -; RV64V-NEXT: addi a1, a1, %lo(.LCPI103_0) -; RV64V-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV64V-NEXT: vle64.v v10, (a1) +; RV64V-NEXT: lui a1, 115073 +; RV64V-NEXT: addiw a1, a1, 1040 +; RV64V-NEXT: vsetivli zero, 4, e64, m2, ta, ma +; RV64V-NEXT: vmv.s.x v8, a1 +; RV64V-NEXT: vsext.vf8 v10, v8 +; RV64V-NEXT: vsetvli zero, zero, e32, m1, ta, ma ; RV64V-NEXT: vluxei64.v v8, (a0), v10 ; RV64V-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll index f9c49be55986c..13e81d30d66a3 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-stepvector.ll @@ -230,10 +230,11 @@ define <4 x i64> @stepvector_v4i64() { ; RV32LMULMAX1-NEXT: vmv.v.i v8, 0 ; RV32LMULMAX1-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV32LMULMAX1-NEXT: vslideup.vi v8, v9, 2 -; RV32LMULMAX1-NEXT: lui a0, %hi(.LCPI14_0) -; RV32LMULMAX1-NEXT: addi a0, a0, %lo(.LCPI14_0) +; RV32LMULMAX1-NEXT: lui a0, 48 +; RV32LMULMAX1-NEXT: addi a0, a0, 2 +; RV32LMULMAX1-NEXT: vmv.s.x v10, a0 ; RV32LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32LMULMAX1-NEXT: vle32.v v9, (a0) +; RV32LMULMAX1-NEXT: vsext.vf4 v9, v10 ; RV32LMULMAX1-NEXT: ret ; ; RV64LMULMAX1-LABEL: stepvector_v4i64: @@ -270,16 +271,19 @@ define <8 x i64> @stepvector_v8i64() { ; RV32LMULMAX1-NEXT: vmv.v.i v8, 0 ; RV32LMULMAX1-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV32LMULMAX1-NEXT: vslideup.vi v8, v9, 2 -; RV32LMULMAX1-NEXT: lui a0, %hi(.LCPI15_0) -; RV32LMULMAX1-NEXT: addi a0, a0, %lo(.LCPI15_0) +; RV32LMULMAX1-NEXT: lui a0, 48 +; RV32LMULMAX1-NEXT: addi a0, a0, 2 +; RV32LMULMAX1-NEXT: vmv.s.x v10, a0 ; RV32LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32LMULMAX1-NEXT: vle32.v v9, (a0) -; RV32LMULMAX1-NEXT: lui a0, %hi(.LCPI15_1) -; RV32LMULMAX1-NEXT: addi a0, a0, %lo(.LCPI15_1) -; RV32LMULMAX1-NEXT: vle32.v v10, (a0) -; RV32LMULMAX1-NEXT: lui a0, %hi(.LCPI15_2) -; RV32LMULMAX1-NEXT: addi a0, a0, %lo(.LCPI15_2) -; RV32LMULMAX1-NEXT: vle32.v v11, (a0) +; RV32LMULMAX1-NEXT: vsext.vf4 v9, v10 +; 
RV32LMULMAX1-NEXT: lui a0, 80 +; RV32LMULMAX1-NEXT: addi a0, a0, 4 +; RV32LMULMAX1-NEXT: vmv.s.x v11, a0 +; RV32LMULMAX1-NEXT: vsext.vf4 v10, v11 +; RV32LMULMAX1-NEXT: lui a0, 112 +; RV32LMULMAX1-NEXT: addi a0, a0, 6 +; RV32LMULMAX1-NEXT: vmv.s.x v12, a0 +; RV32LMULMAX1-NEXT: vsext.vf4 v11, v12 ; RV32LMULMAX1-NEXT: ret ; ; RV64LMULMAX1-LABEL: stepvector_v8i64: @@ -322,28 +326,35 @@ define <16 x i64> @stepvector_v16i64() { ; RV32LMULMAX1-NEXT: vmv.v.i v8, 0 ; RV32LMULMAX1-NEXT: vsetivli zero, 3, e32, m1, tu, ma ; RV32LMULMAX1-NEXT: vslideup.vi v8, v9, 2 -; RV32LMULMAX1-NEXT: lui a0, %hi(.LCPI16_0) -; RV32LMULMAX1-NEXT: addi a0, a0, %lo(.LCPI16_0) +; RV32LMULMAX1-NEXT: lui a0, 48 +; RV32LMULMAX1-NEXT: addi a0, a0, 2 +; RV32LMULMAX1-NEXT: vmv.s.x v10, a0 ; RV32LMULMAX1-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; RV32LMULMAX1-NEXT: vle32.v v9, (a0) -; RV32LMULMAX1-NEXT: lui a0, %hi(.LCPI16_1) -; RV32LMULMAX1-NEXT: addi a0, a0, %lo(.LCPI16_1) -; RV32LMULMAX1-NEXT: vle32.v v10, (a0) -; RV32LMULMAX1-NEXT: lui a0, %hi(.LCPI16_2) -; RV32LMULMAX1-NEXT: addi a0, a0, %lo(.LCPI16_2) -; RV32LMULMAX1-NEXT: vle32.v v11, (a0) -; RV32LMULMAX1-NEXT: lui a0, %hi(.LCPI16_3) -; RV32LMULMAX1-NEXT: addi a0, a0, %lo(.LCPI16_3) -; RV32LMULMAX1-NEXT: vle32.v v12, (a0) -; RV32LMULMAX1-NEXT: lui a0, %hi(.LCPI16_4) -; RV32LMULMAX1-NEXT: addi a0, a0, %lo(.LCPI16_4) -; RV32LMULMAX1-NEXT: vle32.v v13, (a0) -; RV32LMULMAX1-NEXT: lui a0, %hi(.LCPI16_5) -; RV32LMULMAX1-NEXT: addi a0, a0, %lo(.LCPI16_5) -; RV32LMULMAX1-NEXT: vle32.v v14, (a0) -; RV32LMULMAX1-NEXT: lui a0, %hi(.LCPI16_6) -; RV32LMULMAX1-NEXT: addi a0, a0, %lo(.LCPI16_6) -; RV32LMULMAX1-NEXT: vle32.v v15, (a0) +; RV32LMULMAX1-NEXT: vsext.vf4 v9, v10 +; RV32LMULMAX1-NEXT: lui a0, 80 +; RV32LMULMAX1-NEXT: addi a0, a0, 4 +; RV32LMULMAX1-NEXT: vmv.s.x v11, a0 +; RV32LMULMAX1-NEXT: vsext.vf4 v10, v11 +; RV32LMULMAX1-NEXT: lui a0, 112 +; RV32LMULMAX1-NEXT: addi a0, a0, 6 +; RV32LMULMAX1-NEXT: vmv.s.x v12, a0 +; RV32LMULMAX1-NEXT: vsext.vf4 v11, v12 +; RV32LMULMAX1-NEXT: lui a0, 144 +; RV32LMULMAX1-NEXT: addi a0, a0, 8 +; RV32LMULMAX1-NEXT: vmv.s.x v13, a0 +; RV32LMULMAX1-NEXT: vsext.vf4 v12, v13 +; RV32LMULMAX1-NEXT: lui a0, 176 +; RV32LMULMAX1-NEXT: addi a0, a0, 10 +; RV32LMULMAX1-NEXT: vmv.s.x v14, a0 +; RV32LMULMAX1-NEXT: vsext.vf4 v13, v14 +; RV32LMULMAX1-NEXT: lui a0, 208 +; RV32LMULMAX1-NEXT: addi a0, a0, 12 +; RV32LMULMAX1-NEXT: vmv.s.x v15, a0 +; RV32LMULMAX1-NEXT: vsext.vf4 v14, v15 +; RV32LMULMAX1-NEXT: lui a0, 240 +; RV32LMULMAX1-NEXT: addi a0, a0, 14 +; RV32LMULMAX1-NEXT: vmv.s.x v16, a0 +; RV32LMULMAX1-NEXT: vsext.vf4 v15, v16 ; RV32LMULMAX1-NEXT: ret ; ; RV64LMULMAX1-LABEL: stepvector_v16i64: diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll index e6430f63f5ada..fc05648270367 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-store.ll @@ -168,27 +168,51 @@ define void @store_constant_v4i8(ptr %p) { } define void @store_constant_v4i16(ptr %p) { -; CHECK-LABEL: store_constant_v4i16: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI13_0) -; CHECK-NEXT: addi a1, a1, %lo(.LCPI13_0) -; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-NEXT: vle16.v v8, (a1) -; CHECK-NEXT: vse16.v v8, (a0) -; CHECK-NEXT: ret +; RV32-LABEL: store_constant_v4i16: +; RV32: # %bb.0: +; RV32-NEXT: lui a1, 4176 +; RV32-NEXT: addi a1, a1, 1539 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vmv.s.x v8, a1 +; RV32-NEXT: vsetvli zero, 
zero, e16, mf2, ta, ma +; RV32-NEXT: vsext.vf2 v9, v8 +; RV32-NEXT: vse16.v v9, (a0) +; RV32-NEXT: ret +; +; RV64-LABEL: store_constant_v4i16: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, 4176 +; RV64-NEXT: addiw a1, a1, 1539 +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vmv.s.x v8, a1 +; RV64-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV64-NEXT: vsext.vf2 v9, v8 +; RV64-NEXT: vse16.v v9, (a0) +; RV64-NEXT: ret store <4 x i16> , ptr %p ret void } define void @store_constant_v4i32(ptr %p) { -; CHECK-LABEL: store_constant_v4i32: -; CHECK: # %bb.0: -; CHECK-NEXT: lui a1, %hi(.LCPI14_0) -; CHECK-NEXT: addi a1, a1, %lo(.LCPI14_0) -; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma -; CHECK-NEXT: vle32.v v8, (a1) -; CHECK-NEXT: vse32.v v8, (a0) -; CHECK-NEXT: ret +; RV32-LABEL: store_constant_v4i32: +; RV32: # %bb.0: +; RV32-NEXT: lui a1, 4176 +; RV32-NEXT: addi a1, a1, 1539 +; RV32-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV32-NEXT: vmv.s.x v8, a1 +; RV32-NEXT: vsext.vf4 v9, v8 +; RV32-NEXT: vse32.v v9, (a0) +; RV32-NEXT: ret +; +; RV64-LABEL: store_constant_v4i32: +; RV64: # %bb.0: +; RV64-NEXT: lui a1, 4176 +; RV64-NEXT: addiw a1, a1, 1539 +; RV64-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; RV64-NEXT: vmv.s.x v8, a1 +; RV64-NEXT: vsext.vf4 v9, v8 +; RV64-NEXT: vse32.v v9, (a0) +; RV64-NEXT: ret store <4 x i32> , ptr %p ret void } diff --git a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll index 02927c60dc4dc..d113c9fd31bbf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll @@ -55,24 +55,28 @@ define <8 x i32> @vector_interleave_v8i32_v4i32(<4 x i32> %a, <4 x i32> %b) { define <4 x i64> @vector_interleave_v4i64_v2i64(<2 x i64> %a, <2 x i64> %b) { ; RV32-LABEL: vector_interleave_v4i64_v2i64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: lui a0, %hi(.LCPI3_0) -; RV32-NEXT: addi a0, a0, %lo(.LCPI3_0) -; RV32-NEXT: vle16.v v12, (a0) ; RV32-NEXT: vmv1r.v v10, v9 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vslideup.vi v8, v10, 2 +; RV32-NEXT: lui a0, 12304 +; RV32-NEXT: addi a0, a0, 512 +; RV32-NEXT: vmv.s.x v10, a0 +; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV32-NEXT: vsext.vf2 v12, v10 +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; RV32-NEXT: vrgatherei16.vv v10, v8, v12 ; RV32-NEXT: vmv.v.v v8, v10 ; RV32-NEXT: ret ; ; RV64-LABEL: vector_interleave_v4i64_v2i64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: lui a0, %hi(.LCPI3_0) -; RV64-NEXT: addi a0, a0, %lo(.LCPI3_0) -; RV64-NEXT: vle64.v v12, (a0) ; RV64-NEXT: vmv1r.v v10, v9 +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vslideup.vi v8, v10, 2 +; RV64-NEXT: lui a0, 12304 +; RV64-NEXT: addiw a0, a0, 512 +; RV64-NEXT: vmv.s.x v10, a0 +; RV64-NEXT: vsext.vf8 v12, v10 ; RV64-NEXT: vrgather.vv v10, v8, v12 ; RV64-NEXT: vmv.v.v v8, v10 ; RV64-NEXT: ret @@ -155,24 +159,28 @@ define <8 x float> @vector_interleave_v8f32_v4f32(<4 x float> %a, <4 x float> %b define <4 x double> @vector_interleave_v4f64_v2f64(<2 x double> %a, <2 x double> %b) { ; RV32-LABEL: vector_interleave_v4f64_v2f64: ; RV32: # %bb.0: -; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV32-NEXT: lui a0, %hi(.LCPI9_0) -; RV32-NEXT: addi a0, a0, %lo(.LCPI9_0) -; RV32-NEXT: vle16.v v12, (a0) ; RV32-NEXT: vmv1r.v v10, v9 +; RV32-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV32-NEXT: vslideup.vi v8, v10, 2 +; RV32-NEXT: lui a0, 
12304 +; RV32-NEXT: addi a0, a0, 512 +; RV32-NEXT: vmv.s.x v10, a0 +; RV32-NEXT: vsetvli zero, zero, e16, mf2, ta, ma +; RV32-NEXT: vsext.vf2 v12, v10 +; RV32-NEXT: vsetvli zero, zero, e64, m2, ta, ma ; RV32-NEXT: vrgatherei16.vv v10, v8, v12 ; RV32-NEXT: vmv.v.v v8, v10 ; RV32-NEXT: ret ; ; RV64-LABEL: vector_interleave_v4f64_v2f64: ; RV64: # %bb.0: -; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; RV64-NEXT: lui a0, %hi(.LCPI9_0) -; RV64-NEXT: addi a0, a0, %lo(.LCPI9_0) -; RV64-NEXT: vle64.v v12, (a0) ; RV64-NEXT: vmv1r.v v10, v9 +; RV64-NEXT: vsetivli zero, 4, e64, m2, ta, ma ; RV64-NEXT: vslideup.vi v8, v10, 2 +; RV64-NEXT: lui a0, 12304 +; RV64-NEXT: addiw a0, a0, 512 +; RV64-NEXT: vmv.s.x v10, a0 +; RV64-NEXT: vsext.vf8 v12, v10 ; RV64-NEXT: vrgather.vv v10, v8, v12 ; RV64-NEXT: vmv.v.v v8, v10 ; RV64-NEXT: ret diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll index cac159c3e3a25..4ece90dac18ac 100644 --- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll @@ -766,13 +766,14 @@ define void @test_srem_vec(ptr %X) nounwind { ; RV64MV-NEXT: vslide1down.vx v8, v8, a3 ; RV64MV-NEXT: vslide1down.vx v8, v8, a2 ; RV64MV-NEXT: vslidedown.vi v8, v8, 1 -; RV64MV-NEXT: lui a1, %hi(.LCPI3_3) -; RV64MV-NEXT: addi a1, a1, %lo(.LCPI3_3) -; RV64MV-NEXT: vle64.v v10, (a1) ; RV64MV-NEXT: li a1, -1 ; RV64MV-NEXT: srli a1, a1, 31 ; RV64MV-NEXT: vand.vx v8, v8, a1 -; RV64MV-NEXT: vmsne.vv v0, v8, v10 +; RV64MV-NEXT: lui a2, 32 +; RV64MV-NEXT: addiw a2, a2, 256 +; RV64MV-NEXT: vmv.s.x v10, a2 +; RV64MV-NEXT: vsext.vf8 v12, v10 +; RV64MV-NEXT: vmsne.vv v0, v8, v12 ; RV64MV-NEXT: vmv.v.i v8, 0 ; RV64MV-NEXT: vmerge.vim v8, v8, -1, v0 ; RV64MV-NEXT: vsetivli zero, 1, e64, m2, ta, ma