From b4e4666fe2a91fdae98cee86a550ef6818c10b4e Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 24 Feb 2025 13:05:35 -0800
Subject: [PATCH 01/14] [VP][RISCV][WIP] Add a vp.load.ff intrinsic for
 fault-only-first load.

Seems there's been some interest in supporting early-exit loops recently.
https://discourse.llvm.org/t/rfc-supporting-more-early-exit-loops/84690

This patch was extracted from our downstream, where we've been using it in
our vectorizer.

Still need to write up LangRef. Type legalization is also missing.
---
 llvm/include/llvm/CodeGen/SelectionDAG.h      |    2 +
 llvm/include/llvm/CodeGen/SelectionDAGNodes.h |   17 +
 llvm/include/llvm/IR/Intrinsics.td            |    6 +
 llvm/include/llvm/IR/VPIntrinsics.def         |    6 +
 .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp |   28 +
 .../SelectionDAG/SelectionDAGBuilder.cpp      |   32 +
 .../SelectionDAG/SelectionDAGBuilder.h        |    2 +
 llvm/lib/IR/IntrinsicInst.cpp                 |    5 +
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp   |   58 +
 llvm/lib/Target/RISCV/RISCVISelLowering.h     |    1 +
 .../RISCV/rvv/fixed-vectors-vploadff.ll       |  633 +++++++++
 llvm/test/CodeGen/RISCV/rvv/vploadff.ll       | 1127 +++++++++++++++++
 llvm/unittests/IR/VPIntrinsicTest.cpp         |    2 +
 13 files changed, 1919 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/vploadff.ll

diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index aa0dfbe666cde..b00b939ab2afc 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1572,6 +1572,8 @@ class SelectionDAG {
   SDValue getMaskedHistogram(SDVTList VTs, EVT MemVT, const SDLoc &dl,
                              ArrayRef<SDValue> Ops, MachineMemOperand *MMO,
                              ISD::MemIndexType IndexType);
+  SDValue getLoadFFVP(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr,
+                      SDValue Mask, SDValue EVL, MachineMemOperand *MMO);
 
   SDValue getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr, EVT MemVT,
                       MachineMemOperand *MMO);
diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 20283ad8f2689..007055d88424b 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -3057,6 +3057,23 @@ class MaskedHistogramSDNode : public MaskedGatherScatterSDNode {
   }
 };
 
+class VPLoadFFSDNode : public MemSDNode {
+public:
+  friend class SelectionDAG;
+
+  VPLoadFFSDNode(unsigned Order, const DebugLoc &dl, SDVTList VTs, EVT MemVT,
+                 MachineMemOperand *MMO)
+      : MemSDNode(ISD::VP_LOAD_FF, Order, dl, VTs, MemVT, MMO) {}
+
+  const SDValue &getBasePtr() const { return getOperand(1); }
+  const SDValue &getMask() const { return getOperand(2); }
+  const SDValue &getVectorLength() const { return getOperand(3); }
+
+  static bool classof(const SDNode *N) {
+    return N->getOpcode() == ISD::VP_LOAD_FF;
+  }
+};
+
 class FPStateAccessSDNode : public MemSDNode {
 public:
   friend class SelectionDAG;
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 14ecae41ff08f..0c26c7bcfbad8 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -1911,6 +1911,12 @@ def int_vp_load : DefaultAttrsIntrinsic<[ llvm_anyvector_ty],
                                            llvm_i32_ty],
                                          [ NoCapture<ArgIndex<0>>, IntrNoSync, IntrReadMem, IntrWillReturn, IntrArgMemOnly ]>;
+def int_vp_load_ff : DefaultAttrsIntrinsic<[ llvm_anyvector_ty, llvm_i32_ty ],
+                                           [ llvm_anyptr_ty,
+                                             LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
+                                             llvm_i32_ty],
+                                           [ NoCapture<ArgIndex<0>>, IntrNoSync, IntrReadMem, IntrWillReturn, IntrArgMemOnly ]>;
+
 
 def int_vp_gather: DefaultAttrsIntrinsic<[ llvm_anyvector_ty],
                                          [ LLVMVectorOfAnyPointersToElt<0>,
                                            LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>,
diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def
index 55f4719da7c8b..4a71097226f18 100644
--- a/llvm/include/llvm/IR/VPIntrinsics.def
+++ b/llvm/include/llvm/IR/VPIntrinsics.def
@@ -587,6 +587,12 @@ VP_PROPERTY_FUNCTIONAL_OPC(Load)
 VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_load)
 END_REGISTER_VP(vp_load, VP_LOAD)
 
+BEGIN_REGISTER_VP_INTRINSIC(vp_load_ff, 1, 2)
+// val,chain = VP_LOAD_FF chain,base,mask,evl
+BEGIN_REGISTER_VP_SDNODE(VP_LOAD_FF, -1, vp_load_ff, 2, 3)
+HELPER_MAP_VPID_TO_VPSD(vp_load_ff, VP_LOAD_FF)
+VP_PROPERTY_NO_FUNCTIONAL
+END_REGISTER_VP(vp_load_ff, VP_LOAD_FF)
+
 // llvm.experimental.vp.strided.load(ptr,stride,mask,vlen)
 BEGIN_REGISTER_VP_INTRINSIC(experimental_vp_strided_load, 2, 3)
 // chain = EXPERIMENTAL_VP_STRIDED_LOAD chain,base,offset,stride,mask,evl
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 9e61df7047d4a..ff6ef4d02c520 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -10139,6 +10139,34 @@ SDValue SelectionDAG::getMaskedHistogram(SDVTList VTs, EVT MemVT,
   return V;
 }
 
+SDValue SelectionDAG::getLoadFFVP(EVT VT, const SDLoc &dl, SDValue Chain,
+                                  SDValue Ptr, SDValue Mask, SDValue EVL,
+                                  MachineMemOperand *MMO) {
+  SDVTList VTs = getVTList(VT, EVL.getValueType(), MVT::Other);
+  SDValue Ops[] = {Chain, Ptr, Mask, EVL};
+  FoldingSetNodeID ID;
+  AddNodeIDNode(ID, ISD::VP_LOAD_FF, VTs, Ops);
+  ID.AddInteger(VT.getRawBits());
+  ID.AddInteger(getSyntheticNodeSubclassData<VPLoadFFSDNode>(dl.getIROrder(),
+                                                             VTs, VT, MMO));
+  ID.AddInteger(MMO->getPointerInfo().getAddrSpace());
+  ID.AddInteger(MMO->getFlags());
+  void *IP = nullptr;
+  if (SDNode *E = FindNodeOrInsertPos(ID, dl, IP)) {
+    cast<VPLoadFFSDNode>(E)->refineAlignment(MMO);
+    return SDValue(E, 0);
+  }
+  auto *N = newSDNode<VPLoadFFSDNode>(dl.getIROrder(), dl.getDebugLoc(), VTs,
+                                      VT, MMO);
+  createOperands(N, Ops);
+
+  CSEMap.InsertNode(N, IP);
+  InsertNode(N);
+  SDValue V(N, 0);
+  NewSDValueDbgMsg(V, "Creating new node: ", this);
+  return V;
+}
+
 SDValue SelectionDAG::getGetFPEnv(SDValue Chain, const SDLoc &dl, SDValue Ptr,
                                   EVT MemVT, MachineMemOperand *MMO) {
   assert(Chain.getValueType() == MVT::Other && "Invalid chain type");
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 1c58a7f05446c..a287bdeb1eb90 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -8462,6 +8462,35 @@ void SelectionDAGBuilder::visitVPLoad(
   setValue(&VPIntrin, LD);
 }
 
+void SelectionDAGBuilder::visitVPLoadFF(
+    const VPIntrinsic &VPIntrin, EVT VT, EVT EVLVT,
+    const SmallVectorImpl<SDValue> &OpValues) {
+  assert(OpValues.size() == 3);
+  SDLoc DL = getCurSDLoc();
+  Value *PtrOperand = VPIntrin.getArgOperand(0);
+  MaybeAlign Alignment = VPIntrin.getPointerAlignment();
+  AAMDNodes AAInfo = VPIntrin.getAAMetadata();
+  const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
+  SDValue LD;
+  bool AddToChain = true;
+  // Do not serialize variable-length loads of constant memory with
+  // anything.
+  if (!Alignment)
+    Alignment = DAG.getEVTAlign(VT);
+  MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
+  AddToChain = !BatchAA || !BatchAA->pointsToConstantMemory(ML);
+  SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
+  MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
+      MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,
+      MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges);
+  LD = DAG.getLoadFFVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2],
+                       MMO);
+  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, EVLVT, LD.getValue(1));
+  if (AddToChain)
+    PendingLoads.push_back(LD.getValue(2));
+  setValue(&VPIntrin, DAG.getMergeValues({LD.getValue(0), Trunc}, DL));
+}
+
 void SelectionDAGBuilder::visitVPGather(
     const VPIntrinsic &VPIntrin, EVT VT,
     const SmallVectorImpl<SDValue> &OpValues) {
@@ -8695,6 +8724,9 @@ void SelectionDAGBuilder::visitVectorPredicationIntrinsic(
   case ISD::VP_LOAD:
     visitVPLoad(VPIntrin, ValueVTs[0], OpValues);
     break;
+  case ISD::VP_LOAD_FF:
+    visitVPLoadFF(VPIntrin, ValueVTs[0], ValueVTs[1], OpValues);
+    break;
   case ISD::VP_GATHER:
     visitVPGather(VPIntrin, ValueVTs[0], OpValues);
     break;
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
index 8496f8ae78ce6..b30695876828d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h
@@ -632,6 +632,8 @@ class SelectionDAGBuilder {
   void visitVectorExtractLastActive(const CallInst &I, unsigned Intrinsic);
   void visitVPLoad(const VPIntrinsic &VPIntrin, EVT VT,
                    const SmallVectorImpl<SDValue> &OpValues);
+  void visitVPLoadFF(const VPIntrinsic &VPIntrin, EVT VT, EVT EVLVT,
+                     const SmallVectorImpl<SDValue> &OpValues);
   void visitVPStore(const VPIntrinsic &VPIntrin,
                     const SmallVectorImpl<SDValue> &OpValues);
   void visitVPGather(const VPIntrinsic &VPIntrin, EVT VT,
diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp
index 256bce1abe71f..7ddea32f57f02 100644
--- a/llvm/lib/IR/IntrinsicInst.cpp
+++ b/llvm/lib/IR/IntrinsicInst.cpp
@@ -448,6 +448,7 @@ VPIntrinsic::getMemoryPointerParamPos(Intrinsic::ID VPID) {
   case Intrinsic::experimental_vp_strided_store:
     return 1;
   case Intrinsic::vp_load:
+  case Intrinsic::vp_load_ff:
   case Intrinsic::vp_gather:
   case Intrinsic::experimental_vp_strided_load:
     return 0;
@@ -671,6 +672,10 @@ Function *VPIntrinsic::getOrInsertDeclarationForParams(
     VPFunc = Intrinsic::getOrInsertDeclaration(
         M, VPID, {ReturnType, Params[0]->getType()});
     break;
+  case Intrinsic::vp_load_ff:
+    VPFunc = Intrinsic::getOrInsertDeclaration(
+        M, VPID, {ReturnType->getStructElementType(0), Params[0]->getType()});
+    break;
   case Intrinsic::experimental_vp_strided_load:
     VPFunc = Intrinsic::getOrInsertDeclaration(
         M, VPID, {ReturnType, Params[0]->getType(), Params[1]->getType()});
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6076fe56416ad..3da92c0f43590 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -880,6 +880,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
           {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
            ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
           VT, Custom);
+      setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
 
       setOperationAction({ISD::CONCAT_VECTORS, ISD::INSERT_SUBVECTOR,
                          ISD::EXTRACT_SUBVECTOR, ISD::SCALAR_TO_VECTOR},
@@ -1031,6 +1032,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
           {ISD::VP_LOAD, ISD::VP_STORE, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
            ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
           VT, Custom);
+      setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
 
       setOperationAction(ISD::SELECT, VT, Custom);
       setOperationAction(ISD::SELECT_CC, VT, Expand);
@@ -1101,6 +1103,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
            ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
           VT, Custom);
+      setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
 
       setOperationAction(ISD::FNEG, VT, Expand);
       setOperationAction(ISD::FABS, VT, Expand);
@@ -1269,6 +1272,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
            ISD::EXPERIMENTAL_VP_STRIDED_STORE, ISD::VP_GATHER, ISD::VP_SCATTER},
           VT, Custom);
+        setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
 
         setOperationAction({ISD::ADD, ISD::MUL, ISD::SUB, ISD::AND, ISD::OR,
                             ISD::XOR, ISD::SDIV, ISD::SREM, ISD::UDIV,
@@ -1357,6 +1361,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
              ISD::VP_SCATTER, ISD::EXPERIMENTAL_VP_STRIDED_LOAD,
              ISD::EXPERIMENTAL_VP_STRIDED_STORE},
             VT, Custom);
+        setOperationAction(ISD::VP_LOAD_FF, VT, Custom);
 
         setOperationAction({ISD::FP_ROUND, ISD::FP_EXTEND}, VT, Custom);
         setOperationAction({ISD::STRICT_FP_ROUND, ISD::STRICT_FP_EXTEND}, VT,
@@ -7616,6 +7621,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
   case ISD::MLOAD:
   case ISD::VP_LOAD:
     return lowerMaskedLoad(Op, DAG);
+  case ISD::VP_LOAD_FF:
+    return lowerLoadFF(Op, DAG);
   case ISD::MSTORE:
   case ISD::VP_STORE:
     return lowerMaskedStore(Op, DAG);
@@ -11965,6 +11972,57 @@ SDValue RISCVTargetLowering::lowerMaskedLoad(SDValue Op,
   return DAG.getMergeValues({Result, Chain}, DL);
 }
 
+SDValue RISCVTargetLowering::lowerLoadFF(SDValue Op, SelectionDAG &DAG) const {
+  assert(Op.getResNo() == 0);
+  SDLoc DL(Op);
+  MVT VT = Op.getSimpleValueType();
+
+  const auto *VPLoadFF = cast<VPLoadFFSDNode>(Op);
+  EVT MemVT = VPLoadFF->getMemoryVT();
+  MachineMemOperand *MMO = VPLoadFF->getMemOperand();
+  SDValue Chain = VPLoadFF->getChain();
+  SDValue BasePtr = VPLoadFF->getBasePtr();
+
+  SDValue Mask = VPLoadFF->getMask();
+  SDValue VL = VPLoadFF->getVectorLength();
+
+  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
+
+  MVT XLenVT = Subtarget.getXLenVT();
+
+  MVT ContainerVT = VT;
+  if (VT.isFixedLengthVector()) {
+    ContainerVT = getContainerForFixedLengthVector(VT);
+    if (!IsUnmasked) {
+      MVT MaskVT = getMaskTypeFor(ContainerVT);
+      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+    }
+  }
+
+  unsigned IntID =
+      IsUnmasked ? Intrinsic::riscv_vleff : Intrinsic::riscv_vleff_mask;
+  SmallVector<SDValue> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
+  Ops.push_back(DAG.getUNDEF(ContainerVT));
+  Ops.push_back(BasePtr);
+  if (!IsUnmasked)
+    Ops.push_back(Mask);
+  Ops.push_back(VL);
+  if (!IsUnmasked)
+    Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
+
+  SDVTList VTs = DAG.getVTList({ContainerVT, Op->getValueType(1), MVT::Other});
+
+  SDValue Result =
+      DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MemVT, MMO);
+  SDValue OutVL = Result.getValue(1);
+  Chain = Result.getValue(2);
+
+  if (VT.isFixedLengthVector())
+    Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
+
+  return DAG.getMergeValues({Result, OutVL, Chain}, DL);
+}
+
 SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
                                               SelectionDAG &DAG) const {
   SDLoc DL(Op);
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 26b888653c81d..8bba8c50ba862 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -991,6 +991,7 @@ class RISCVTargetLowering : public TargetLowering {
   SDValue lowerVECTOR_SPLICE(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerLoadFF(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerVectorCompress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFixedLengthVectorFCOPYSIGNToRVV(SDValue Op,
diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll
new file mode 100644
index 0000000000000..9f982293256ac
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll
@@ -0,0 +1,633 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
+
+declare { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr, <2 x i1>, i32)
+
+define { <2 x i8>, i32 } @vploadff_v2i8(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
+  ret { <2 x i8>, i32 } %load
+}
+
+define { <2 x i8>, i32 } @vploadff_v2i8_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2i8_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vle8ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <2 x i1> poison, i1 true, i32 0
+  %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
+  %load = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+  ret { <2 x i8>, i32 } %load
+}
+
+declare { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr, <4 x i1>, i32)
+
+define { <4 x i8>, i32 } @vploadff_v4i8(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
+  ret { <4 x i8>, i32 } %load
+}
+
+define { <4 x i8>, i32 } @vploadff_v4i8_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4i8_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vle8ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <4 x i1> poison, i1 true, i32 0
+  %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
+  %load = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+  ret { <4 x i8>, i32 } %load
+}
+
+declare { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr, <8 x i1>, i32)
+
+define { <8 x i8>, i32 } @vploadff_v8i8(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
+  ret { <8 x i8>, i32 } %load
+}
+
+define { <8 x i8>, i32 } @vploadff_v8i8_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8i8_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vle8ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <8 x i1> poison, i1 true, i32 0
+  %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
+  %load = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+  ret { <8 x i8>, i32 } %load
+}
+
+declare { <2 x i16>, i32 } @llvm.vp.load.ff.v2i16.p0(ptr, <2 x i1>, i32)
+
+define { <2 x i16>, i32 } @vploadff_v2i16(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <2 x i16>, i32 } @llvm.vp.load.ff.v2i16.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
+  ret { <2 x i16>, i32 } %load
+}
+
+define { <2 x i16>, i32 } @vploadff_v2i16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2i16_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <2 x i1> poison, i1 true, i32 0
+  %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
+  %load = call { <2 x i16>, i32 } @llvm.vp.load.ff.v2i16.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+  ret { <2 x i16>, i32 } %load
+}
+
+declare { <4 x i16>, i32 } @llvm.vp.load.ff.v4i16.p0(ptr, <4 x i1>, i32)
+
+define { <4 x i16>, i32 } @vploadff_v4i16(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <4 x i16>, i32 } @llvm.vp.load.ff.v4i16.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
+  ret { <4 x i16>, i32 } %load
+}
+
+define { <4 x i16>, i32 } @vploadff_v4i16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4i16_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <4 x i1> poison, i1 true, i32 0
+  %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
+  %load = call { <4 x i16>, i32 } @llvm.vp.load.ff.v4i16.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+  ret { <4 x i16>, i32 } %load
+}
+
+declare { <8 x i16>, i32 } @llvm.vp.load.ff.v8i16.p0(ptr, <8 x i1>, i32)
+
+define { <8 x i16>, i32 } @vploadff_v8i16(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <8 x i16>, i32 } @llvm.vp.load.ff.v8i16.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
+  ret { <8 x i16>, i32 } %load
+}
+
+define { <8 x i16>, i32 } @vploadff_v8i16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8i16_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <8 x i1> poison, i1 true, i32 0
+  %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
+  %load = call { <8 x i16>, i32 } @llvm.vp.load.ff.v8i16.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+  ret { <8 x i16>, i32 } %load
+}
+
+declare { <2 x i32>, i32 } @llvm.vp.load.ff.v2i32.p0(ptr, <2 x i1>, i32)
+
+define { <2 x i32>, i32 } @vploadff_v2i32(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <2 x i32>, i32 } @llvm.vp.load.ff.v2i32.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
+  ret { <2 x i32>, i32 } %load
+}
+
+define { <2 x i32>, i32 } @vploadff_v2i32_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2i32_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <2 x i1> poison, i1 true, i32 0
+  %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
+  %load = call { <2 x i32>, i32 } @llvm.vp.load.ff.v2i32.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+  ret { <2 x i32>, i32 } %load
+}
+
+declare { <4 x i32>, i32 } @llvm.vp.load.ff.v4i32.p0(ptr, <4 x i1>, i32)
+
+define { <4 x i32>, i32 } @vploadff_v4i32(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <4 x i32>, i32 } @llvm.vp.load.ff.v4i32.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
+  ret { <4 x i32>, i32 } %load
+}
+
+define { <4 x i32>, i32 } @vploadff_v4i32_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4i32_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <4 x i1> poison, i1 true, i32 0
+  %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
+  %load = call { <4 x i32>, i32 } @llvm.vp.load.ff.v4i32.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+  ret { <4 x i32>, i32 } %load
+}
+
+declare { <8 x i32>, i32 } @llvm.vp.load.ff.v8i32.p0(ptr, <8 x i1>, i32)
+
+define { <8 x i32>, i32 } @vploadff_v8i32(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <8 x i32>, i32 } @llvm.vp.load.ff.v8i32.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
+  ret { <8 x i32>, i32 } %load
+}
+
+define { <8 x i32>, i32 } @vploadff_v8i32_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8i32_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <8 x i1> poison, i1 true, i32 0
+  %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
+  %load = call { <8 x i32>, i32 } @llvm.vp.load.ff.v8i32.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+  ret { <8 x i32>, i32 } %load
+}
+
+declare { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr, <2 x i1>, i32)
+
+define { <2 x i64>, i32 } @vploadff_v2i64(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
+  ret { <2 x i64>, i32 } %load
+}
+
+define { <2 x i64>, i32 } @vploadff_v2i64_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2i64_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT:    vle64ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <2 x i1> poison, i1 true, i32 0
+  %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
+  %load = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+  ret { <2 x i64>, i32 } %load
+}
+
+declare { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr, <4 x i1>, i32)
+
+define { <4 x i64>, i32 } @vploadff_v4i64(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
+  ret { <4 x i64>, i32 } %load
+}
+
+define { <4 x i64>, i32 } @vploadff_v4i64_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4i64_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT:    vle64ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <4 x i1> poison, i1 true, i32 0
+  %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
+  %load = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+  ret { <4 x i64>, i32 } %load
+}
+
+declare { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr, <8 x i1>, i32)
+
+define { <8 x i64>, i32 } @vploadff_v8i64(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
+  ret { <8 x i64>, i32 } %load
+}
+
+define { <8 x i64>, i32 } @vploadff_v8i64_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8i64_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT:    vle64ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <8 x i1> poison, i1 true, i32 0
+  %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
+  %load = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+  ret { <8 x i64>, i32 } %load
+}
+
+declare { <2 x half>, i32 } @llvm.vp.load.ff.v2f16.p0(ptr, <2 x i1>, i32)
+
+define { <2 x half>, i32 } @vploadff_v2f16(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <2 x half>, i32 } @llvm.vp.load.ff.v2f16.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
+  ret { <2 x half>, i32 } %load
+}
+
+define { <2 x half>, i32 } @vploadff_v2f16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2f16_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <2 x i1> poison, i1 true, i32 0
+  %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
+  %load = call { <2 x half>, i32 } @llvm.vp.load.ff.v2f16.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+  ret { <2 x half>, i32 } %load
+}
+
+declare { <4 x half>, i32 } @llvm.vp.load.ff.v4f16.p0(ptr, <4 x i1>, i32)
+
+define { <4 x half>, i32 } @vploadff_v4f16(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <4 x half>, i32 } @llvm.vp.load.ff.v4f16.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
+  ret { <4 x half>, i32 } %load
+}
+
+define { <4 x half>, i32 } @vploadff_v4f16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4f16_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <4 x i1> poison, i1 true, i32 0
+  %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
+  %load = call { <4 x half>, i32 } @llvm.vp.load.ff.v4f16.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+  ret { <4 x half>, i32 } %load
+}
+
+declare { <8 x half>, i32 } @llvm.vp.load.ff.v8f16.p0(ptr, <8 x i1>, i32)
+
+define { <8 x half>, i32 } @vploadff_v8f16(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <8 x half>, i32 } @llvm.vp.load.ff.v8f16.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
+  ret { <8 x half>, i32 } %load
+}
+
+define { <8 x half>, i32 } @vploadff_v8f16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8f16_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <8 x i1> poison, i1 true, i32 0
+  %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
+  %load = call { <8 x half>, i32 } @llvm.vp.load.ff.v8f16.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+  ret { <8 x half>, i32 } %load
+}
+
+declare { <2 x float>, i32 } @llvm.vp.load.ff.v2f32.p0(ptr, <2 x i1>, i32)
+
+define { <2 x float>, i32 } @vploadff_v2f32(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <2 x float>, i32 } @llvm.vp.load.ff.v2f32.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
+  ret { <2 x float>, i32 } %load
+}
+
+define { <2 x float>, i32 } @vploadff_v2f32_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2f32_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <2 x i1> poison, i1 true, i32 0
+  %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
+  %load = call { <2 x float>, i32 } @llvm.vp.load.ff.v2f32.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+  ret { <2 x float>, i32 } %load
+}
+
+declare { <4 x float>, i32 } @llvm.vp.load.ff.v4f32.p0(ptr, <4 x i1>, i32)
+
+define { <4 x float>, i32 } @vploadff_v4f32(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <4 x float>, i32 } @llvm.vp.load.ff.v4f32.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
+  ret { <4 x float>, i32 } %load
+}
+
+define { <4 x float>, i32 } @vploadff_v4f32_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4f32_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <4 x i1> poison, i1 true, i32 0
+  %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
+  %load = call { <4 x float>, i32 } @llvm.vp.load.ff.v4f32.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+  ret { <4 x float>, i32 } %load
+}
+
+declare { <8 x float>, i32 } @llvm.vp.load.ff.v8f32.p0(ptr, <8 x i1>, i32)
+
+define { <8 x float>, i32 } @vploadff_v8f32(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <8 x float>, i32 } @llvm.vp.load.ff.v8f32.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
+  ret { <8 x float>, i32 } %load
+}
+
+define { <8 x float>, i32 } @vploadff_v8f32_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8f32_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <8 x i1> poison, i1 true, i32 0
+  %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
+  %load = call { <8 x float>, i32 } @llvm.vp.load.ff.v8f32.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+  ret { <8 x float>, i32 } %load
+}
+
+declare { <2 x double>, i32 } @llvm.vp.load.ff.v2f64.p0(ptr, <2 x i1>, i32)
+
+define { <2 x double>, i32 } @vploadff_v2f64(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <2 x double>, i32 } @llvm.vp.load.ff.v2f64.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
+  ret { <2 x double>, i32 } %load
+}
+
+define { <2 x double>, i32 } @vploadff_v2f64_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2f64_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT:    vle64ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <2 x i1> poison, i1 true, i32 0
+  %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
+  %load = call { <2 x double>, i32 } @llvm.vp.load.ff.v2f64.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+  ret { <2 x double>, i32 } %load
+}
+
+declare { <4 x double>, i32 } @llvm.vp.load.ff.v4f64.p0(ptr, <4 x i1>, i32)
+
+define { <4 x double>, i32 } @vploadff_v4f64(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <4 x double>, i32 } @llvm.vp.load.ff.v4f64.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
+  ret { <4 x double>, i32 } %load
+}
+
+define { <4 x double>, i32 } @vploadff_v4f64_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4f64_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT:    vle64ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <4 x i1> poison, i1 true, i32 0
+  %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
+  %load = call { <4 x double>, i32 } @llvm.vp.load.ff.v4f64.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+  ret { <4 x double>, i32 } %load
+}
+
+declare { <8 x double>, i32 } @llvm.vp.load.ff.v8f64.p0(ptr, <8 x i1>, i32)
+
+define { <8 x double>, i32 } @vploadff_v8f64(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8f64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <8 x double>, i32 } @llvm.vp.load.ff.v8f64.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
+  ret { <8 x double>, i32 } %load
+}
+
+define { <8 x double>, i32 } @vploadff_v8f64_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8f64_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT:    vle64ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <8 x i1> poison, i1 true, i32 0
+  %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
+  %load = call { <8 x double>, i32 } @llvm.vp.load.ff.v8f64.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+  ret { <8 x double>, i32 } %load
+}
+
+declare { <2 x bfloat>, i32 } @llvm.vp.load.ff.v2bf16.p0(ptr, <2 x i1>, i32)
+
+define { <2 x bfloat>, i32 } @vploadff_v2bf16(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <2 x bfloat>, i32 } @llvm.vp.load.ff.v2bf16.p0(ptr %ptr, <2 x i1> %m, i32 %evl)
+  ret { <2 x bfloat>, i32 } %load
+}
+
+define { <2 x bfloat>, i32 } @vploadff_v2bf16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v2bf16_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <2 x i1> poison, i1 true, i32 0
+  %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
+  %load = call { <2 x bfloat>, i32 } @llvm.vp.load.ff.v2bf16.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+  ret { <2 x bfloat>, i32 } %load
+}
+
+declare { <4 x bfloat>, i32 } @llvm.vp.load.ff.v4bf16.p0(ptr, <4 x i1>, i32)
+
+define { <4 x bfloat>, i32 } @vploadff_v4bf16(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <4 x bfloat>, i32 } @llvm.vp.load.ff.v4bf16.p0(ptr %ptr, <4 x i1> %m, i32 %evl)
+  ret { <4 x bfloat>, i32 } %load
+}
+
+define { <4 x bfloat>, i32 } @vploadff_v4bf16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v4bf16_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <4 x i1> poison, i1 true, i32 0
+  %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
+  %load = call { <4 x bfloat>, i32 } @llvm.vp.load.ff.v4bf16.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+  ret { <4 x bfloat>, i32 } %load
+}
+
+declare { <8 x bfloat>, i32 } @llvm.vp.load.ff.v8bf16.p0(ptr, <8 x i1>, i32)
+
+define { <8 x bfloat>, i32 } @vploadff_v8bf16(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <8 x bfloat>, i32 } @llvm.vp.load.ff.v8bf16.p0(ptr %ptr, <8 x i1> %m, i32 %evl)
+  ret { <8 x bfloat>, i32 } %load
+}
+
+define { <8 x bfloat>, i32 } @vploadff_v8bf16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_v8bf16_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <8 x i1> poison, i1 true, i32 0
+  %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
+  %load = call { <8 x bfloat>, i32 } @llvm.vp.load.ff.v8bf16.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+  ret { <8 x bfloat>, i32 } %load
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vploadff.ll b/llvm/test/CodeGen/RISCV/rvv/vploadff.ll
new file mode 100644
index 0000000000000..11812eec6ac46
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vploadff.ll
@@ -0,0 +1,1127 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
+; RUN:   -verify-machineinstrs < %s | FileCheck %s
+
+declare { <vscale x 1 x i8>, i32 } @llvm.vp.load.ff.nxv1i8.p0(ptr, <vscale x 1 x i1>, i32)
+
+define { <vscale x 1 x i8>, i32 } @vploadff_nxv1i8(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv1i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 1 x i8>, i32 } @llvm.vp.load.ff.nxv1i8.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl)
+  ret { <vscale x 1 x i8>, i32 } %load
+}
+
+define { <vscale x 1 x i8>, i32 } @vploadff_nxv1i8_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv1i8_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf8, ta, ma
+; CHECK-NEXT:    vle8ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %load = call { <vscale x 1 x i8>, i32 } @llvm.vp.load.ff.nxv1i8.p0(ptr %ptr, <vscale x 1 x i1> %b, i32 %evl)
+  ret { <vscale x 1 x i8>, i32 } %load
+}
+
+declare { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr, <vscale x 2 x i1>, i32)
+
+define { <vscale x 2 x i8>, i32 } @vploadff_nxv2i8(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv2i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl)
+  ret { <vscale x 2 x i8>, i32 } %load
+}
+
+define { <vscale x 2 x i8>, i32 } @vploadff_nxv2i8_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv2i8_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf4, ta, ma
+; CHECK-NEXT:    vle8ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %load = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr %ptr, <vscale x 2 x i1> %b, i32 %evl)
+  ret { <vscale x 2 x i8>, i32 } %load
+}
+
+declare { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr, <vscale x 4 x i1>, i32)
+
+define { <vscale x 4 x i8>, i32 } @vploadff_nxv4i8(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv4i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl)
+  ret { <vscale x 4 x i8>, i32 } %load
+}
+
+define { <vscale x 4 x i8>, i32 } @vploadff_nxv4i8_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv4i8_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, mf2, ta, ma
+; CHECK-NEXT:    vle8ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %load = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr %ptr, <vscale x 4 x i1> %b, i32 %evl)
+  ret { <vscale x 4 x i8>, i32 } %load
+}
+
+declare { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr, <vscale x 8 x i1>, i32)
+
+define { <vscale x 8 x i8>, i32 } @vploadff_nxv8i8(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv8i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl)
+  ret { <vscale x 8 x i8>, i32 } %load
+}
+
+define { <vscale x 8 x i8>, i32 } @vploadff_nxv8i8_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv8i8_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m1, ta, ma
+; CHECK-NEXT:    vle8ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %load = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr %ptr, <vscale x 8 x i1> %b, i32 %evl)
+  ret { <vscale x 8 x i8>, i32 } %load
+}
+
+declare { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr, <vscale x 16 x i1>, i32)
+
+define { <vscale x 16 x i8>, i32 } @vploadff_nxv16i8(ptr %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv16i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr %ptr, <vscale x 16 x i1> %m, i32 %evl)
+  ret { <vscale x 16 x i8>, i32 } %load
+}
+
+define { <vscale x 16 x i8>, i32 } @vploadff_nxv16i8_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv16i8_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m2, ta, ma
+; CHECK-NEXT:    vle8ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 16 x i1> %a, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %load = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr %ptr, <vscale x 16 x i1> %b, i32 %evl)
+  ret { <vscale x 16 x i8>, i32 } %load
+}
+
+declare { <vscale x 32 x i8>, i32 } @llvm.vp.load.ff.nxv32i8.p0(ptr, <vscale x 32 x i1>, i32)
+
+define { <vscale x 32 x i8>, i32 } @vploadff_nxv32i8(ptr %ptr, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv32i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT:    vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 32 x i8>, i32 } @llvm.vp.load.ff.nxv32i8.p0(ptr %ptr, <vscale x 32 x i1> %m, i32 %evl)
+  ret { <vscale x 32 x i8>, i32 } %load
+}
+
+define { <vscale x 32 x i8>, i32 } @vploadff_nxv32i8_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv32i8_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m4, ta, ma
+; CHECK-NEXT:    vle8ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 32 x i1> %a, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %load = call { <vscale x 32 x i8>, i32 } @llvm.vp.load.ff.nxv32i8.p0(ptr %ptr, <vscale x 32 x i1> %b, i32 %evl)
+  ret { <vscale x 32 x i8>, i32 } %load
+}
+
+declare { <vscale x 64 x i8>, i32 } @llvm.vp.load.ff.nxv64i8.p0(ptr, <vscale x 64 x i1>, i32)
+
+define { <vscale x 64 x i8>, i32 } @vploadff_nxv64i8(ptr %ptr, <vscale x 64 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv64i8:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vle8ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 64 x i8>, i32 } @llvm.vp.load.ff.nxv64i8.p0(ptr %ptr, <vscale x 64 x i1> %m, i32 %evl)
+  ret { <vscale x 64 x i8>, i32 } %load
+}
+
+define { <vscale x 64 x i8>, i32 } @vploadff_nxv64i8_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv64i8_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e8, m8, ta, ma
+; CHECK-NEXT:    vle8ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 64 x i1> %a, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
+  %load = call { <vscale x 64 x i8>, i32 } @llvm.vp.load.ff.nxv64i8.p0(ptr %ptr, <vscale x 64 x i1> %b, i32 %evl)
+  ret { <vscale x 64 x i8>, i32 } %load
+}
+
+declare { <vscale x 1 x i16>, i32 } @llvm.vp.load.ff.nxv1i16.p0(ptr, <vscale x 1 x i1>, i32)
+
+define { <vscale x 1 x i16>, i32 } @vploadff_nxv1i16(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv1i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 1 x i16>, i32 } @llvm.vp.load.ff.nxv1i16.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl)
+  ret { <vscale x 1 x i16>, i32 } %load
+}
+
+define { <vscale x 1 x i16>, i32 } @vploadff_nxv1i16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv1i16_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %load = call { <vscale x 1 x i16>, i32 } @llvm.vp.load.ff.nxv1i16.p0(ptr %ptr, <vscale x 1 x i1> %b, i32 %evl)
+  ret { <vscale x 1 x i16>, i32 } %load
+}
+
+declare { <vscale x 2 x i16>, i32 } @llvm.vp.load.ff.nxv2i16.p0(ptr, <vscale x 2 x i1>, i32)
+
+define { <vscale x 2 x i16>, i32 } @vploadff_nxv2i16(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv2i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 2 x i16>, i32 } @llvm.vp.load.ff.nxv2i16.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl)
+  ret { <vscale x 2 x i16>, i32 } %load
+}
+
+define { <vscale x 2 x i16>, i32 } @vploadff_nxv2i16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv2i16_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %load = call { <vscale x 2 x i16>, i32 } @llvm.vp.load.ff.nxv2i16.p0(ptr %ptr, <vscale x 2 x i1> %b, i32 %evl)
+  ret { <vscale x 2 x i16>, i32 } %load
+}
+
+declare { <vscale x 4 x i16>, i32 } @llvm.vp.load.ff.nxv4i16.p0(ptr, <vscale x 4 x i1>, i32)
+
+define { <vscale x 4 x i16>, i32 } @vploadff_nxv4i16(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv4i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 4 x i16>, i32 } @llvm.vp.load.ff.nxv4i16.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl)
+  ret { <vscale x 4 x i16>, i32 } %load
+}
+
+define { <vscale x 4 x i16>, i32 } @vploadff_nxv4i16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv4i16_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %load = call { <vscale x 4 x i16>, i32 } @llvm.vp.load.ff.nxv4i16.p0(ptr %ptr, <vscale x 4 x i1> %b, i32 %evl)
+  ret { <vscale x 4 x i16>, i32 } %load
+}
+
+declare { <vscale x 8 x i16>, i32 } @llvm.vp.load.ff.nxv8i16.p0(ptr, <vscale x 8 x i1>, i32)
+
+define { <vscale x 8 x i16>, i32 } @vploadff_nxv8i16(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv8i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 8 x i16>, i32 } @llvm.vp.load.ff.nxv8i16.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl)
+  ret { <vscale x 8 x i16>, i32 } %load
+}
+
+define { <vscale x 8 x i16>, i32 } @vploadff_nxv8i16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv8i16_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %load = call { <vscale x 8 x i16>, i32 } @llvm.vp.load.ff.nxv8i16.p0(ptr %ptr, <vscale x 8 x i1> %b, i32 %evl)
+  ret { <vscale x 8 x i16>, i32 } %load
+}
+
+declare { <vscale x 16 x i16>, i32 } @llvm.vp.load.ff.nxv16i16.p0(ptr, <vscale x 16 x i1>, i32)
+
+define { <vscale x 16 x i16>, i32 } @vploadff_nxv16i16(ptr %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv16i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 16 x i16>, i32 } @llvm.vp.load.ff.nxv16i16.p0(ptr %ptr, <vscale x 16 x i1> %m, i32 %evl)
+  ret { <vscale x 16 x i16>, i32 } %load
+}
+
+define { <vscale x 16 x i16>, i32 } @vploadff_nxv16i16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv16i16_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 16 x i1> %a, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %load = call { <vscale x 16 x i16>, i32 } @llvm.vp.load.ff.nxv16i16.p0(ptr %ptr, <vscale x 16 x i1> %b, i32 %evl)
+  ret { <vscale x 16 x i16>, i32 } %load
+}
+
+declare { <vscale x 32 x i16>, i32 } @llvm.vp.load.ff.nxv32i16.p0(ptr, <vscale x 32 x i1>, i32)
+
+define { <vscale x 32 x i16>, i32 } @vploadff_nxv32i16(ptr %ptr, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv32i16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 32 x i16>, i32 } @llvm.vp.load.ff.nxv32i16.p0(ptr %ptr, <vscale x 32 x i1> %m, i32 %evl)
+  ret { <vscale x 32 x i16>, i32 } %load
+}
+
+define { <vscale x 32 x i16>, i32 } @vploadff_nxv32i16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv32i16_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 32 x i1> %a, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %load = call { <vscale x 32 x i16>, i32 } @llvm.vp.load.ff.nxv32i16.p0(ptr %ptr, <vscale x 32 x i1> %b, i32 %evl)
+  ret { <vscale x 32 x i16>, i32 } %load
+}
+
+declare { <vscale x 1 x i32>, i32 } @llvm.vp.load.ff.nxv1i32.p0(ptr, <vscale x 1 x i1>, i32)
+
+define { <vscale x 1 x i32>, i32 } @vploadff_nxv1i32(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv1i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 1 x i32>, i32 } @llvm.vp.load.ff.nxv1i32.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl)
+  ret { <vscale x 1 x i32>, i32 } %load
+}
+
+define { <vscale x 1 x i32>, i32 } @vploadff_nxv1i32_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv1i32_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %load = call { <vscale x 1 x i32>, i32 } @llvm.vp.load.ff.nxv1i32.p0(ptr %ptr, <vscale x 1 x i1> %b, i32 %evl)
+  ret { <vscale x 1 x i32>, i32 } %load
+}
+
+declare { <vscale x 2 x i32>, i32 } @llvm.vp.load.ff.nxv2i32.p0(ptr, <vscale x 2 x i1>, i32)
+
+define { <vscale x 2 x i32>, i32 } @vploadff_nxv2i32(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv2i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 2 x i32>, i32 } @llvm.vp.load.ff.nxv2i32.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl)
+  ret { <vscale x 2 x i32>, i32 } %load
+}
+
+define { <vscale x 2 x i32>, i32 } @vploadff_nxv2i32_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv2i32_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
+; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %load = call { <vscale x 2 x i32>, i32 } @llvm.vp.load.ff.nxv2i32.p0(ptr %ptr, <vscale x 2 x i1> %b, i32 %evl)
+  ret { <vscale x 2 x i32>, i32 } %load
+}
+
+declare { <vscale x 4 x i32>, i32 } @llvm.vp.load.ff.nxv4i32.p0(ptr, <vscale x 4 x i1>, i32)
+
+define { <vscale x 4 x i32>, i32 } @vploadff_nxv4i32(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv4i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 4 x i32>, i32 } @llvm.vp.load.ff.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl)
+  ret { <vscale x 4 x i32>, i32 } %load
+}
+
+define { <vscale x 4 x i32>, i32 } @vploadff_nxv4i32_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv4i32_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
+; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %load = call { <vscale x 4 x i32>, i32 } @llvm.vp.load.ff.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> %b, i32 %evl)
+  ret { <vscale x 4 x i32>, i32 } %load
+}
+
+declare { <vscale x 8 x i32>, i32 } @llvm.vp.load.ff.nxv8i32.p0(ptr, <vscale x 8 x i1>, i32)
+
+define { <vscale x 8 x i32>, i32 } @vploadff_nxv8i32(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv8i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 8 x i32>, i32 } @llvm.vp.load.ff.nxv8i32.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl)
+  ret { <vscale x 8 x i32>, i32 } %load
+}
+
+define { <vscale x 8 x i32>, i32 } @vploadff_nxv8i32_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv8i32_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
+; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %load = call { <vscale x 8 x i32>, i32 } @llvm.vp.load.ff.nxv8i32.p0(ptr %ptr, <vscale x 8 x i1> %b, i32 %evl)
+  ret { <vscale x 8 x i32>, i32 } %load
+}
+
+declare { <vscale x 16 x i32>, i32 } @llvm.vp.load.ff.nxv16i32.p0(ptr, <vscale x 16 x i1>, i32)
+
+define { <vscale x 16 x i32>, i32 } @vploadff_nxv16i32(ptr %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv16i32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 16 x i32>, i32 } @llvm.vp.load.ff.nxv16i32.p0(ptr %ptr, <vscale x 16 x i1> %m, i32 %evl)
+  ret { <vscale x 16 x i32>, i32 } %load
+}
+
+define { <vscale x 16 x i32>, i32 } @vploadff_nxv16i32_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv16i32_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, m8, ta, ma
+; CHECK-NEXT:    vle32ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 16 x i1> %a, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %load = call { <vscale x 16 x i32>, i32 } @llvm.vp.load.ff.nxv16i32.p0(ptr %ptr, <vscale x 16 x i1> %b, i32 %evl)
+  ret { <vscale x 16 x i32>, i32 } %load
+}
+
+declare { <vscale x 1 x i64>, i32 } @llvm.vp.load.ff.nxv1i64.p0(ptr, <vscale x 1 x i1>, i32)
+
+define { <vscale x 1 x i64>, i32 } @vploadff_nxv1i64(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv1i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 1 x i64>, i32 } @llvm.vp.load.ff.nxv1i64.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl)
+  ret { <vscale x 1 x i64>, i32 } %load
+}
+
+define { <vscale x 1 x i64>, i32 } @vploadff_nxv1i64_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv1i64_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
+; CHECK-NEXT:    vle64ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %load = call { <vscale x 1 x i64>, i32 } @llvm.vp.load.ff.nxv1i64.p0(ptr %ptr, <vscale x 1 x i1> %b, i32 %evl)
+  ret { <vscale x 1 x i64>, i32 } %load
+}
+
+declare { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr, <vscale x 2 x i1>, i32)
+
+define { <vscale x 2 x i64>, i32 } @vploadff_nxv2i64(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv2i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl)
+  ret { <vscale x 2 x i64>, i32 } %load
+}
+
+define { <vscale x 2 x i64>, i32 } @vploadff_nxv2i64_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv2i64_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
+; CHECK-NEXT:    vle64ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %load = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr %ptr, <vscale x 2 x i1> %b, i32 %evl)
+  ret { <vscale x 2 x i64>, i32 } %load
+}
+
+declare { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr, <vscale x 4 x i1>, i32)
+
+define { <vscale x 4 x i64>, i32 } @vploadff_nxv4i64(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv4i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl)
+  ret { <vscale x 4 x i64>, i32 } %load
+}
+
+define { <vscale x 4 x i64>, i32 } @vploadff_nxv4i64_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv4i64_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
+; CHECK-NEXT:    vle64ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %load = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr %ptr, <vscale x 4 x i1> %b, i32 %evl)
+  ret { <vscale x 4 x i64>, i32 } %load
+}
+
+declare { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr, <vscale x 8 x i1>, i32)
+
+define { <vscale x 8 x i64>, i32 } @vploadff_nxv8i64(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv8i64:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-NEXT:    vle64ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl)
+  ret { <vscale x 8 x i64>, i32 } %load
+}
+
+define { <vscale x 8 x i64>, i32 } @vploadff_nxv8i64_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv8i64_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e64, m8, ta, ma
+; CHECK-NEXT:    vle64ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %load = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr %ptr, <vscale x 8 x i1> %b, i32 %evl)
+  ret { <vscale x 8 x i64>, i32 } %load
+}
+
+declare { <vscale x 1 x half>, i32 } @llvm.vp.load.ff.nxv1f16.p0(ptr, <vscale x 1 x i1>, i32)
+
+define { <vscale x 1 x half>, i32 } @vploadff_nxv1f16(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv1f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 1 x half>, i32 } @llvm.vp.load.ff.nxv1f16.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl)
+  ret { <vscale x 1 x half>, i32 } %load
+}
+
+define { <vscale x 1 x half>, i32 } @vploadff_nxv1f16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv1f16_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+  %load = call { <vscale x 1 x half>, i32 } @llvm.vp.load.ff.nxv1f16.p0(ptr %ptr, <vscale x 1 x i1> %b, i32 %evl)
+  ret { <vscale x 1 x half>, i32 } %load
+}
+
+declare { <vscale x 2 x half>, i32 } @llvm.vp.load.ff.nxv2f16.p0(ptr, <vscale x 2 x i1>, i32)
+
+define { <vscale x 2 x half>, i32 } @vploadff_nxv2f16(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv2f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 2 x half>, i32 } @llvm.vp.load.ff.nxv2f16.p0(ptr %ptr, <vscale x 2 x i1> %m, i32 %evl)
+  ret { <vscale x 2 x half>, i32 } %load
+}
+
+define { <vscale x 2 x half>, i32 } @vploadff_nxv2f16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv2f16_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
+  %load = call { <vscale x 2 x half>, i32 } @llvm.vp.load.ff.nxv2f16.p0(ptr %ptr, <vscale x 2 x i1> %b, i32 %evl)
+  ret { <vscale x 2 x half>, i32 } %load
+}
+
+declare { <vscale x 4 x half>, i32 } @llvm.vp.load.ff.nxv4f16.p0(ptr, <vscale x 4 x i1>, i32)
+
+define { <vscale x 4 x half>, i32 } @vploadff_nxv4f16(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv4f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 4 x half>, i32 } @llvm.vp.load.ff.nxv4f16.p0(ptr %ptr, <vscale x 4 x i1> %m, i32 %evl)
+  ret { <vscale x 4 x half>, i32 } %load
+}
+
+define { <vscale x 4 x half>, i32 } @vploadff_nxv4f16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv4f16_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
+  %load = call { <vscale x 4 x half>, i32 } @llvm.vp.load.ff.nxv4f16.p0(ptr %ptr, <vscale x 4 x i1> %b, i32 %evl)
+  ret { <vscale x 4 x half>, i32 } %load
+}
+
+declare { <vscale x 8 x half>, i32 } @llvm.vp.load.ff.nxv8f16.p0(ptr, <vscale x 8 x i1>, i32)
+
+define { <vscale x 8 x half>, i32 } @vploadff_nxv8f16(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv8f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 8 x half>, i32 } @llvm.vp.load.ff.nxv8f16.p0(ptr %ptr, <vscale x 8 x i1> %m, i32 %evl)
+  ret { <vscale x 8 x half>, i32 } %load
+}
+
+define { <vscale x 8 x half>, i32 } @vploadff_nxv8f16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv8f16_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m2, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+  %load = call { <vscale x 8 x half>, i32 } @llvm.vp.load.ff.nxv8f16.p0(ptr %ptr, <vscale x 8 x i1> %b, i32 %evl)
+  ret { <vscale x 8 x half>, i32 } %load
+}
+
+declare { <vscale x 16 x half>, i32 } @llvm.vp.load.ff.nxv16f16.p0(ptr, <vscale x 16 x i1>, i32)
+
+define { <vscale x 16 x half>, i32 } @vploadff_nxv16f16(ptr %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv16f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 16 x half>, i32 } @llvm.vp.load.ff.nxv16f16.p0(ptr %ptr, <vscale x 16 x i1> %m, i32 %evl)
+  ret { <vscale x 16 x half>, i32 } %load
+}
+
+define { <vscale x 16 x half>, i32 } @vploadff_nxv16f16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv16f16_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m4, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 16 x i1> %a, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
+  %load = call { <vscale x 16 x half>, i32 } @llvm.vp.load.ff.nxv16f16.p0(ptr %ptr, <vscale x 16 x i1> %b, i32 %evl)
+  ret { <vscale x 16 x half>, i32 } %load
+}
+
+declare { <vscale x 32 x half>, i32 } @llvm.vp.load.ff.nxv32f16.p0(ptr, <vscale x 32 x i1>, i32)
+
+define { <vscale x 32 x half>, i32 } @vploadff_nxv32f16(ptr %ptr, <vscale x 32 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv32f16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 32 x half>, i32 } @llvm.vp.load.ff.nxv32f16.p0(ptr %ptr, <vscale x 32 x i1> %m, i32 %evl)
+  ret { <vscale x 32 x half>, i32 } %load
+}
+
+define { <vscale x 32 x half>, i32 } @vploadff_nxv32f16_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv32f16_allones_mask:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e16, m8, ta, ma
+; CHECK-NEXT:    vle16ff.v v8, (a0)
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %a = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
+  %b = shufflevector <vscale x 32 x i1> %a, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
+  %load = call { <vscale x 32 x half>, i32 } @llvm.vp.load.ff.nxv32f16.p0(ptr %ptr, <vscale x 32 x i1> %b, i32 %evl)
+  ret { <vscale x 32 x half>, i32 } %load
+}
+
+declare { <vscale x 1 x float>, i32 } @llvm.vp.load.ff.nxv1f32.p0(ptr, <vscale x 1 x i1>, i32)
+
+define { <vscale x 1 x float>, i32 } @vploadff_nxv1f32(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv1f32:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
+; CHECK-NEXT:    vle32ff.v v8, (a0), v0.t
+; CHECK-NEXT:    csrr a0, vl
+; CHECK-NEXT:    ret
+  %load = call { <vscale x 1 x float>, i32 } @llvm.vp.load.ff.nxv1f32.p0(ptr %ptr, <vscale x 1 x i1> %m, i32 %evl)
+  ret { <vscale x 1 x float>, i32 } %load
+}
+
+define { <vscale x 1 x float>, i32 } @vploadff_nxv1f32_allones_mask(ptr %ptr, i32 zeroext %evl) {
+; CHECK-LABEL: vploadff_nxv1f32_allones_mask:
vploadff_nxv1f32_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %a = insertelement poison, i1 true, i32 0 + %b = shufflevector %a, poison, zeroinitializer + %load = call { , i32 } @llvm.vp.load.ff.nxv1f32.p0(ptr %ptr, %b, i32 %evl) + ret { , i32 } %load +} + +declare { , i32 } @llvm.vp.load.ff.nxv2f32.p0(ptr, , i32) + +define { , i32 } @vploadff_nxv2f32(ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv2f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { , i32 } @llvm.vp.load.ff.nxv2f32.p0(ptr %ptr, %m, i32 %evl) + ret { , i32 } %load +} + +define { , i32 } @vploadff_nxv2f32_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv2f32_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %a = insertelement poison, i1 true, i32 0 + %b = shufflevector %a, poison, zeroinitializer + %load = call { , i32 } @llvm.vp.load.ff.nxv2f32.p0(ptr %ptr, %b, i32 %evl) + ret { , i32 } %load +} + +declare { , i32 } @llvm.vp.load.ff.nxv4f32.p0(ptr, , i32) + +define { , i32 } @vploadff_nxv4f32(ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv4f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { , i32 } @llvm.vp.load.ff.nxv4f32.p0(ptr %ptr, %m, i32 %evl) + ret { , i32 } %load +} + +define { , i32 } @vploadff_nxv4f32_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv4f32_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %a = insertelement poison, i1 true, i32 0 + %b = shufflevector %a, poison, zeroinitializer + %load = call { , i32 } @llvm.vp.load.ff.nxv4f32.p0(ptr %ptr, %b, i32 %evl) + ret { , i32 } %load +} + +declare { , i32 } @llvm.vp.load.ff.nxv8f32.p0(ptr, , i32) + +define { , i32 } @vploadff_nxv8f32(ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv8f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { , i32 } @llvm.vp.load.ff.nxv8f32.p0(ptr %ptr, %m, i32 %evl) + ret { , i32 } %load +} + +define { , i32 } @vploadff_nxv8f32_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv8f32_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %a = insertelement poison, i1 true, i32 0 + %b = shufflevector %a, poison, zeroinitializer + %load = call { , i32 } @llvm.vp.load.ff.nxv8f32.p0(ptr %ptr, %b, i32 %evl) + ret { , i32 } %load +} + +declare { , i32 } @llvm.vp.load.ff.nxv16f32.p0(ptr, , i32) + +define { , i32 } @vploadff_nxv16f32(ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv16f32: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { , i32 } @llvm.vp.load.ff.nxv16f32.p0(ptr %ptr, %m, i32 %evl) + ret { , i32 } %load +} + +define { , i32 } @vploadff_nxv16f32_allones_mask(ptr %ptr, i32 
zeroext %evl) { +; CHECK-LABEL: vploadff_nxv16f32_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e32, m8, ta, ma +; CHECK-NEXT: vle32ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %a = insertelement poison, i1 true, i32 0 + %b = shufflevector %a, poison, zeroinitializer + %load = call { , i32 } @llvm.vp.load.ff.nxv16f32.p0(ptr %ptr, %b, i32 %evl) + ret { , i32 } %load +} + +declare { , i32 } @llvm.vp.load.ff.nxv1f64.p0(ptr, , i32) + +define { , i32 } @vploadff_nxv1f64(ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv1f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { , i32 } @llvm.vp.load.ff.nxv1f64.p0(ptr %ptr, %m, i32 %evl) + ret { , i32 } %load +} + +define { , i32 } @vploadff_nxv1f64_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv1f64_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %a = insertelement poison, i1 true, i32 0 + %b = shufflevector %a, poison, zeroinitializer + %load = call { , i32 } @llvm.vp.load.ff.nxv1f64.p0(ptr %ptr, %b, i32 %evl) + ret { , i32 } %load +} + +declare { , i32 } @llvm.vp.load.ff.nxv2f64.p0(ptr, , i32) + +define { , i32 } @vploadff_nxv2f64(ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv2f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { , i32 } @llvm.vp.load.ff.nxv2f64.p0(ptr %ptr, %m, i32 %evl) + ret { , i32 } %load +} + +define { , i32 } @vploadff_nxv2f64_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv2f64_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %a = insertelement poison, i1 true, i32 0 + %b = shufflevector %a, poison, zeroinitializer + %load = call { , i32 } @llvm.vp.load.ff.nxv2f64.p0(ptr %ptr, %b, i32 %evl) + ret { , i32 } %load +} + +declare { , i32 } @llvm.vp.load.ff.nxv4f64.p0(ptr, , i32) + +define { , i32 } @vploadff_nxv4f64(ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv4f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { , i32 } @llvm.vp.load.ff.nxv4f64.p0(ptr %ptr, %m, i32 %evl) + ret { , i32 } %load +} + +define { , i32 } @vploadff_nxv4f64_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv4f64_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %a = insertelement poison, i1 true, i32 0 + %b = shufflevector %a, poison, zeroinitializer + %load = call { , i32 } @llvm.vp.load.ff.nxv4f64.p0(ptr %ptr, %b, i32 %evl) + ret { , i32 } %load +} + +declare { , i32 } @llvm.vp.load.ff.nxv8f64.p0(ptr, , i32) + +define { , i32 } @vploadff_nxv8f64(ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv8f64: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { , i32 } @llvm.vp.load.ff.nxv8f64.p0(ptr %ptr, %m, i32 %evl) + ret { , i32 } %load +} + +define { , i32 } 
@vploadff_nxv8f64_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv8f64_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e64, m8, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %a = insertelement poison, i1 true, i32 0 + %b = shufflevector %a, poison, zeroinitializer + %load = call { , i32 } @llvm.vp.load.ff.nxv8f64.p0(ptr %ptr, %b, i32 %evl) + ret { , i32 } %load +} + +declare { , i32 } @llvm.vp.load.ff.nxv1bf16.p0(ptr, , i32) + +define { , i32 } @vploadff_nxv1bf16(ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv1bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { , i32 } @llvm.vp.load.ff.nxv1bf16.p0(ptr %ptr, %m, i32 %evl) + ret { , i32 } %load +} + +define { , i32 } @vploadff_nxv1bf16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv1bf16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %a = insertelement poison, i1 true, i32 0 + %b = shufflevector %a, poison, zeroinitializer + %load = call { , i32 } @llvm.vp.load.ff.nxv1bf16.p0(ptr %ptr, %b, i32 %evl) + ret { , i32 } %load +} + +declare { , i32 } @llvm.vp.load.ff.nxv2bf16.p0(ptr, , i32) + +define { , i32 } @vploadff_nxv2bf16(ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv2bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { , i32 } @llvm.vp.load.ff.nxv2bf16.p0(ptr %ptr, %m, i32 %evl) + ret { , i32 } %load +} + +define { , i32 } @vploadff_nxv2bf16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv2bf16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %a = insertelement poison, i1 true, i32 0 + %b = shufflevector %a, poison, zeroinitializer + %load = call { , i32 } @llvm.vp.load.ff.nxv2bf16.p0(ptr %ptr, %b, i32 %evl) + ret { , i32 } %load +} + +declare { , i32 } @llvm.vp.load.ff.nxv4bf16.p0(ptr, , i32) + +define { , i32 } @vploadff_nxv4bf16(ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv4bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { , i32 } @llvm.vp.load.ff.nxv4bf16.p0(ptr %ptr, %m, i32 %evl) + ret { , i32 } %load +} + +define { , i32 } @vploadff_nxv4bf16_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv4bf16_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %a = insertelement poison, i1 true, i32 0 + %b = shufflevector %a, poison, zeroinitializer + %load = call { , i32 } @llvm.vp.load.ff.nxv4bf16.p0(ptr %ptr, %b, i32 %evl) + ret { , i32 } %load +} + +declare { , i32 } @llvm.vp.load.ff.nxv8bf16.p0(ptr, , i32) + +define { , i32 } @vploadff_nxv8bf16(ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv8bf16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma +; CHECK-NEXT: vle16ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { , i32 } @llvm.vp.load.ff.nxv8bf16.p0(ptr %ptr, %m, i32 
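The ``csrr a0, vl`` in the checks above is where the second result comes from on
RISC-V: ``vle*ff.v`` takes a trap only for element 0 and otherwise truncates
``vl`` at the first faulting element, so reading ``vl`` back recovers the number
of lanes actually loaded. A minimal sketch of how a consumer of the intrinsic
might use the two results (the function name and the fixed 16-byte shape here
are illustrative only, not part of the patch)::

    declare { <16 x i8>, i32 } @llvm.vp.load.ff.v16i8.p0(ptr, <16 x i1>, i32)

    define i32 @ff_load_step(ptr %p) {
      ; Load up to 16 bytes; this traps only if the first byte is unreadable.
      %r = call { <16 x i8>, i32 } @llvm.vp.load.ff.v16i8.p0(ptr %p, <16 x i1> splat (i1 true), i32 16)
      %v = extractvalue { <16 x i8>, i32 } %r, 0  ; lanes at index >= %n are poison
      %n = extractvalue { <16 x i8>, i32 } %r, 1  ; number of lanes actually read, >= 1 here
      ; An early-exit loop would scan only the first %n lanes of %v and
      ; advance %p by %n before the next iteration.
      ret i32 %n
    }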
From 2fb476edf27c38f52bce772f2ce52e7891c5b71d Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 3 Mar 2025 16:13:07 -0800
Subject: [PATCH 02/14] fixup! Add documentation.

---
 llvm/docs/LangRef.rst | 57 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 33c85c7ba9d29..9acda87804ec7 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -23943,6 +23943,63 @@ Examples:
      %also.r = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %ptr, i32 2, <8 x i1> %mask, <8 x i8> poison)
 
+.. _int_vp_ff_load:
+
+'``llvm.vp.load.ff``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+This is an overloaded intrinsic.
+
+::
+
+      declare {<4 x float>, i32} @llvm.vp.load.ff.v4f32.p0(ptr %ptr, <4 x i1> %mask, i32 %evl)
+      declare {<vscale x 2 x i16>, i32} @llvm.vp.load.ff.nxv2i16.p0(ptr %ptr, <vscale x 2 x i1> %mask, i32 %evl)
+      declare {<8 x float>, i32} @llvm.vp.load.ff.v8f32.p1(ptr addrspace(1) %ptr, <8 x i1> %mask, i32 %evl)
+      declare {<vscale x 1 x i64>, i32} @llvm.vp.load.ff.nxv1i64.p6(ptr addrspace(6) %ptr, <vscale x 1 x i1> %mask, i32 %evl)
+
+Overview:
+"""""""""
+
+The '``llvm.vp.load.ff.*``' intrinsic is similar to '``llvm.vp.load.*``', but
+will not trap if there are fewer than ``evl`` readable elements at the
+pointer.
+
+Arguments:
+""""""""""
+
+The first argument is the base pointer for the load. The second argument is a
+vector of boolean values with the same number of elements as the first return
+type. The third is the explicit vector length of the operation. The first
+return type and the underlying type of the base pointer are the same vector
+types.
+
+The :ref:`align <attr_align>` parameter attribute can be provided for the
+first argument.
+
+Semantics:
+""""""""""
+
+The '``llvm.vp.load.ff``' intrinsic reads a vector from memory similarly to
+'``llvm.vp.load``', but will only trap if the first lane is unreadable. If
+any other lane is unreadable, the number of successfully read lanes is
+returned in the second return value. The result in the first return value
+for the lanes that were not successfully read is a
+:ref:`poison value <poisonvalues>`. If ``evl`` is 0, no read occurs and thus
+no trap can occur for the first lane. If ``mask`` is 0 for the first lane, no
+trap occurs. This intrinsic is allowed to read fewer than ``evl`` lanes even
+if no trap would occur. If ``evl`` is non-zero, the value in the second
+result must be at least 1, even if the first lane is disabled by ``mask``.
+
+The default alignment is taken as the ABI alignment of the first return
+type as specified by the :ref:`datalayout string <langref_datalayout>`.
+
+Examples:
+"""""""""
+
+.. code-block:: text
+
+     %r = call {<8 x i8>, i32} @llvm.vp.load.ff.v8i8.p0(ptr align 2 %ptr, <8 x i1> %mask, i32 %evl)
+
 .. _int_vp_store:
 
 '``llvm.vp.store``' Intrinsic
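The next patch rewrites the all-ones masks in the tests using LLVM's ``splat``
constant syntax. The two spellings are equivalent; ``splat (i1 true)`` is the
single-constant form of the two-instruction broadcast idiom used so far. For
illustration (the types here are chosen arbitrarily)::

    ; the broadcast idiom...
    %a = insertelement <4 x i1> poison, i1 true, i32 0
    %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
    ; ...collapses to a single constant operand:
    %load = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl)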
From 4f2fbff97a72284d3d46184fa5b0af64096d85c7 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 3 Mar 2025 17:57:32 -0800
Subject: [PATCH 03/14] fixup! remove intrinsic declare and use splat

---
 .../RISCV/rvv/fixed-vectors-vploadff.ll       | 144 ++--------
 llvm/test/CodeGen/RISCV/rvv/vploadff.ll       | 258 +++---------------
 2 files changed, 67 insertions(+), 335 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll
index 9f982293256ac..474f859e9789d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll
@@ -8,8 +8,6 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
-declare { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr, <2 x i1>, i32)
-
 define { <2 x i8>, i32 } @vploadff_v2i8(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_v2i8:
 ; CHECK:       # %bb.0:
@@ -28,14 +26,10 @@ define { <2 x i8>, i32 } @vploadff_v2i8_allones_mask(ptr %ptr, i32 zeroext %evl)
 ; CHECK-NEXT:    vle8ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <2 x i1> poison, i1 true, i32 0
-  %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
-  %load = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+  %load = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl)
   ret { <2 x i8>, i32 } %load
 }
 
-declare { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr, <4 x i1>, i32)
-
 define { <4 x i8>, i32 } @vploadff_v4i8(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_v4i8:
 ; CHECK:       # %bb.0:
@@ -54,14 +48,10 @@ define { <4 x i8>, i32 } @vploadff_v4i8_allones_mask(ptr %ptr, i32 zeroext %evl)
 ; CHECK-NEXT:    vle8ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <4 x i1> poison, i1 true, i32 0
-  %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
-  %load = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+  %load = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl)
   ret { <4 x i8>, i32 } %load
 }
 
-declare { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr, <8 x i1>, i32)
-
 define { <8 x i8>, i32 } @vploadff_v8i8(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_v8i8:
 ; CHECK:       # %bb.0:
@@ -80,14 +70,10 @@ define { <8 x i8>, i32 } @vploadff_v8i8_allones_mask(ptr %ptr, i32 zeroext %evl)
 ; CHECK-NEXT:    vle8ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <8 x i1> poison, i1 true, i32 0
-  %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
-  %load = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+  %load = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl)
   ret { <8 x i8>, i32 } %load
 }
 
-declare { <2 x i16>, i32 } @llvm.vp.load.ff.v2i16.p0(ptr, <2 x i1>, i32)
-
 define { <2 x i16>, i32 } @vploadff_v2i16(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_v2i16:
 ; CHECK:       # %bb.0:
@@ -106,14 +92,10 @@ define { <2 x i16>, i32 } @vploadff_v2i16_allones_mask(ptr %ptr, i32 zeroext %ev
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <2 x i1> poison, i1 true, i32 0
-  %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
-  %load = call { <2 x i16>, i32 } @llvm.vp.load.ff.v2i16.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+  %load = call { <2 x i16>, i32 } @llvm.vp.load.ff.v2i16.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl)
   ret { <2 x i16>, i32 } %load
 }
 
-declare { <4 x i16>, i32 } @llvm.vp.load.ff.v4i16.p0(ptr, <4 x i1>, i32)
-
 define { <4 x i16>, i32 } @vploadff_v4i16(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_v4i16:
 ; CHECK:       # %bb.0:
@@ -132,14 +114,10 @@ define { <4 x i16>, i32 } @vploadff_v4i16_allones_mask(ptr %ptr, i32 zeroext %ev
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <4 x i1> poison, i1 true, i32 0
-  %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
-  %load = call { <4 x i16>, i32 } @llvm.vp.load.ff.v4i16.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+  %load = call { <4 x i16>, i32 } @llvm.vp.load.ff.v4i16.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl)
   ret { <4 x i16>, i32 } %load
 }
 
-declare { <8 x i16>, i32 } @llvm.vp.load.ff.v8i16.p0(ptr, <8 x i1>, i32)
-
 define { <8 x i16>, i32 } @vploadff_v8i16(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_v8i16:
 ; CHECK:       # %bb.0:
@@ -158,14 +136,10 @@ define { <8 x i16>, i32 } @vploadff_v8i16_allones_mask(ptr %ptr, i32 zeroext %ev
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <8 x i1> poison, i1 true, i32 0
-  %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
-  %load = call { <8 x i16>, i32 } @llvm.vp.load.ff.v8i16.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+  %load = call { <8 x i16>, i32 } @llvm.vp.load.ff.v8i16.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl)
   ret { <8 x i16>, i32 } %load
 }
 
-declare { <2 x i32>, i32 } @llvm.vp.load.ff.v2i32.p0(ptr, <2 x i1>, i32)
-
 define { <2 x i32>, i32 } @vploadff_v2i32(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_v2i32:
 ; CHECK:       # %bb.0:
@@ -184,14 +158,10 @@ define { <2 x i32>, i32 } @vploadff_v2i32_allones_mask(ptr %ptr, i32 zeroext %ev
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <2 x i1> poison, i1 true, i32 0
-  %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
-  %load = call { <2 x i32>, i32 } @llvm.vp.load.ff.v2i32.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+  %load = call { <2 x i32>, i32 } @llvm.vp.load.ff.v2i32.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl)
   ret { <2 x i32>, i32 } %load
 }
 
-declare { <4 x i32>, i32 } @llvm.vp.load.ff.v4i32.p0(ptr, <4 x i1>, i32)
-
 define { <4 x i32>, i32 } @vploadff_v4i32(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_v4i32:
 ; CHECK:       # %bb.0:
@@ -210,14 +180,10 @@ define { <4 x i32>, i32 } @vploadff_v4i32_allones_mask(ptr %ptr, i32 zeroext %ev
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <4 x i1> poison, i1 true, i32 0
-  %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
-  %load = call { <4 x i32>, i32 } @llvm.vp.load.ff.v4i32.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+  %load = call { <4 x i32>, i32 } @llvm.vp.load.ff.v4i32.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl)
   ret { <4 x i32>, i32 } %load
 }
 
-declare { <8 x i32>, i32 } @llvm.vp.load.ff.v8i32.p0(ptr, <8 x i1>, i32)
-
 define { <8 x i32>, i32 } @vploadff_v8i32(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vploadff_v8i32:
 ; CHECK:       # %bb.0:
@@ -236,14 +202,10 @@ define { <8 x i32>, i32 } @vploadff_v8i32_allones_mask(ptr %ptr, i32 zeroext %ev
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <8 x i1> poison, i1 true, i32 0
-  %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
-  %load = call { <8 x i32>, i32 } @llvm.vp.load.ff.v8i32.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+  %load = call { <8 x i32>, i32 } @llvm.vp.load.ff.v8i32.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl)
   ret { <8 x i32>, i32 } %load
 }
 
-declare { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr, <2 x i1>, i32)
-
 define { <2 x i64>, i32 } @vploadff_v2i64(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_v2i64:
 ; CHECK:       # %bb.0:
@@ -262,14 +224,10 @@ define { <2 x i64>, i32 } @vploadff_v2i64_allones_mask(ptr %ptr, i32 zeroext %ev
 ; CHECK-NEXT:    vle64ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <2 x i1> poison, i1 true, i32 0
-  %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
-  %load = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+  %load = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl)
   ret { <2 x i64>, i32 } %load
 }
 
-declare { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr, <4 x i1>, i32)
-
 define { <4 x i64>, i32 } @vploadff_v4i64(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_v4i64:
 ; CHECK:       # %bb.0:
@@ -288,14 +246,10 @@ define { <4 x i64>, i32 } @vploadff_v4i64_allones_mask(ptr %ptr, i32 zeroext %ev
 ; CHECK-NEXT:    vle64ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <4 x i1> poison, i1 true, i32 0
-  %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
-  %load = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+  %load = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl)
   ret { <4 x i64>, i32 } %load
 }
 
-declare { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr, <8 x i1>, i32)
-
 define { <8 x i64>, i32 } @vploadff_v8i64(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_v8i64:
 ; CHECK:       # %bb.0:
@@ -314,14 +268,10 @@ define { <8 x i64>, i32 } @vploadff_v8i64_allones_mask(ptr %ptr, i32 zeroext %ev
 ; CHECK-NEXT:    vle64ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <8 x i1> poison, i1 true, i32 0
-  %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
-  %load = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+  %load = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl)
   ret { <8 x i64>, i32 } %load
 }
 
-declare { <2 x half>, i32 } @llvm.vp.load.ff.v2f16.p0(ptr, <2 x i1>, i32)
-
 define { <2 x half>, i32 } @vploadff_v2f16(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_v2f16:
 ; CHECK:       # %bb.0:
@@ -340,14 +290,10 @@ define { <2 x half>, i32 } @vploadff_v2f16_allones_mask(ptr %ptr, i32 zeroext %e
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <2 x i1> poison, i1 true, i32 0
-  %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
-  %load = call { <2 x half>, i32 } @llvm.vp.load.ff.v2f16.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+  %load = call { <2 x half>, i32 } @llvm.vp.load.ff.v2f16.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl)
   ret { <2 x half>, i32 } %load
 }
 
-declare { <4 x half>, i32 } @llvm.vp.load.ff.v4f16.p0(ptr, <4 x i1>, i32)
-
 define { <4 x half>, i32 } @vploadff_v4f16(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_v4f16:
 ; CHECK:       # %bb.0:
@@ -366,14 +312,10 @@ define { <4 x half>, i32 } @vploadff_v4f16_allones_mask(ptr %ptr, i32 zeroext %e
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <4 x i1> poison, i1 true, i32 0
-  %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
-  %load = call { <4 x half>, i32 } @llvm.vp.load.ff.v4f16.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+  %load = call { <4 x half>, i32 } @llvm.vp.load.ff.v4f16.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl)
   ret { <4 x half>, i32 } %load
 }
 
-declare { <8 x half>, i32 } @llvm.vp.load.ff.v8f16.p0(ptr, <8 x i1>, i32)
-
 define { <8 x half>, i32 } @vploadff_v8f16(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_v8f16:
 ; CHECK:       # %bb.0:
@@ -392,14 +334,10 @@ define { <8 x half>, i32 } @vploadff_v8f16_allones_mask(ptr %ptr, i32 zeroext %e
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <8 x i1> poison, i1 true, i32 0
-  %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
-  %load = call { <8 x half>, i32 } @llvm.vp.load.ff.v8f16.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+  %load = call { <8 x half>, i32 } @llvm.vp.load.ff.v8f16.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl)
   ret { <8 x half>, i32 } %load
 }
 
-declare { <2 x float>, i32 } @llvm.vp.load.ff.v2f32.p0(ptr, <2 x i1>, i32)
-
 define { <2 x float>, i32 } @vploadff_v2f32(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_v2f32:
 ; CHECK:       # %bb.0:
@@ -418,14 +356,10 @@ define { <2 x float>, i32 } @vploadff_v2f32_allones_mask(ptr %ptr, i32 zeroext %
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <2 x i1> poison, i1 true, i32 0
-  %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
-  %load = call { <2 x float>, i32 } @llvm.vp.load.ff.v2f32.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+  %load = call { <2 x float>, i32 } @llvm.vp.load.ff.v2f32.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl)
   ret { <2 x float>, i32 } %load
 }
 
-declare { <4 x float>, i32 } @llvm.vp.load.ff.v4f32.p0(ptr, <4 x i1>, i32)
-
 define { <4 x float>, i32 } @vploadff_v4f32(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_v4f32:
 ; CHECK:       # %bb.0:
@@ -444,14 +378,10 @@ define { <4 x float>, i32 } @vploadff_v4f32_allones_mask(ptr %ptr, i32 zeroext %
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <4 x i1> poison, i1 true, i32 0
-  %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
-  %load = call { <4 x float>, i32 } @llvm.vp.load.ff.v4f32.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+  %load = call { <4 x float>, i32 } @llvm.vp.load.ff.v4f32.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl)
   ret { <4 x float>, i32 } %load
 }
 
-declare { <8 x float>, i32 } @llvm.vp.load.ff.v8f32.p0(ptr, <8 x i1>, i32)
-
 define { <8 x float>, i32 } @vploadff_v8f32(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_v8f32:
 ; CHECK:       # %bb.0:
@@ -470,14 +400,10 @@ define { <8 x float>, i32 } @vploadff_v8f32_allones_mask(ptr %ptr, i32 zeroext %
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <8 x i1> poison, i1 true, i32 0
-  %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
-  %load = call { <8 x float>, i32 } @llvm.vp.load.ff.v8f32.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+  %load = call { <8 x float>, i32 } @llvm.vp.load.ff.v8f32.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl)
   ret { <8 x float>, i32 } %load
 }
 
-declare { <2 x double>, i32 } @llvm.vp.load.ff.v2f64.p0(ptr, <2 x i1>, i32)
-
 define { <2 x double>, i32 } @vploadff_v2f64(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_v2f64:
 ; CHECK:       # %bb.0:
@@ -496,14 +422,10 @@ define { <2 x double>, i32 } @vploadff_v2f64_allones_mask(ptr %ptr, i32 zeroext
 ; CHECK-NEXT:    vle64ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <2 x i1> poison, i1 true, i32 0
-  %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
-  %load = call { <2 x double>, i32 } @llvm.vp.load.ff.v2f64.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+  %load = call { <2 x double>, i32 } @llvm.vp.load.ff.v2f64.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl)
   ret { <2 x double>, i32 } %load
 }
 
-declare { <4 x double>, i32 } @llvm.vp.load.ff.v4f64.p0(ptr, <4 x i1>, i32)
-
 define { <4 x double>, i32 } @vploadff_v4f64(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_v4f64:
 ; CHECK:       # %bb.0:
@@ -522,14 +444,10 @@ define { <4 x double>, i32 } @vploadff_v4f64_allones_mask(ptr %ptr, i32 zeroext
 ; CHECK-NEXT:    vle64ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <4 x i1> poison, i1 true, i32 0
-  %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
-  %load = call { <4 x double>, i32 } @llvm.vp.load.ff.v4f64.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+  %load = call { <4 x double>, i32 } @llvm.vp.load.ff.v4f64.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl)
   ret { <4 x double>, i32 } %load
 }
 
-declare { <8 x double>, i32 } @llvm.vp.load.ff.v8f64.p0(ptr, <8 x i1>, i32)
-
 define { <8 x double>, i32 } @vploadff_v8f64(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_v8f64:
 ; CHECK:       # %bb.0:
@@ -548,14 +466,10 @@ define { <8 x double>, i32 } @vploadff_v8f64_allones_mask(ptr %ptr, i32 zeroext
 ; CHECK-NEXT:    vle64ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <8 x i1> poison, i1 true, i32 0
-  %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
-  %load = call { <8 x double>, i32 } @llvm.vp.load.ff.v8f64.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+  %load = call { <8 x double>, i32 } @llvm.vp.load.ff.v8f64.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl)
   ret { <8 x double>, i32 } %load
 }
 
-declare { <2 x bfloat>, i32 } @llvm.vp.load.ff.v2bf16.p0(ptr, <2 x i1>, i32)
-
 define { <2 x bfloat>, i32 } @vploadff_v2bf16(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_v2bf16:
 ; CHECK:       # %bb.0:
@@ -574,14 +488,10 @@ define { <2 x bfloat>, i32 } @vploadff_v2bf16_allones_mask(ptr %ptr, i32 zeroext
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <2 x i1> poison, i1 true, i32 0
-  %b = shufflevector <2 x i1> %a, <2 x i1> poison, <2 x i32> zeroinitializer
-  %load = call { <2 x bfloat>, i32 } @llvm.vp.load.ff.v2bf16.p0(ptr %ptr, <2 x i1> %b, i32 %evl)
+  %load = call { <2 x bfloat>, i32 } @llvm.vp.load.ff.v2bf16.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl)
   ret { <2 x bfloat>, i32 } %load
 }
 
-declare { <4 x bfloat>, i32 } @llvm.vp.load.ff.v4bf16.p0(ptr, <4 x i1>, i32)
-
 define { <4 x bfloat>, i32 } @vploadff_v4bf16(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_v4bf16:
 ; CHECK:       # %bb.0:
@@ -600,14 +510,10 @@ define { <4 x bfloat>, i32 } @vploadff_v4bf16_allones_mask(ptr %ptr, i32 zeroext
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <4 x i1> poison, i1 true, i32 0
-  %b = shufflevector <4 x i1> %a, <4 x i1> poison, <4 x i32> zeroinitializer
-  %load = call { <4 x bfloat>, i32 } @llvm.vp.load.ff.v4bf16.p0(ptr %ptr, <4 x i1> %b, i32 %evl)
+  %load = call { <4 x bfloat>, i32 } @llvm.vp.load.ff.v4bf16.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl)
   ret { <4 x bfloat>, i32 } %load
 }
 
-declare { <8 x bfloat>, i32 } @llvm.vp.load.ff.v8bf16.p0(ptr, <8 x i1>, i32)
-
 define { <8 x bfloat>, i32 } @vploadff_v8bf16(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_v8bf16:
 ; CHECK:       # %bb.0:
@@ -626,8 +532,6 @@ define { <8 x bfloat>, i32 } @vploadff_v8bf16_allones_mask(ptr %ptr, i32 zeroext
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <8 x i1> poison, i1 true, i32 0
-  %b = shufflevector <8 x i1> %a, <8 x i1> poison, <8 x i32> zeroinitializer
-  %load = call { <8 x bfloat>, i32 } @llvm.vp.load.ff.v8bf16.p0(ptr %ptr, <8 x i1> %b, i32 %evl)
+  %load = call { <8 x bfloat>, i32 } @llvm.vp.load.ff.v8bf16.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl)
   ret { <8 x bfloat>, i32 } %load
 }
diff --git a/llvm/test/CodeGen/RISCV/rvv/vploadff.ll b/llvm/test/CodeGen/RISCV/rvv/vploadff.ll
index 11812eec6ac46..ae439fd0ce3eb 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vploadff.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vploadff.ll
@@ -8,8 +8,6 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v \
 ; RUN:   -verify-machineinstrs < %s | FileCheck %s
 
-declare { <vscale x 1 x i8>, i32 } @llvm.vp.load.ff.nxv1i8.p0(ptr, <vscale x 1 x i1>, i32)
-
 define { <vscale x 1 x i8>, i32 } @vploadff_nxv1i8(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv1i8:
 ; CHECK:       # %bb.0:
@@ -28,14 +26,10 @@ define { <vscale x 1 x i8>, i32 } @vploadff_nxv1i8_allones_mask(ptr %ptr, i32 ze
 ; CHECK-NEXT:    vle8ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %load = call { <vscale x 1 x i8>, i32 } @llvm.vp.load.ff.nxv1i8.p0(ptr %ptr, <vscale x 1 x i1> %b, i32 %evl)
+  %load = call { <vscale x 1 x i8>, i32 } @llvm.vp.load.ff.nxv1i8.p0(ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 1 x i8>, i32 } %load
 }
 
-declare { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr, <vscale x 2 x i1>, i32)
-
 define { <vscale x 2 x i8>, i32 } @vploadff_nxv2i8(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv2i8:
 ; CHECK:       # %bb.0:
@@ -54,14 +48,10 @@ define { <vscale x 2 x i8>, i32 } @vploadff_nxv2i8_allones_mask(ptr %ptr, i32 ze
 ; CHECK-NEXT:    vle8ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
-  %load = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr %ptr, <vscale x 2 x i1> %b, i32 %evl)
+  %load = call { <vscale x 2 x i8>, i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 2 x i8>, i32 } %load
 }
 
-declare { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr, <vscale x 4 x i1>, i32)
-
 define { <vscale x 4 x i8>, i32 } @vploadff_nxv4i8(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv4i8:
 ; CHECK:       # %bb.0:
@@ -80,14 +70,10 @@ define { <vscale x 4 x i8>, i32 } @vploadff_nxv4i8_allones_mask(ptr %ptr, i32 ze
 ; CHECK-NEXT:    vle8ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
-  %load = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr %ptr, <vscale x 4 x i1> %b, i32 %evl)
+  %load = call { <vscale x 4 x i8>, i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 4 x i8>, i32 } %load
 }
 
-declare { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr, <vscale x 8 x i1>, i32)
-
 define { <vscale x 8 x i8>, i32 } @vploadff_nxv8i8(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv8i8:
 ; CHECK:       # %bb.0:
@@ -106,14 +92,10 @@ define { <vscale x 8 x i8>, i32 } @vploadff_nxv8i8_allones_mask(ptr %ptr, i32 ze
 ; CHECK-NEXT:    vle8ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
-  %load = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr %ptr, <vscale x 8 x i1> %b, i32 %evl)
+  %load = call { <vscale x 8 x i8>, i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 8 x i8>, i32 } %load
 }
 
-declare { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr, <vscale x 16 x i1>, i32)
-
 define { <vscale x 16 x i8>, i32 } @vploadff_nxv16i8(ptr %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv16i8:
 ; CHECK:       # %bb.0:
@@ -132,14 +114,10 @@ define { <vscale x 16 x i8>, i32 } @vploadff_nxv16i8_allones_mask(ptr %ptr, i32
 ; CHECK-NEXT:    vle8ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 16 x i1> %a, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
-  %load = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr %ptr, <vscale x 16 x i1> %b, i32 %evl)
+  %load = call { <vscale x 16 x i8>, i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr %ptr, <vscale x 16 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 16 x i8>, i32 } %load
 }
 
-declare { <vscale x 32 x i8>, i32 } @llvm.vp.load.ff.nxv32i8.p0(ptr, <vscale x 32 x i1>, i32)
-
 define { <vscale x 32 x i8>, i32 } @vploadff_nxv32i8(ptr %ptr, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv32i8:
 ; CHECK:       # %bb.0:
@@ -158,14 +136,10 @@ define { <vscale x 32 x i8>, i32 } @vploadff_nxv32i8_allones_mask(ptr %ptr, i32
 ; CHECK-NEXT:    vle8ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 32 x i1> %a, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
-  %load = call { <vscale x 32 x i8>, i32 } @llvm.vp.load.ff.nxv32i8.p0(ptr %ptr, <vscale x 32 x i1> %b, i32 %evl)
+  %load = call { <vscale x 32 x i8>, i32 } @llvm.vp.load.ff.nxv32i8.p0(ptr %ptr, <vscale x 32 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 32 x i8>, i32 } %load
 }
 
-declare { <vscale x 64 x i8>, i32 } @llvm.vp.load.ff.nxv64i8.p0(ptr, <vscale x 64 x i1>, i32)
-
 define { <vscale x 64 x i8>, i32 } @vploadff_nxv64i8(ptr %ptr, <vscale x 64 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv64i8:
 ; CHECK:       # %bb.0:
@@ -184,14 +158,10 @@ define { <vscale x 64 x i8>, i32 } @vploadff_nxv64i8_allones_mask(ptr %ptr, i32
 ; CHECK-NEXT:    vle8ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 64 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 64 x i1> %a, <vscale x 64 x i1> poison, <vscale x 64 x i32> zeroinitializer
-  %load = call { <vscale x 64 x i8>, i32 } @llvm.vp.load.ff.nxv64i8.p0(ptr %ptr, <vscale x 64 x i1> %b, i32 %evl)
+  %load = call { <vscale x 64 x i8>, i32 } @llvm.vp.load.ff.nxv64i8.p0(ptr %ptr, <vscale x 64 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 64 x i8>, i32 } %load
 }
 
-declare { <vscale x 1 x i16>, i32 } @llvm.vp.load.ff.nxv1i16.p0(ptr, <vscale x 1 x i1>, i32)
-
 define { <vscale x 1 x i16>, i32 } @vploadff_nxv1i16(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv1i16:
 ; CHECK:       # %bb.0:
@@ -210,14 +180,10 @@ define { <vscale x 1 x i16>, i32 } @vploadff_nxv1i16_allones_mask(ptr %ptr, i32
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %load = call { <vscale x 1 x i16>, i32 } @llvm.vp.load.ff.nxv1i16.p0(ptr %ptr, <vscale x 1 x i1> %b, i32 %evl)
+  %load = call { <vscale x 1 x i16>, i32 } @llvm.vp.load.ff.nxv1i16.p0(ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 1 x i16>, i32 } %load
 }
 
-declare { <vscale x 2 x i16>, i32 } @llvm.vp.load.ff.nxv2i16.p0(ptr, <vscale x 2 x i1>, i32)
-
 define { <vscale x 2 x i16>, i32 } @vploadff_nxv2i16(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv2i16:
 ; CHECK:       # %bb.0:
@@ -236,14 +202,10 @@ define { <vscale x 2 x i16>, i32 } @vploadff_nxv2i16_allones_mask(ptr %ptr, i32
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
-  %load = call { <vscale x 2 x i16>, i32 } @llvm.vp.load.ff.nxv2i16.p0(ptr %ptr, <vscale x 2 x i1> %b, i32 %evl)
+  %load = call { <vscale x 2 x i16>, i32 } @llvm.vp.load.ff.nxv2i16.p0(ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 2 x i16>, i32 } %load
 }
 
-declare { <vscale x 4 x i16>, i32 } @llvm.vp.load.ff.nxv4i16.p0(ptr, <vscale x 4 x i1>, i32)
-
 define { <vscale x 4 x i16>, i32 } @vploadff_nxv4i16(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv4i16:
 ; CHECK:       # %bb.0:
@@ -262,14 +224,10 @@ define { <vscale x 4 x i16>, i32 } @vploadff_nxv4i16_allones_mask(ptr %ptr, i32
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
-  %load = call { <vscale x 4 x i16>, i32 } @llvm.vp.load.ff.nxv4i16.p0(ptr %ptr, <vscale x 4 x i1> %b, i32 %evl)
+  %load = call { <vscale x 4 x i16>, i32 } @llvm.vp.load.ff.nxv4i16.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 4 x i16>, i32 } %load
 }
 
-declare { <vscale x 8 x i16>, i32 } @llvm.vp.load.ff.nxv8i16.p0(ptr, <vscale x 8 x i1>, i32)
-
 define { <vscale x 8 x i16>, i32 } @vploadff_nxv8i16(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv8i16:
 ; CHECK:       # %bb.0:
@@ -288,14 +246,10 @@ define { <vscale x 8 x i16>, i32 } @vploadff_nxv8i16_allones_mask(ptr %ptr, i32
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
-  %load = call { <vscale x 8 x i16>, i32 } @llvm.vp.load.ff.nxv8i16.p0(ptr %ptr, <vscale x 8 x i1> %b, i32 %evl)
+  %load = call { <vscale x 8 x i16>, i32 } @llvm.vp.load.ff.nxv8i16.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 8 x i16>, i32 } %load
 }
 
-declare { <vscale x 16 x i16>, i32 } @llvm.vp.load.ff.nxv16i16.p0(ptr, <vscale x 16 x i1>, i32)
-
 define { <vscale x 16 x i16>, i32 } @vploadff_nxv16i16(ptr %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv16i16:
 ; CHECK:       # %bb.0:
@@ -314,14 +268,10 @@ define { <vscale x 16 x i16>, i32 } @vploadff_nxv16i16_allones_mask(ptr %ptr, i3
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 16 x i1> %a, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
-  %load = call { <vscale x 16 x i16>, i32 } @llvm.vp.load.ff.nxv16i16.p0(ptr %ptr, <vscale x 16 x i1> %b, i32 %evl)
+  %load = call { <vscale x 16 x i16>, i32 } @llvm.vp.load.ff.nxv16i16.p0(ptr %ptr, <vscale x 16 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 16 x i16>, i32 } %load
 }
 
-declare { <vscale x 32 x i16>, i32 } @llvm.vp.load.ff.nxv32i16.p0(ptr, <vscale x 32 x i1>, i32)
-
 define { <vscale x 32 x i16>, i32 } @vploadff_nxv32i16(ptr %ptr, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv32i16:
 ; CHECK:       # %bb.0:
@@ -340,14 +290,10 @@ define { <vscale x 32 x i16>, i32 } @vploadff_nxv32i16_allones_mask(ptr %ptr, i3
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 32 x i1> %a, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
-  %load = call { <vscale x 32 x i16>, i32 } @llvm.vp.load.ff.nxv32i16.p0(ptr %ptr, <vscale x 32 x i1> %b, i32 %evl)
+  %load = call { <vscale x 32 x i16>, i32 } @llvm.vp.load.ff.nxv32i16.p0(ptr %ptr, <vscale x 32 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 32 x i16>, i32 } %load
 }
 
-declare { <vscale x 1 x i32>, i32 } @llvm.vp.load.ff.nxv1i32.p0(ptr, <vscale x 1 x i1>, i32)
-
 define { <vscale x 1 x i32>, i32 } @vploadff_nxv1i32(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv1i32:
 ; CHECK:       # %bb.0:
@@ -366,14 +312,10 @@ define { <vscale x 1 x i32>, i32 } @vploadff_nxv1i32_allones_mask(ptr %ptr, i32
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %load = call { <vscale x 1 x i32>, i32 } @llvm.vp.load.ff.nxv1i32.p0(ptr %ptr, <vscale x 1 x i1> %b, i32 %evl)
+  %load = call { <vscale x 1 x i32>, i32 } @llvm.vp.load.ff.nxv1i32.p0(ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 1 x i32>, i32 } %load
 }
 
-declare { <vscale x 2 x i32>, i32 } @llvm.vp.load.ff.nxv2i32.p0(ptr, <vscale x 2 x i1>, i32)
-
 define { <vscale x 2 x i32>, i32 } @vploadff_nxv2i32(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv2i32:
 ; CHECK:       # %bb.0:
@@ -392,14 +334,10 @@ define { <vscale x 2 x i32>, i32 } @vploadff_nxv2i32_allones_mask(ptr %ptr, i32
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
-  %load = call { <vscale x 2 x i32>, i32 } @llvm.vp.load.ff.nxv2i32.p0(ptr %ptr, <vscale x 2 x i1> %b, i32 %evl)
+  %load = call { <vscale x 2 x i32>, i32 } @llvm.vp.load.ff.nxv2i32.p0(ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 2 x i32>, i32 } %load
 }
 
-declare { <vscale x 4 x i32>, i32 } @llvm.vp.load.ff.nxv4i32.p0(ptr, <vscale x 4 x i1>, i32)
-
 define { <vscale x 4 x i32>, i32 } @vploadff_nxv4i32(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv4i32:
 ; CHECK:       # %bb.0:
@@ -418,14 +356,10 @@ define { <vscale x 4 x i32>, i32 } @vploadff_nxv4i32_allones_mask(ptr %ptr, i32
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
-  %load = call { <vscale x 4 x i32>, i32 } @llvm.vp.load.ff.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> %b, i32 %evl)
+  %load = call { <vscale x 4 x i32>, i32 } @llvm.vp.load.ff.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 4 x i32>, i32 } %load
 }
 
-declare { <vscale x 8 x i32>, i32 } @llvm.vp.load.ff.nxv8i32.p0(ptr, <vscale x 8 x i1>, i32)
-
 define { <vscale x 8 x i32>, i32 } @vploadff_nxv8i32(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv8i32:
 ; CHECK:       # %bb.0:
@@ -444,14 +378,10 @@ define { <vscale x 8 x i32>, i32 } @vploadff_nxv8i32_allones_mask(ptr %ptr, i32
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
-  %load = call { <vscale x 8 x i32>, i32 } @llvm.vp.load.ff.nxv8i32.p0(ptr %ptr, <vscale x 8 x i1> %b, i32 %evl)
+  %load = call { <vscale x 8 x i32>, i32 } @llvm.vp.load.ff.nxv8i32.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 8 x i32>, i32 } %load
 }
 
-declare { <vscale x 16 x i32>, i32 } @llvm.vp.load.ff.nxv16i32.p0(ptr, <vscale x 16 x i1>, i32)
-
 define { <vscale x 16 x i32>, i32 } @vploadff_nxv16i32(ptr %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv16i32:
 ; CHECK:       # %bb.0:
@@ -470,14 +400,10 @@ define { <vscale x 16 x i32>, i32 } @vploadff_nxv16i32_allones_mask(ptr %ptr, i3
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 16 x i1> %a, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
-  %load = call { <vscale x 16 x i32>, i32 } @llvm.vp.load.ff.nxv16i32.p0(ptr %ptr, <vscale x 16 x i1> %b, i32 %evl)
+  %load = call { <vscale x 16 x i32>, i32 } @llvm.vp.load.ff.nxv16i32.p0(ptr %ptr, <vscale x 16 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 16 x i32>, i32 } %load
 }
 
-declare { <vscale x 1 x i64>, i32 } @llvm.vp.load.ff.nxv1i64.p0(ptr, <vscale x 1 x i1>, i32)
-
 define { <vscale x 1 x i64>, i32 } @vploadff_nxv1i64(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv1i64:
 ; CHECK:       # %bb.0:
@@ -496,14 +422,10 @@ define { <vscale x 1 x i64>, i32 } @vploadff_nxv1i64_allones_mask(ptr %ptr, i32
 ; CHECK-NEXT:    vle64ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %load = call { <vscale x 1 x i64>, i32 } @llvm.vp.load.ff.nxv1i64.p0(ptr %ptr, <vscale x 1 x i1> %b, i32 %evl)
+  %load = call { <vscale x 1 x i64>, i32 } @llvm.vp.load.ff.nxv1i64.p0(ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 1 x i64>, i32 } %load
 }
 
-declare { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr, <vscale x 2 x i1>, i32)
-
 define { <vscale x 2 x i64>, i32 } @vploadff_nxv2i64(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv2i64:
 ; CHECK:       # %bb.0:
@@ -522,14 +444,10 @@ define { <vscale x 2 x i64>, i32 } @vploadff_nxv2i64_allones_mask(ptr %ptr, i32
 ; CHECK-NEXT:    vle64ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
-  %load = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr %ptr, <vscale x 2 x i1> %b, i32 %evl)
+  %load = call { <vscale x 2 x i64>, i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 2 x i64>, i32 } %load
 }
 
-declare { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr, <vscale x 4 x i1>, i32)
-
 define { <vscale x 4 x i64>, i32 } @vploadff_nxv4i64(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv4i64:
 ; CHECK:       # %bb.0:
@@ -548,14 +466,10 @@ define { <vscale x 4 x i64>, i32 } @vploadff_nxv4i64_allones_mask(ptr %ptr, i32
 ; CHECK-NEXT:    vle64ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
-  %load = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr %ptr, <vscale x 4 x i1> %b, i32 %evl)
+  %load = call { <vscale x 4 x i64>, i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 4 x i64>, i32 } %load
 }
 
-declare { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr, <vscale x 8 x i1>, i32)
-
 define { <vscale x 8 x i64>, i32 } @vploadff_nxv8i64(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv8i64:
 ; CHECK:       # %bb.0:
@@ -574,14 +488,10 @@ define { <vscale x 8 x i64>, i32 } @vploadff_nxv8i64_allones_mask(ptr %ptr, i32
 ; CHECK-NEXT:    vle64ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
-  %load = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr %ptr, <vscale x 8 x i1> %b, i32 %evl)
+  %load = call { <vscale x 8 x i64>, i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 8 x i64>, i32 } %load
 }
 
-declare { <vscale x 1 x half>, i32 } @llvm.vp.load.ff.nxv1f16.p0(ptr, <vscale x 1 x i1>, i32)
-
 define { <vscale x 1 x half>, i32 } @vploadff_nxv1f16(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv1f16:
 ; CHECK:       # %bb.0:
@@ -600,14 +510,10 @@ define { <vscale x 1 x half>, i32 } @vploadff_nxv1f16_allones_mask(ptr %ptr, i32
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %load = call { <vscale x 1 x half>, i32 } @llvm.vp.load.ff.nxv1f16.p0(ptr %ptr, <vscale x 1 x i1> %b, i32 %evl)
+  %load = call { <vscale x 1 x half>, i32 } @llvm.vp.load.ff.nxv1f16.p0(ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 1 x half>, i32 } %load
 }
 
-declare { <vscale x 2 x half>, i32 } @llvm.vp.load.ff.nxv2f16.p0(ptr, <vscale x 2 x i1>, i32)
-
 define { <vscale x 2 x half>, i32 } @vploadff_nxv2f16(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv2f16:
 ; CHECK:       # %bb.0:
@@ -626,14 +532,10 @@ define { <vscale x 2 x half>, i32 } @vploadff_nxv2f16_allones_mask(ptr %ptr, i32
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
-  %load = call { <vscale x 2 x half>, i32 } @llvm.vp.load.ff.nxv2f16.p0(ptr %ptr, <vscale x 2 x i1> %b, i32 %evl)
+  %load = call { <vscale x 2 x half>, i32 } @llvm.vp.load.ff.nxv2f16.p0(ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 2 x half>, i32 } %load
 }
 
-declare { <vscale x 4 x half>, i32 } @llvm.vp.load.ff.nxv4f16.p0(ptr, <vscale x 4 x i1>, i32)
-
 define { <vscale x 4 x half>, i32 } @vploadff_nxv4f16(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv4f16:
 ; CHECK:       # %bb.0:
@@ -652,14 +554,10 @@ define { <vscale x 4 x half>, i32 } @vploadff_nxv4f16_allones_mask(ptr %ptr, i32
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
-  %load = call { <vscale x 4 x half>, i32 } @llvm.vp.load.ff.nxv4f16.p0(ptr %ptr, <vscale x 4 x i1> %b, i32 %evl)
+  %load = call { <vscale x 4 x half>, i32 } @llvm.vp.load.ff.nxv4f16.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 4 x half>, i32 } %load
 }
 
-declare { <vscale x 8 x half>, i32 } @llvm.vp.load.ff.nxv8f16.p0(ptr, <vscale x 8 x i1>, i32)
-
 define { <vscale x 8 x half>, i32 } @vploadff_nxv8f16(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv8f16:
 ; CHECK:       # %bb.0:
@@ -678,14 +576,10 @@ define { <vscale x 8 x half>, i32 } @vploadff_nxv8f16_allones_mask(ptr %ptr, i32
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
-  %load = call { <vscale x 8 x half>, i32 } @llvm.vp.load.ff.nxv8f16.p0(ptr %ptr, <vscale x 8 x i1> %b, i32 %evl)
+  %load = call { <vscale x 8 x half>, i32 } @llvm.vp.load.ff.nxv8f16.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 8 x half>, i32 } %load
 }
 
-declare { <vscale x 16 x half>, i32 } @llvm.vp.load.ff.nxv16f16.p0(ptr, <vscale x 16 x i1>, i32)
-
 define { <vscale x 16 x half>, i32 } @vploadff_nxv16f16(ptr %ptr, <vscale x 16 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv16f16:
 ; CHECK:       # %bb.0:
@@ -704,14 +598,10 @@ define { <vscale x 16 x half>, i32 } @vploadff_nxv16f16_allones_mask(ptr %ptr, i
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 16 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 16 x i1> %a, <vscale x 16 x i1> poison, <vscale x 16 x i32> zeroinitializer
-  %load = call { <vscale x 16 x half>, i32 } @llvm.vp.load.ff.nxv16f16.p0(ptr %ptr, <vscale x 16 x i1> %b, i32 %evl)
+  %load = call { <vscale x 16 x half>, i32 } @llvm.vp.load.ff.nxv16f16.p0(ptr %ptr, <vscale x 16 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 16 x half>, i32 } %load
 }
 
-declare { <vscale x 32 x half>, i32 } @llvm.vp.load.ff.nxv32f16.p0(ptr, <vscale x 32 x i1>, i32)
-
 define { <vscale x 32 x half>, i32 } @vploadff_nxv32f16(ptr %ptr, <vscale x 32 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv32f16:
 ; CHECK:       # %bb.0:
@@ -730,14 +620,10 @@ define { <vscale x 32 x half>, i32 } @vploadff_nxv32f16_allones_mask(ptr %ptr, i
 ; CHECK-NEXT:    vle16ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 32 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 32 x i1> %a, <vscale x 32 x i1> poison, <vscale x 32 x i32> zeroinitializer
-  %load = call { <vscale x 32 x half>, i32 } @llvm.vp.load.ff.nxv32f16.p0(ptr %ptr, <vscale x 32 x i1> %b, i32 %evl)
+  %load = call { <vscale x 32 x half>, i32 } @llvm.vp.load.ff.nxv32f16.p0(ptr %ptr, <vscale x 32 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 32 x half>, i32 } %load
 }
 
-declare { <vscale x 1 x float>, i32 } @llvm.vp.load.ff.nxv1f32.p0(ptr, <vscale x 1 x i1>, i32)
-
 define { <vscale x 1 x float>, i32 } @vploadff_nxv1f32(ptr %ptr, <vscale x 1 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv1f32:
 ; CHECK:       # %bb.0:
@@ -756,14 +642,10 @@ define { <vscale x 1 x float>, i32 } @vploadff_nxv1f32_allones_mask(ptr %ptr, i3
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 1 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 1 x i1> %a, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
-  %load = call { <vscale x 1 x float>, i32 } @llvm.vp.load.ff.nxv1f32.p0(ptr %ptr, <vscale x 1 x i1> %b, i32 %evl)
+  %load = call { <vscale x 1 x float>, i32 } @llvm.vp.load.ff.nxv1f32.p0(ptr %ptr, <vscale x 1 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 1 x float>, i32 } %load
 }
 
-declare { <vscale x 2 x float>, i32 } @llvm.vp.load.ff.nxv2f32.p0(ptr, <vscale x 2 x i1>, i32)
-
 define { <vscale x 2 x float>, i32 } @vploadff_nxv2f32(ptr %ptr, <vscale x 2 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv2f32:
 ; CHECK:       # %bb.0:
@@ -782,14 +664,10 @@ define { <vscale x 2 x float>, i32 } @vploadff_nxv2f32_allones_mask(ptr %ptr, i3
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 2 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 2 x i1> %a, <vscale x 2 x i1> poison, <vscale x 2 x i32> zeroinitializer
-  %load = call { <vscale x 2 x float>, i32 } @llvm.vp.load.ff.nxv2f32.p0(ptr %ptr, <vscale x 2 x i1> %b, i32 %evl)
+  %load = call { <vscale x 2 x float>, i32 } @llvm.vp.load.ff.nxv2f32.p0(ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 2 x float>, i32 } %load
 }
 
-declare { <vscale x 4 x float>, i32 } @llvm.vp.load.ff.nxv4f32.p0(ptr, <vscale x 4 x i1>, i32)
-
 define { <vscale x 4 x float>, i32 } @vploadff_nxv4f32(ptr %ptr, <vscale x 4 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv4f32:
 ; CHECK:       # %bb.0:
@@ -808,14 +686,10 @@ define { <vscale x 4 x float>, i32 } @vploadff_nxv4f32_allones_mask(ptr %ptr, i3
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 4 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 4 x i1> %a, <vscale x 4 x i1> poison, <vscale x 4 x i32> zeroinitializer
-  %load = call { <vscale x 4 x float>, i32 } @llvm.vp.load.ff.nxv4f32.p0(ptr %ptr, <vscale x 4 x i1> %b, i32 %evl)
+  %load = call { <vscale x 4 x float>, i32 } @llvm.vp.load.ff.nxv4f32.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 4 x float>, i32 } %load
 }
 
-declare { <vscale x 8 x float>, i32 } @llvm.vp.load.ff.nxv8f32.p0(ptr, <vscale x 8 x i1>, i32)
-
 define { <vscale x 8 x float>, i32 } @vploadff_nxv8f32(ptr %ptr, <vscale x 8 x i1> %m, i32 zeroext %evl) {
 ; CHECK-LABEL: vploadff_nxv8f32:
 ; CHECK:       # %bb.0:
@@ -834,14 +708,10 @@ define { <vscale x 8 x float>, i32 } @vploadff_nxv8f32_allones_mask(ptr %ptr, i3
 ; CHECK-NEXT:    vle32ff.v v8, (a0)
 ; CHECK-NEXT:    csrr a0, vl
 ; CHECK-NEXT:    ret
-  %a = insertelement <vscale x 8 x i1> poison, i1 true, i32 0
-  %b = shufflevector <vscale x 8 x i1> %a, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
-  %load = call { <vscale x 8 x float>, i32 } @llvm.vp.load.ff.nxv8f32.p0(ptr %ptr, <vscale x 8 x i1> %b, i32 %evl)
+  %load = call { <vscale x 8 x float>, i32 } @llvm.vp.load.ff.nxv8f32.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %evl)
   ret { <vscale x 8 x float>, i32 } %load
 }
 
-declare 
{ , i32 } @llvm.vp.load.ff.nxv16f32.p0(ptr, , i32) - define { , i32 } @vploadff_nxv16f32(ptr %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vploadff_nxv16f32: ; CHECK: # %bb.0: @@ -860,14 +730,10 @@ define { , i32 } @vploadff_nxv16f32_allones_mask(ptr %ptr, ; CHECK-NEXT: vle32ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - %load = call { , i32 } @llvm.vp.load.ff.nxv16f32.p0(ptr %ptr, %b, i32 %evl) + %load = call { , i32 } @llvm.vp.load.ff.nxv16f32.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } -declare { , i32 } @llvm.vp.load.ff.nxv1f64.p0(ptr, , i32) - define { , i32 } @vploadff_nxv1f64(ptr %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vploadff_nxv1f64: ; CHECK: # %bb.0: @@ -886,14 +752,10 @@ define { , i32 } @vploadff_nxv1f64_allones_mask(ptr %ptr, i ; CHECK-NEXT: vle64ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - %load = call { , i32 } @llvm.vp.load.ff.nxv1f64.p0(ptr %ptr, %b, i32 %evl) + %load = call { , i32 } @llvm.vp.load.ff.nxv1f64.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } -declare { , i32 } @llvm.vp.load.ff.nxv2f64.p0(ptr, , i32) - define { , i32 } @vploadff_nxv2f64(ptr %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vploadff_nxv2f64: ; CHECK: # %bb.0: @@ -912,14 +774,10 @@ define { , i32 } @vploadff_nxv2f64_allones_mask(ptr %ptr, i ; CHECK-NEXT: vle64ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - %load = call { , i32 } @llvm.vp.load.ff.nxv2f64.p0(ptr %ptr, %b, i32 %evl) + %load = call { , i32 } @llvm.vp.load.ff.nxv2f64.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } -declare { , i32 } @llvm.vp.load.ff.nxv4f64.p0(ptr, , i32) - define { , i32 } @vploadff_nxv4f64(ptr %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vploadff_nxv4f64: ; CHECK: # %bb.0: @@ -938,14 +796,10 @@ define { , i32 } @vploadff_nxv4f64_allones_mask(ptr %ptr, i ; CHECK-NEXT: vle64ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - %load = call { , i32 } @llvm.vp.load.ff.nxv4f64.p0(ptr %ptr, %b, i32 %evl) + %load = call { , i32 } @llvm.vp.load.ff.nxv4f64.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } -declare { , i32 } @llvm.vp.load.ff.nxv8f64.p0(ptr, , i32) - define { , i32 } @vploadff_nxv8f64(ptr %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vploadff_nxv8f64: ; CHECK: # %bb.0: @@ -964,14 +818,10 @@ define { , i32 } @vploadff_nxv8f64_allones_mask(ptr %ptr, i ; CHECK-NEXT: vle64ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - %load = call { , i32 } @llvm.vp.load.ff.nxv8f64.p0(ptr %ptr, %b, i32 %evl) + %load = call { , i32 } @llvm.vp.load.ff.nxv8f64.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } -declare { , i32 } @llvm.vp.load.ff.nxv1bf16.p0(ptr, , i32) - define { , i32 } @vploadff_nxv1bf16(ptr %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vploadff_nxv1bf16: ; CHECK: # %bb.0: @@ -990,14 +840,10 @@ define { , i32 } @vploadff_nxv1bf16_allones_mask(ptr %ptr, ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - %load = call 
{ , i32 } @llvm.vp.load.ff.nxv1bf16.p0(ptr %ptr, %b, i32 %evl) + %load = call { , i32 } @llvm.vp.load.ff.nxv1bf16.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } -declare { , i32 } @llvm.vp.load.ff.nxv2bf16.p0(ptr, , i32) - define { , i32 } @vploadff_nxv2bf16(ptr %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vploadff_nxv2bf16: ; CHECK: # %bb.0: @@ -1016,14 +862,10 @@ define { , i32 } @vploadff_nxv2bf16_allones_mask(ptr %ptr, ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - %load = call { , i32 } @llvm.vp.load.ff.nxv2bf16.p0(ptr %ptr, %b, i32 %evl) + %load = call { , i32 } @llvm.vp.load.ff.nxv2bf16.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } -declare { , i32 } @llvm.vp.load.ff.nxv4bf16.p0(ptr, , i32) - define { , i32 } @vploadff_nxv4bf16(ptr %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vploadff_nxv4bf16: ; CHECK: # %bb.0: @@ -1042,14 +884,10 @@ define { , i32 } @vploadff_nxv4bf16_allones_mask(ptr %ptr, ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - %load = call { , i32 } @llvm.vp.load.ff.nxv4bf16.p0(ptr %ptr, %b, i32 %evl) + %load = call { , i32 } @llvm.vp.load.ff.nxv4bf16.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } -declare { , i32 } @llvm.vp.load.ff.nxv8bf16.p0(ptr, , i32) - define { , i32 } @vploadff_nxv8bf16(ptr %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vploadff_nxv8bf16: ; CHECK: # %bb.0: @@ -1068,14 +906,10 @@ define { , i32 } @vploadff_nxv8bf16_allones_mask(ptr %ptr, ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - %load = call { , i32 } @llvm.vp.load.ff.nxv8bf16.p0(ptr %ptr, %b, i32 %evl) + %load = call { , i32 } @llvm.vp.load.ff.nxv8bf16.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } -declare { , i32 } @llvm.vp.load.ff.nxv16bf16.p0(ptr, , i32) - define { , i32 } @vploadff_nxv16bf16(ptr %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vploadff_nxv16bf16: ; CHECK: # %bb.0: @@ -1094,14 +928,10 @@ define { , i32 } @vploadff_nxv16bf16_allones_mask(ptr %ptr ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - %load = call { , i32 } @llvm.vp.load.ff.nxv16bf16.p0(ptr %ptr, %b, i32 %evl) + %load = call { , i32 } @llvm.vp.load.ff.nxv16bf16.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } -declare { , i32 } @llvm.vp.load.ff.nxv32bf16.p0(ptr, , i32) - define { , i32 } @vploadff_nxv32bf16(ptr %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vploadff_nxv32bf16: ; CHECK: # %bb.0: @@ -1120,8 +950,6 @@ define { , i32 } @vploadff_nxv32bf16_allones_mask(ptr %ptr ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %a = insertelement poison, i1 true, i32 0 - %b = shufflevector %a, poison, zeroinitializer - %load = call { , i32 } @llvm.vp.load.ff.nxv32bf16.p0(ptr %ptr, %b, i32 %evl) + %load = call { , i32 } @llvm.vp.load.ff.nxv32bf16.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } From 93d6ab797ca53b5b85487200e2d448749dd971b7 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 3 Mar 2025 19:03:34 -0800 Subject: [PATCH 04/14] fixup! 
Add SplitVectorRes support --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 50 +++++++++++++++++++ .../RISCV/rvv/fixed-vectors-vploadff.ll | 38 ++++++++++++++ llvm/test/CodeGen/RISCV/rvv/vploadff.ll | 42 ++++++++++++++++ 4 files changed, 131 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 74d7210743372..f3aa5340defac 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -958,6 +958,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, SDValue &Hi); + void SplitVecRes_VP_LOAD_FF(VPLoadFFSDNode *LD, SDValue &Lo, SDValue &Hi); void SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo, SDValue &Hi); void SplitVecRes_MLOAD(MaskedLoadSDNode *MLD, SDValue &Lo, SDValue &Hi); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 9d42ec2fdf859..1d94a931c1743 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -1163,6 +1163,9 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::VP_LOAD: SplitVecRes_VP_LOAD(cast(N), Lo, Hi); break; + case ISD::VP_LOAD_FF: + SplitVecRes_VP_LOAD_FF(cast(N), Lo, Hi); + break; case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: SplitVecRes_VP_STRIDED_LOAD(cast(N), Lo, Hi); break; @@ -2232,6 +2235,53 @@ void DAGTypeLegalizer::SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo, ReplaceValueWith(SDValue(LD, 1), Ch); } +void DAGTypeLegalizer::SplitVecRes_VP_LOAD_FF(VPLoadFFSDNode *LD, SDValue &Lo, + SDValue &Hi) { + EVT LoVT, HiVT; + SDLoc dl(LD); + std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0)); + + SDValue Ch = LD->getChain(); + SDValue Ptr = LD->getBasePtr(); + Align Alignment = LD->getOriginalAlign(); + SDValue Mask = LD->getMask(); + SDValue EVL = LD->getVectorLength(); + EVT MemoryVT = LD->getMemoryVT(); + + EVT LoMemVT, HiMemVT; + bool HiIsEmpty = false; + std::tie(LoMemVT, HiMemVT) = + DAG.GetDependentSplitDestVTs(MemoryVT, LoVT, &HiIsEmpty); + + // Split Mask operand + SDValue MaskLo, MaskHi; + if (Mask.getOpcode() == ISD::SETCC) { + SplitVecRes_SETCC(Mask.getNode(), MaskLo, MaskHi); + } else { + if (getTypeAction(Mask.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Mask, MaskLo, MaskHi); + else + std::tie(MaskLo, MaskHi) = DAG.SplitVector(Mask, dl); + } + + // Split EVL operand + SDValue EVLLo, EVLHi; + std::tie(EVLLo, EVLHi) = DAG.SplitEVL(EVL, LD->getValueType(0), dl); + + MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( + LD->getPointerInfo(), MachineMemOperand::MOLoad, + LocationSize::beforeOrAfterPointer(), Alignment, LD->getAAInfo(), + LD->getRanges()); + + Lo = DAG.getLoadFFVP(LoVT, dl, Ch, Ptr, MaskLo, EVLLo, MMO); + + // Fill the upper half with poison. 
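+  // Leaving the upper half undefined is sound: the lane count returned in
+  // the second result comes from the low-half load, so it never exceeds the
+  // low half's width, and the semantics allow reading fewer than EVL lanes.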
+ Hi = DAG.getUNDEF(HiVT); + + ReplaceValueWith(SDValue(LD, 1), Lo.getValue(1)); + ReplaceValueWith(SDValue(LD, 2), Lo.getValue(2)); +} + void DAGTypeLegalizer::SplitVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *SLD, SDValue &Lo, SDValue &Hi) { assert(SLD->isUnindexed() && diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll index 474f859e9789d..84b531c0d85b8 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll @@ -272,6 +272,44 @@ define { <8 x i64>, i32 } @vploadff_v8i64_allones_mask(ptr %ptr, i32 zeroext %ev ret { <8 x i64>, i32 } %load } +define { <32 x i64>, i32 } @vploadff_v32i64(ptr %ptr, <32 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v32i64: +; CHECK: # %bb.0: +; CHECK-NEXT: li a3, 16 +; CHECK-NEXT: bltu a2, a3, .LBB24_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: .LBB24_2: +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a1), v0.t +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: sw a1, 256(a0) +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret + %load = call { <32 x i64>, i32 } @llvm.vp.load.ff.v32i64.p0(ptr %ptr, <32 x i1> %m, i32 %evl) + ret { <32 x i64>, i32 } %load +} + +define { <32 x i64>, i32 } @vploadff_v32i64_allones_mask(ptr %ptr, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v32i64_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: li a3, 16 +; CHECK-NEXT: bltu a2, a3, .LBB25_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: li a2, 16 +; CHECK-NEXT: .LBB25_2: +; CHECK-NEXT: vsetvli zero, a2, e64, m8, ta, ma +; CHECK-NEXT: vle64ff.v v8, (a1) +; CHECK-NEXT: csrr a1, vl +; CHECK-NEXT: sw a1, 256(a0) +; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma +; CHECK-NEXT: vse64.v v8, (a0) +; CHECK-NEXT: ret + %load = call { <32 x i64>, i32 } @llvm.vp.load.ff.v32i64.p0(ptr %ptr, <32 x i1> splat (i1 true), i32 %evl) + ret { <32 x i64>, i32 } %load +} + define { <2 x half>, i32 } @vploadff_v2f16(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) { ; CHECK-LABEL: vploadff_v2f16: ; CHECK: # %bb.0: diff --git a/llvm/test/CodeGen/RISCV/rvv/vploadff.ll b/llvm/test/CodeGen/RISCV/rvv/vploadff.ll index ae439fd0ce3eb..bbf0c27118793 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vploadff.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vploadff.ll @@ -162,6 +162,48 @@ define { , i32 } @vploadff_nxv64i8_allones_mask(ptr %ptr, i32 ret { , i32 } %load } +define @vploadff_nxv128i8(ptr %ptr, ptr %evl_out, %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv128i8: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: bltu a2, a3, .LBB14_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: .LBB14_2: +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; CHECK-NEXT: vle8ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: sw a0, 0(a1) +; CHECK-NEXT: ret + %load = call { , i32 } @llvm.vp.load.ff.nxv128i8.p0(ptr %ptr, %m, i32 %evl) + %result0 = extractvalue { , i32 } %load, 0 + %result1 = extractvalue { , i32 } %load, 1 + store i32 %result1, ptr %evl_out + ret %result0 +} + +define @vploadff_nxv128i8_allones_mask(ptr %ptr, ptr %evl_out, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv128i8_allones_mask: +; CHECK: # %bb.0: +; CHECK-NEXT: csrr a3, vlenb +; CHECK-NEXT: slli a3, a3, 3 +; CHECK-NEXT: bltu a2, a3, .LBB15_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: mv a2, a3 +; CHECK-NEXT: .LBB15_2: +; CHECK-NEXT: vsetvli zero, a2, e8, m8, ta, ma +; 
CHECK-NEXT: vle8ff.v v8, (a0) +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: sw a0, 0(a1) +; CHECK-NEXT: ret + %load = call { , i32 } @llvm.vp.load.ff.nxv128i8.p0(ptr %ptr, splat (i1 true), i32 %evl) + %result0 = extractvalue { , i32 } %load, 0 + %result1 = extractvalue { , i32 } %load, 1 + store i32 %result1, ptr %evl_out + ret %result0 +} + define { , i32 } @vploadff_nxv1i16(ptr %ptr, %m, i32 zeroext %evl) { ; CHECK-LABEL: vploadff_nxv1i16: ; CHECK: # %bb.0: From 5f0f3cccb0286c21c3eb8dcf1830049a9e5e5add Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 4 Mar 2025 11:34:10 -0800 Subject: [PATCH 05/14] fixup! Use LocationSize::beforeOrAfterPointer(). --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index f2e234f98a943..5cf13f16fec5d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -8493,7 +8493,7 @@ void SelectionDAGBuilder::visitVPLoadFF( SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode(); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad, - MemoryLocation::UnknownSize, *Alignment, AAInfo, Ranges); + LocationSize::beforeOrAfterPointer(), *Alignment, AAInfo, Ranges); LD = DAG.getLoadFFVP(VT, DL, InChain, OpValues[0], OpValues[1], OpValues[2], MMO); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, EVLVT, LD.getValue(1)); From 6720314499cb8aed0e6043bf01f187af3a932d2c Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 4 Mar 2025 12:10:01 -0800 Subject: [PATCH 06/14] fixup! Add to AddNodeIDCustom. --- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index d9bca40567165..f90ffb99999a3 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -846,6 +846,14 @@ static void AddNodeIDCustom(FoldingSetNodeID &ID, const SDNode *N) { ID.AddInteger(ELD->getMemOperand()->getFlags()); break; } + case ISD::VP_LOAD_FF: { + const VPLoadFFSDNode *LD = cast(N); + ID.AddInteger(LD->getMemoryVT().getRawBits()); + ID.AddInteger(LD->getRawSubclassData()); + ID.AddInteger(LD->getPointerInfo().getAddrSpace()); + ID.AddInteger(LD->getMemOperand()->getFlags()); + break; + } case ISD::VP_STORE: { const VPStoreSDNode *EST = cast(N); ID.AddInteger(EST->getMemoryVT().getRawBits()); From 3a8289fbb3aaf73afccd135e98b6f6d606da65ed Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 5 Mar 2025 22:15:52 -0800 Subject: [PATCH 07/14] fixup! 
Add WidenVecRes_VP_LOAD_FF --- llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 1 + .../SelectionDAG/LegalizeVectorTypes.cpp | 26 +++++++++++++++++++ .../RISCV/rvv/fixed-vectors-vploadff.ll | 11 ++++++++ llvm/test/CodeGen/RISCV/rvv/vploadff.ll | 11 ++++++++ 4 files changed, 49 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index f3aa5340defac..fde49c2aebea9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1061,6 +1061,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N); SDValue WidenVecRes_LOAD(SDNode* N); SDValue WidenVecRes_VP_LOAD(VPLoadSDNode *N); + SDValue WidenVecRes_VP_LOAD_FF(VPLoadFFSDNode *N); SDValue WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N); SDValue WidenVecRes_VECTOR_COMPRESS(SDNode *N); SDValue WidenVecRes_MLOAD(MaskedLoadSDNode* N); diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 1d94a931c1743..c8db247551c7e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -4649,6 +4649,9 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::VP_LOAD: Res = WidenVecRes_VP_LOAD(cast(N)); break; + case ISD::VP_LOAD_FF: + Res = WidenVecRes_VP_LOAD_FF(cast(N)); + break; case ISD::EXPERIMENTAL_VP_STRIDED_LOAD: Res = WidenVecRes_VP_STRIDED_LOAD(cast(N)); break; @@ -6113,6 +6116,29 @@ SDValue DAGTypeLegalizer::WidenVecRes_VP_LOAD(VPLoadSDNode *N) { return Res; } +SDValue DAGTypeLegalizer::WidenVecRes_VP_LOAD_FF(VPLoadFFSDNode *N) { + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Mask = N->getMask(); + SDValue EVL = N->getVectorLength(); + SDLoc dl(N); + + // The mask should be widened as well + assert(getTypeAction(Mask.getValueType()) == + TargetLowering::TypeWidenVector && + "Unable to widen binary VP op"); + Mask = GetWidenedVector(Mask); + assert(Mask.getValueType().getVectorElementCount() == + TLI.getTypeToTransformTo(*DAG.getContext(), Mask.getValueType()) + .getVectorElementCount() && + "Unable to widen vector load"); + + SDValue Res = DAG.getLoadFFVP(WidenVT, dl, N->getChain(), N->getBasePtr(), + Mask, EVL, N->getMemOperand()); + ReplaceValueWith(SDValue(N, 1), Res.getValue(1)); + ReplaceValueWith(SDValue(N, 2), Res.getValue(2)); + return Res; +} + SDValue DAGTypeLegalizer::WidenVecRes_VP_STRIDED_LOAD(VPStridedLoadSDNode *N) { SDLoc DL(N); diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll index 84b531c0d85b8..5b01976dbbebd 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll @@ -573,3 +573,14 @@ define { <8 x bfloat>, i32 } @vploadff_v8bf16_allones_mask(ptr %ptr, i32 zeroext %load = call { <8 x bfloat>, i32 } @llvm.vp.load.ff.v8bf16.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl) ret { <8 x bfloat>, i32 } %load } + +define { <7 x i8>, i32 } @vploadff_v7i8(ptr %ptr, <7 x i1> %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_v7i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vle8ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { <7 x i8>, i32 } @llvm.vp.load.ff.v7i8.p0(ptr %ptr, <7 x i1> %m, i32 %evl) + ret { <7 x i8>, i32 } %load +} diff --git 
a/llvm/test/CodeGen/RISCV/rvv/vploadff.ll b/llvm/test/CodeGen/RISCV/rvv/vploadff.ll index bbf0c27118793..9e08938a9fe6c 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vploadff.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vploadff.ll @@ -995,3 +995,14 @@ define { , i32 } @vploadff_nxv32bf16_allones_mask(ptr %ptr %load = call { , i32 } @llvm.vp.load.ff.nxv32bf16.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } + +define { , i32 } @vploadff_nxv3i8(ptr %ptr, %m, i32 zeroext %evl) { +; CHECK-LABEL: vploadff_nxv3i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma +; CHECK-NEXT: vle8ff.v v8, (a0), v0.t +; CHECK-NEXT: csrr a0, vl +; CHECK-NEXT: ret + %load = call { , i32 } @llvm.vp.load.ff.nxv3i8.p0(ptr %ptr, %m, i32 %evl) + ret { , i32 } %load +} From 081ab4655595d161851ea1192ab1f2f0e7609eb5 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 6 Mar 2025 15:46:47 -0800 Subject: [PATCH 08/14] fixup! Address review comments --- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index c8db247551c7e..9e3c8200dbcd1 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2248,9 +2248,8 @@ void DAGTypeLegalizer::SplitVecRes_VP_LOAD_FF(VPLoadFFSDNode *LD, SDValue &Lo, SDValue EVL = LD->getVectorLength(); EVT MemoryVT = LD->getMemoryVT(); - EVT LoMemVT, HiMemVT; bool HiIsEmpty = false; - std::tie(LoMemVT, HiMemVT) = + auto [LoMemVT, HiMemVT] = DAG.GetDependentSplitDestVTs(MemoryVT, LoVT, &HiIsEmpty); // Split Mask operand @@ -2265,8 +2264,7 @@ void DAGTypeLegalizer::SplitVecRes_VP_LOAD_FF(VPLoadFFSDNode *LD, SDValue &Lo, } // Split EVL operand - SDValue EVLLo, EVLHi; - std::tie(EVLLo, EVLHi) = DAG.SplitEVL(EVL, LD->getValueType(0), dl); + auto [EVLLo, EVLHi] = DAG.SplitEVL(EVL, LD->getValueType(0), dl); MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand( LD->getPointerInfo(), MachineMemOperand::MOLoad, From c1fae0d91a185b686943b625a1c400a994ad6af0 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 13 Mar 2025 17:14:53 -0700 Subject: [PATCH 09/14] fixup! Documentation improvements --- llvm/docs/LangRef.rst | 51 +++++++++++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 11 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 9acda87804ec7..61a1be0f849db 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -23963,7 +23963,8 @@ Overview: """"""""" The '``llvm.vp.load.ff.*``' intrinsic is similar to '``llvm.vp.load.*``', but -will not trap if there are not ``evl`` readable elements at the pointer. +will not trap if there are not ``evl`` readable elements at the pointer. '``ff``' +stands for fault-first or fault-only-first. Arguments: """""""""" @@ -23979,16 +23980,44 @@ argument. Semantics: """""""""" -The '``llvm.vp.load.ff``' intrinsic reads a vector from memory similar to -'``llvm.vp.load``, but will only trap if the first lane is unreadable. If -any other lane is unreadable, the number of successfully read lanes will -be returned in the second return value. The result in the first return value -for the lanes that were not successfully read is -:ref:`poison value `. If ``evl`` is 0, no read occurs and thus no -trap can occur for the first lane. If ``mask`` is 0 for the first lane, no -trap occurs. 
This intrinsic is allowed to read fewer than ``evl`` lanes even -if no trap would occur. If ``evl`` is non-zero, the result in the second result -must be at least 1 even if the first lane is disabled by ``mask``. +The '``llvm.vp.load.ff``' is designed for reading vector lanes in a single +IR operation where the number of lanes that can be read is not known and can +only be determined by looking at the data. This is useful for vectorizing +strcmp or strlen like loops where the data contains a null terminator. This is +useful for targets that have a fault-only-first load instruction. Other targets +may support this intrinsic differently, for example by lowering to a single +scalar load guarded by ``evl!=0`` and ``mask[0]==1`` and indicating only 1 +lane could be read. + +Like '``llvm.vp.load``', this intrinsic reads memory based on a ``mask`` and an +``evl``. If ``evl`` is non-zero and the first lane is masked-on, then the +first lane of the vector needs to be inbounds of an allocation. The remaining +masked-on lanes with index less than ``evl`` do not need to be inbounds of +the same allocation or any allocation. + +The second return value from the intrinsic indicates the index of the first +lane that could not be read for some reason or ``evl`` if all lanes could +be read. Lanes at this index or higher in the first return value are +:ref:`poison value <poisonvalues>`. If ``evl`` is non-zero, the result in the +second return value must be at least 1, even if the first lane is masked-off. + +The second result is usually less than ``evl`` when an exception would occur +for reading that lane, but it can be reduced for any reason. This facilitates +emulating this intrinsic when the hardware only supports narrower vector +types natively or when the hardware does not support fault-only-first loads. + +Masked-on lanes that are not inbounds of the allocation that contains the first +lane are :ref:`poison value <poisonvalues>`. There should be a marker in the +allocation that indicates where valid data stops, such as a null terminator. The +terminator should be checked for after calling this intrinsic to prevent using +any lanes past the terminator. Even if the second return value is less than +``evl``, the terminator value may not have been read. + +This intrinsic will typically be called in a loop until a terminator is +found. The second result should be used to indicate how many elements are +valid when searching for the null terminator. If the terminator is not found, the +pointer should be advanced by the number of elements in the second result and +the intrinsic called again. The default alignment is taken as the ABI alignment of the first return type as specified by the :ref:`datalayout string<langref_datalayout>`. From c6c237d7103d4bf0e4b8e7fc806c882d56ff9ded Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 13 Mar 2025 20:21:37 -0700 Subject: [PATCH 10/14] fixup! Add experimental to the intrinsic name. --- llvm/docs/LangRef.rst | 18 +- llvm/include/llvm/IR/Intrinsics.td | 2 +- llvm/include/llvm/IR/VPIntrinsics.def | 8 +- .../SelectionDAG/LegalizeVectorTypes.cpp | 5 - llvm/lib/IR/IntrinsicInst.cpp | 4 +- .../RISCV/rvv/fixed-vectors-vploadff.ll | 102 +++++----- llvm/test/CodeGen/RISCV/rvv/vploadff.ll | 178 +++++++++--------- llvm/unittests/IR/VPIntrinsicTest.cpp | 3 +- 8 files changed, 158 insertions(+), 162 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 61a1be0f849db..a4254e249021a 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -23954,17 +23954,17 @@ This is an overloaded intrinsic.
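As a minimal sketch of the loop described in the Semantics text above (an illustration, not part of the patch): the vectorization factor of 16, the function name ``@strlen_vp``, and the use of ``llvm.get.active.lane.mask`` and ``llvm.experimental.cttz.elts`` to skip the poison lanes are all assumptions made for this example::

  define i64 @strlen_vp(ptr %s) {
  entry:
    br label %loop

  loop:
    %off = phi i64 [ 0, %entry ], [ %off.next, %latch ]
    %p = getelementptr inbounds i8, ptr %s, i64 %off
    %ld = call { <16 x i8>, i32 } @llvm.experimental.vp.load.ff.v16i8.p0(ptr %p, <16 x i1> splat (i1 true), i32 16)
    %v = extractvalue { <16 x i8>, i32 } %ld, 0
    %n = extractvalue { <16 x i8>, i32 } %ld, 1    ; valid lane count, >= 1
    ; Lanes at index >= %n are poison, so mask them off before searching
    ; for the null terminator.
    %isz = icmp eq <16 x i8> %v, zeroinitializer
    %valid = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 0, i32 %n)
    %hit = select <16 x i1> %valid, <16 x i1> %isz, <16 x i1> zeroinitializer
    %idx = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %hit, i1 false)
    %found = icmp ult i32 %idx, %n
    br i1 %found, label %done, label %latch

  latch:
    %n.ext = zext i32 %n to i64
    %off.next = add i64 %off, %n.ext               ; advance by the lanes read
    br label %loop

  done:
    %idx.ext = zext i32 %idx to i64
    %len = add i64 %off, %idx.ext
    ret i64 %len
  }

Because the second result is at least 1 whenever ``evl`` is non-zero, the loop always makes progress, and only the first ``%n`` lanes are trusted, matching the caveat that the terminator may not have been read even when ``%n`` is less than ``evl``.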
:: - declare {<4 x float>, i32} @llvm.vp.load.ff.v4f32.p0(ptr %ptr, <4 x i1> %mask, i32 %evl) - declare {, i32} @llvm.vp.load.ff.nxv2i16.p0(ptr %ptr, %mask, i32 %evl) - declare {<8 x float>, i32} @llvm.vp.load.ff.v8f32.p1(ptr addrspace(1) %ptr, <8 x i1> %mask, i32 %evl) - declare {, i32} @llvm.vp.load.ff.nxv1i64.p6(ptr addrspace(6) %ptr, %mask, i32 %evl) + declare {<4 x float>, i32} @llvm.experimental.vp.load.ff.v4f32.p0(ptr %ptr, <4 x i1> %mask, i32 %evl) + declare {, i32} @llvm.experimental.vp.load.ff.nxv2i16.p0(ptr %ptr, %mask, i32 %evl) + declare {<8 x float>, i32} @llvm.experimental.vp.load.ff.v8f32.p1(ptr addrspace(1) %ptr, <8 x i1> %mask, i32 %evl) + declare {, i32} @llvm.experimental.vp.load.ff.nxv1i64.p6(ptr addrspace(6) %ptr, %mask, i32 %evl) Overview: """"""""" -The '``llvm.vp.load.ff.*``' intrinsic is similar to '``llvm.vp.load.*``', but -will not trap if there are not ``evl`` readable elements at the pointer. '``ff``' -stands for fault-first or fault-only-first. +The '``llvm.experimental.vp.load.ff.*``' intrinsic is similar to +'``llvm.vp.load.*``', but will not trap if there are not ``evl`` readable +lanes at the pointer. '``ff``' stands for fault-first or fault-only-first. Arguments: """""""""" @@ -23980,7 +23980,7 @@ argument. Semantics: """""""""" -The '``llvm.vp.load.ff``' is designed for reading vector lanes in a single +The '``llvm.experimental.vp.load.ff``' is designed for reading vector lanes in a single IR operation where the number of lanes that can be read is not known and can only be determined by looking at the data. This is useful for vectorizing strcmp or strlen like loops where the data contains a null terminator. This is @@ -24027,7 +24027,7 @@ Examples: .. code-block:: text - %r = call {<8 x i8>, i32} @llvm.vp.load.ff.v8i8.p0(ptr align 2 %ptr, <8 x i1> %mask, i32 %evl) + %r = call {<8 x i8>, i32} @llvm.experimental.vp.load.ff.v8i8.p0(ptr align 2 %ptr, <8 x i1> %mask, i32 %evl) .. 
_int_vp_store: diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index 707e1020f6170..f28d21af9f5ab 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1912,7 +1912,7 @@ def int_vp_load : DefaultAttrsIntrinsic<[ llvm_anyvector_ty], llvm_i32_ty], [ NoCapture>, IntrNoSync, IntrReadMem, IntrWillReturn, IntrArgMemOnly ]>; -def int_vp_load_ff : DefaultAttrsIntrinsic<[ llvm_anyvector_ty, llvm_i32_ty ], +def int_experimental_vp_load_ff : DefaultAttrsIntrinsic<[ llvm_anyvector_ty, llvm_i32_ty ], [ llvm_anyptr_ty, LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_i32_ty], diff --git a/llvm/include/llvm/IR/VPIntrinsics.def b/llvm/include/llvm/IR/VPIntrinsics.def index 4a71097226f18..c210e15341697 100644 --- a/llvm/include/llvm/IR/VPIntrinsics.def +++ b/llvm/include/llvm/IR/VPIntrinsics.def @@ -587,12 +587,12 @@ VP_PROPERTY_FUNCTIONAL_OPC(Load) VP_PROPERTY_FUNCTIONAL_INTRINSIC(masked_load) END_REGISTER_VP(vp_load, VP_LOAD) -BEGIN_REGISTER_VP_INTRINSIC(vp_load_ff, 1, 2) +BEGIN_REGISTER_VP_INTRINSIC(experimental_vp_load_ff, 1, 2) // val,chain = VP_LOAD_FF chain,base,mask,evl -BEGIN_REGISTER_VP_SDNODE(VP_LOAD_FF, -1, vp_load_ff, 2, 3) -HELPER_MAP_VPID_TO_VPSD(vp_load_ff, VP_LOAD_FF) +BEGIN_REGISTER_VP_SDNODE(VP_LOAD_FF, -1, experimental_vp_load_ff, 2, 3) +HELPER_MAP_VPID_TO_VPSD(experimental_vp_load_ff, VP_LOAD_FF) VP_PROPERTY_NO_FUNCTIONAL -END_REGISTER_VP(vp_load_ff, VP_LOAD_FF) +END_REGISTER_VP(experimental_vp_load_ff, VP_LOAD_FF) // llvm.experimental.vp.strided.load(ptr,stride,mask,vlen) BEGIN_REGISTER_VP_INTRINSIC(experimental_vp_strided_load, 2, 3) // chain = EXPERIMENTAL_VP_STRIDED_LOAD chain,base,offset,stride,mask,evl diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 9e3c8200dbcd1..22e87046a4610 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -2246,11 +2246,6 @@ void DAGTypeLegalizer::SplitVecRes_VP_LOAD_FF(VPLoadFFSDNode *LD, SDValue &Lo, Align Alignment = LD->getOriginalAlign(); SDValue Mask = LD->getMask(); SDValue EVL = LD->getVectorLength(); - EVT MemoryVT = LD->getMemoryVT(); - - bool HiIsEmpty = false; - auto [LoMemVT, HiMemVT] = - DAG.GetDependentSplitDestVTs(MemoryVT, LoVT, &HiIsEmpty); // Split Mask operand SDValue MaskLo, MaskHi; diff --git a/llvm/lib/IR/IntrinsicInst.cpp b/llvm/lib/IR/IntrinsicInst.cpp index 7ddea32f57f02..7fbf201141fc8 100644 --- a/llvm/lib/IR/IntrinsicInst.cpp +++ b/llvm/lib/IR/IntrinsicInst.cpp @@ -448,7 +448,7 @@ VPIntrinsic::getMemoryPointerParamPos(Intrinsic::ID VPID) { case Intrinsic::experimental_vp_strided_store: return 1; case Intrinsic::vp_load: - case Intrinsic::vp_load_ff: + case Intrinsic::experimental_vp_load_ff: case Intrinsic::vp_gather: case Intrinsic::experimental_vp_strided_load: return 0; @@ -672,7 +672,7 @@ Function *VPIntrinsic::getOrInsertDeclarationForParams( VPFunc = Intrinsic::getOrInsertDeclaration( M, VPID, {ReturnType, Params[0]->getType()}); break; - case Intrinsic::vp_load_ff: + case Intrinsic::experimental_vp_load_ff: VPFunc = Intrinsic::getOrInsertDeclaration( M, VPID, {ReturnType->getStructElementType(0), Params[0]->getType()}); break; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll index 5b01976dbbebd..4705f9d96191b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll +++ 
b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vploadff.ll @@ -15,7 +15,7 @@ define { <2 x i8>, i32 } @vploadff_v2i8(ptr %ptr, <2 x i1> %m, i32 zeroext %evl) ; CHECK-NEXT: vle8ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr %ptr, <2 x i1> %m, i32 %evl) + %load = call { <2 x i8>, i32 } @llvm.experimental.vp.load.ff.v2i8.p0(ptr %ptr, <2 x i1> %m, i32 %evl) ret { <2 x i8>, i32 } %load } @@ -26,7 +26,7 @@ define { <2 x i8>, i32 } @vploadff_v2i8_allones_mask(ptr %ptr, i32 zeroext %evl) ; CHECK-NEXT: vle8ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <2 x i8>, i32 } @llvm.vp.load.ff.v2i8.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl) + %load = call { <2 x i8>, i32 } @llvm.experimental.vp.load.ff.v2i8.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl) ret { <2 x i8>, i32 } %load } @@ -37,7 +37,7 @@ define { <4 x i8>, i32 } @vploadff_v4i8(ptr %ptr, <4 x i1> %m, i32 zeroext %evl) ; CHECK-NEXT: vle8ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr %ptr, <4 x i1> %m, i32 %evl) + %load = call { <4 x i8>, i32 } @llvm.experimental.vp.load.ff.v4i8.p0(ptr %ptr, <4 x i1> %m, i32 %evl) ret { <4 x i8>, i32 } %load } @@ -48,7 +48,7 @@ define { <4 x i8>, i32 } @vploadff_v4i8_allones_mask(ptr %ptr, i32 zeroext %evl) ; CHECK-NEXT: vle8ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <4 x i8>, i32 } @llvm.vp.load.ff.v4i8.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl) + %load = call { <4 x i8>, i32 } @llvm.experimental.vp.load.ff.v4i8.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl) ret { <4 x i8>, i32 } %load } @@ -59,7 +59,7 @@ define { <8 x i8>, i32 } @vploadff_v8i8(ptr %ptr, <8 x i1> %m, i32 zeroext %evl) ; CHECK-NEXT: vle8ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr %ptr, <8 x i1> %m, i32 %evl) + %load = call { <8 x i8>, i32 } @llvm.experimental.vp.load.ff.v8i8.p0(ptr %ptr, <8 x i1> %m, i32 %evl) ret { <8 x i8>, i32 } %load } @@ -70,7 +70,7 @@ define { <8 x i8>, i32 } @vploadff_v8i8_allones_mask(ptr %ptr, i32 zeroext %evl) ; CHECK-NEXT: vle8ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <8 x i8>, i32 } @llvm.vp.load.ff.v8i8.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl) + %load = call { <8 x i8>, i32 } @llvm.experimental.vp.load.ff.v8i8.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl) ret { <8 x i8>, i32 } %load } @@ -81,7 +81,7 @@ define { <2 x i16>, i32 } @vploadff_v2i16(ptr %ptr, <2 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vle16ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <2 x i16>, i32 } @llvm.vp.load.ff.v2i16.p0(ptr %ptr, <2 x i1> %m, i32 %evl) + %load = call { <2 x i16>, i32 } @llvm.experimental.vp.load.ff.v2i16.p0(ptr %ptr, <2 x i1> %m, i32 %evl) ret { <2 x i16>, i32 } %load } @@ -92,7 +92,7 @@ define { <2 x i16>, i32 } @vploadff_v2i16_allones_mask(ptr %ptr, i32 zeroext %ev ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <2 x i16>, i32 } @llvm.vp.load.ff.v2i16.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl) + %load = call { <2 x i16>, i32 } @llvm.experimental.vp.load.ff.v2i16.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl) ret { <2 x i16>, i32 } %load } @@ -103,7 +103,7 @@ define { <4 x i16>, i32 } @vploadff_v4i16(ptr %ptr, <4 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vle16ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr 
a0, vl ; CHECK-NEXT: ret - %load = call { <4 x i16>, i32 } @llvm.vp.load.ff.v4i16.p0(ptr %ptr, <4 x i1> %m, i32 %evl) + %load = call { <4 x i16>, i32 } @llvm.experimental.vp.load.ff.v4i16.p0(ptr %ptr, <4 x i1> %m, i32 %evl) ret { <4 x i16>, i32 } %load } @@ -114,7 +114,7 @@ define { <4 x i16>, i32 } @vploadff_v4i16_allones_mask(ptr %ptr, i32 zeroext %ev ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <4 x i16>, i32 } @llvm.vp.load.ff.v4i16.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl) + %load = call { <4 x i16>, i32 } @llvm.experimental.vp.load.ff.v4i16.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl) ret { <4 x i16>, i32 } %load } @@ -125,7 +125,7 @@ define { <8 x i16>, i32 } @vploadff_v8i16(ptr %ptr, <8 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vle16ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <8 x i16>, i32 } @llvm.vp.load.ff.v8i16.p0(ptr %ptr, <8 x i1> %m, i32 %evl) + %load = call { <8 x i16>, i32 } @llvm.experimental.vp.load.ff.v8i16.p0(ptr %ptr, <8 x i1> %m, i32 %evl) ret { <8 x i16>, i32 } %load } @@ -136,7 +136,7 @@ define { <8 x i16>, i32 } @vploadff_v8i16_allones_mask(ptr %ptr, i32 zeroext %ev ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <8 x i16>, i32 } @llvm.vp.load.ff.v8i16.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl) + %load = call { <8 x i16>, i32 } @llvm.experimental.vp.load.ff.v8i16.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl) ret { <8 x i16>, i32 } %load } @@ -147,7 +147,7 @@ define { <2 x i32>, i32 } @vploadff_v2i32(ptr %ptr, <2 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vle32ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <2 x i32>, i32 } @llvm.vp.load.ff.v2i32.p0(ptr %ptr, <2 x i1> %m, i32 %evl) + %load = call { <2 x i32>, i32 } @llvm.experimental.vp.load.ff.v2i32.p0(ptr %ptr, <2 x i1> %m, i32 %evl) ret { <2 x i32>, i32 } %load } @@ -158,7 +158,7 @@ define { <2 x i32>, i32 } @vploadff_v2i32_allones_mask(ptr %ptr, i32 zeroext %ev ; CHECK-NEXT: vle32ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <2 x i32>, i32 } @llvm.vp.load.ff.v2i32.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl) + %load = call { <2 x i32>, i32 } @llvm.experimental.vp.load.ff.v2i32.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl) ret { <2 x i32>, i32 } %load } @@ -169,7 +169,7 @@ define { <4 x i32>, i32 } @vploadff_v4i32(ptr %ptr, <4 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vle32ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <4 x i32>, i32 } @llvm.vp.load.ff.v4i32.p0(ptr %ptr, <4 x i1> %m, i32 %evl) + %load = call { <4 x i32>, i32 } @llvm.experimental.vp.load.ff.v4i32.p0(ptr %ptr, <4 x i1> %m, i32 %evl) ret { <4 x i32>, i32 } %load } @@ -180,7 +180,7 @@ define { <4 x i32>, i32 } @vploadff_v4i32_allones_mask(ptr %ptr, i32 zeroext %ev ; CHECK-NEXT: vle32ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <4 x i32>, i32 } @llvm.vp.load.ff.v4i32.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl) + %load = call { <4 x i32>, i32 } @llvm.experimental.vp.load.ff.v4i32.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl) ret { <4 x i32>, i32 } %load } @@ -191,7 +191,7 @@ define { <8 x i32>, i32 } @vploadff_v8i32(ptr %ptr, <8 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vle32ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <8 x i32>, i32 } @llvm.vp.load.ff.v8i32.p0(ptr %ptr, <8 x i1> %m, i32 %evl) + %load = call { <8 x i32>, i32 } 
@llvm.experimental.vp.load.ff.v8i32.p0(ptr %ptr, <8 x i1> %m, i32 %evl) ret { <8 x i32>, i32 } %load } @@ -202,7 +202,7 @@ define { <8 x i32>, i32 } @vploadff_v8i32_allones_mask(ptr %ptr, i32 zeroext %ev ; CHECK-NEXT: vle32ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <8 x i32>, i32 } @llvm.vp.load.ff.v8i32.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl) + %load = call { <8 x i32>, i32 } @llvm.experimental.vp.load.ff.v8i32.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl) ret { <8 x i32>, i32 } %load } @@ -213,7 +213,7 @@ define { <2 x i64>, i32 } @vploadff_v2i64(ptr %ptr, <2 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vle64ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr %ptr, <2 x i1> %m, i32 %evl) + %load = call { <2 x i64>, i32 } @llvm.experimental.vp.load.ff.v2i64.p0(ptr %ptr, <2 x i1> %m, i32 %evl) ret { <2 x i64>, i32 } %load } @@ -224,7 +224,7 @@ define { <2 x i64>, i32 } @vploadff_v2i64_allones_mask(ptr %ptr, i32 zeroext %ev ; CHECK-NEXT: vle64ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <2 x i64>, i32 } @llvm.vp.load.ff.v2i64.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl) + %load = call { <2 x i64>, i32 } @llvm.experimental.vp.load.ff.v2i64.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl) ret { <2 x i64>, i32 } %load } @@ -235,7 +235,7 @@ define { <4 x i64>, i32 } @vploadff_v4i64(ptr %ptr, <4 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vle64ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr %ptr, <4 x i1> %m, i32 %evl) + %load = call { <4 x i64>, i32 } @llvm.experimental.vp.load.ff.v4i64.p0(ptr %ptr, <4 x i1> %m, i32 %evl) ret { <4 x i64>, i32 } %load } @@ -246,7 +246,7 @@ define { <4 x i64>, i32 } @vploadff_v4i64_allones_mask(ptr %ptr, i32 zeroext %ev ; CHECK-NEXT: vle64ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <4 x i64>, i32 } @llvm.vp.load.ff.v4i64.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl) + %load = call { <4 x i64>, i32 } @llvm.experimental.vp.load.ff.v4i64.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl) ret { <4 x i64>, i32 } %load } @@ -257,7 +257,7 @@ define { <8 x i64>, i32 } @vploadff_v8i64(ptr %ptr, <8 x i1> %m, i32 zeroext %ev ; CHECK-NEXT: vle64ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr %ptr, <8 x i1> %m, i32 %evl) + %load = call { <8 x i64>, i32 } @llvm.experimental.vp.load.ff.v8i64.p0(ptr %ptr, <8 x i1> %m, i32 %evl) ret { <8 x i64>, i32 } %load } @@ -268,7 +268,7 @@ define { <8 x i64>, i32 } @vploadff_v8i64_allones_mask(ptr %ptr, i32 zeroext %ev ; CHECK-NEXT: vle64ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <8 x i64>, i32 } @llvm.vp.load.ff.v8i64.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl) + %load = call { <8 x i64>, i32 } @llvm.experimental.vp.load.ff.v8i64.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl) ret { <8 x i64>, i32 } %load } @@ -287,7 +287,7 @@ define { <32 x i64>, i32 } @vploadff_v32i64(ptr %ptr, <32 x i1> %m, i32 zeroext ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret - %load = call { <32 x i64>, i32 } @llvm.vp.load.ff.v32i64.p0(ptr %ptr, <32 x i1> %m, i32 %evl) + %load = call { <32 x i64>, i32 } @llvm.experimental.vp.load.ff.v32i64.p0(ptr %ptr, <32 x i1> %m, i32 %evl) ret { <32 x i64>, i32 } %load } @@ -306,7 +306,7 @@ define { <32 x 
i64>, i32 } @vploadff_v32i64_allones_mask(ptr %ptr, i32 zeroext % ; CHECK-NEXT: vsetivli zero, 16, e64, m8, ta, ma ; CHECK-NEXT: vse64.v v8, (a0) ; CHECK-NEXT: ret - %load = call { <32 x i64>, i32 } @llvm.vp.load.ff.v32i64.p0(ptr %ptr, <32 x i1> splat (i1 true), i32 %evl) + %load = call { <32 x i64>, i32 } @llvm.experimental.vp.load.ff.v32i64.p0(ptr %ptr, <32 x i1> splat (i1 true), i32 %evl) ret { <32 x i64>, i32 } %load } @@ -317,7 +317,7 @@ define { <2 x half>, i32 } @vploadff_v2f16(ptr %ptr, <2 x i1> %m, i32 zeroext %e ; CHECK-NEXT: vle16ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <2 x half>, i32 } @llvm.vp.load.ff.v2f16.p0(ptr %ptr, <2 x i1> %m, i32 %evl) + %load = call { <2 x half>, i32 } @llvm.experimental.vp.load.ff.v2f16.p0(ptr %ptr, <2 x i1> %m, i32 %evl) ret { <2 x half>, i32 } %load } @@ -328,7 +328,7 @@ define { <2 x half>, i32 } @vploadff_v2f16_allones_mask(ptr %ptr, i32 zeroext %e ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <2 x half>, i32 } @llvm.vp.load.ff.v2f16.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl) + %load = call { <2 x half>, i32 } @llvm.experimental.vp.load.ff.v2f16.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl) ret { <2 x half>, i32 } %load } @@ -339,7 +339,7 @@ define { <4 x half>, i32 } @vploadff_v4f16(ptr %ptr, <4 x i1> %m, i32 zeroext %e ; CHECK-NEXT: vle16ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <4 x half>, i32 } @llvm.vp.load.ff.v4f16.p0(ptr %ptr, <4 x i1> %m, i32 %evl) + %load = call { <4 x half>, i32 } @llvm.experimental.vp.load.ff.v4f16.p0(ptr %ptr, <4 x i1> %m, i32 %evl) ret { <4 x half>, i32 } %load } @@ -350,7 +350,7 @@ define { <4 x half>, i32 } @vploadff_v4f16_allones_mask(ptr %ptr, i32 zeroext %e ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <4 x half>, i32 } @llvm.vp.load.ff.v4f16.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl) + %load = call { <4 x half>, i32 } @llvm.experimental.vp.load.ff.v4f16.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl) ret { <4 x half>, i32 } %load } @@ -361,7 +361,7 @@ define { <8 x half>, i32 } @vploadff_v8f16(ptr %ptr, <8 x i1> %m, i32 zeroext %e ; CHECK-NEXT: vle16ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <8 x half>, i32 } @llvm.vp.load.ff.v8f16.p0(ptr %ptr, <8 x i1> %m, i32 %evl) + %load = call { <8 x half>, i32 } @llvm.experimental.vp.load.ff.v8f16.p0(ptr %ptr, <8 x i1> %m, i32 %evl) ret { <8 x half>, i32 } %load } @@ -372,7 +372,7 @@ define { <8 x half>, i32 } @vploadff_v8f16_allones_mask(ptr %ptr, i32 zeroext %e ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <8 x half>, i32 } @llvm.vp.load.ff.v8f16.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl) + %load = call { <8 x half>, i32 } @llvm.experimental.vp.load.ff.v8f16.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl) ret { <8 x half>, i32 } %load } @@ -383,7 +383,7 @@ define { <2 x float>, i32 } @vploadff_v2f32(ptr %ptr, <2 x i1> %m, i32 zeroext % ; CHECK-NEXT: vle32ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <2 x float>, i32 } @llvm.vp.load.ff.v2f32.p0(ptr %ptr, <2 x i1> %m, i32 %evl) + %load = call { <2 x float>, i32 } @llvm.experimental.vp.load.ff.v2f32.p0(ptr %ptr, <2 x i1> %m, i32 %evl) ret { <2 x float>, i32 } %load } @@ -394,7 +394,7 @@ define { <2 x float>, i32 } @vploadff_v2f32_allones_mask(ptr %ptr, i32 zeroext % ; CHECK-NEXT: vle32ff.v v8, (a0) ; 
CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <2 x float>, i32 } @llvm.vp.load.ff.v2f32.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl) + %load = call { <2 x float>, i32 } @llvm.experimental.vp.load.ff.v2f32.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl) ret { <2 x float>, i32 } %load } @@ -405,7 +405,7 @@ define { <4 x float>, i32 } @vploadff_v4f32(ptr %ptr, <4 x i1> %m, i32 zeroext % ; CHECK-NEXT: vle32ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <4 x float>, i32 } @llvm.vp.load.ff.v4f32.p0(ptr %ptr, <4 x i1> %m, i32 %evl) + %load = call { <4 x float>, i32 } @llvm.experimental.vp.load.ff.v4f32.p0(ptr %ptr, <4 x i1> %m, i32 %evl) ret { <4 x float>, i32 } %load } @@ -416,7 +416,7 @@ define { <4 x float>, i32 } @vploadff_v4f32_allones_mask(ptr %ptr, i32 zeroext % ; CHECK-NEXT: vle32ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <4 x float>, i32 } @llvm.vp.load.ff.v4f32.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl) + %load = call { <4 x float>, i32 } @llvm.experimental.vp.load.ff.v4f32.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl) ret { <4 x float>, i32 } %load } @@ -427,7 +427,7 @@ define { <8 x float>, i32 } @vploadff_v8f32(ptr %ptr, <8 x i1> %m, i32 zeroext % ; CHECK-NEXT: vle32ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <8 x float>, i32 } @llvm.vp.load.ff.v8f32.p0(ptr %ptr, <8 x i1> %m, i32 %evl) + %load = call { <8 x float>, i32 } @llvm.experimental.vp.load.ff.v8f32.p0(ptr %ptr, <8 x i1> %m, i32 %evl) ret { <8 x float>, i32 } %load } @@ -438,7 +438,7 @@ define { <8 x float>, i32 } @vploadff_v8f32_allones_mask(ptr %ptr, i32 zeroext % ; CHECK-NEXT: vle32ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <8 x float>, i32 } @llvm.vp.load.ff.v8f32.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl) + %load = call { <8 x float>, i32 } @llvm.experimental.vp.load.ff.v8f32.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl) ret { <8 x float>, i32 } %load } @@ -449,7 +449,7 @@ define { <2 x double>, i32 } @vploadff_v2f64(ptr %ptr, <2 x i1> %m, i32 zeroext ; CHECK-NEXT: vle64ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <2 x double>, i32 } @llvm.vp.load.ff.v2f64.p0(ptr %ptr, <2 x i1> %m, i32 %evl) + %load = call { <2 x double>, i32 } @llvm.experimental.vp.load.ff.v2f64.p0(ptr %ptr, <2 x i1> %m, i32 %evl) ret { <2 x double>, i32 } %load } @@ -460,7 +460,7 @@ define { <2 x double>, i32 } @vploadff_v2f64_allones_mask(ptr %ptr, i32 zeroext ; CHECK-NEXT: vle64ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <2 x double>, i32 } @llvm.vp.load.ff.v2f64.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl) + %load = call { <2 x double>, i32 } @llvm.experimental.vp.load.ff.v2f64.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl) ret { <2 x double>, i32 } %load } @@ -471,7 +471,7 @@ define { <4 x double>, i32 } @vploadff_v4f64(ptr %ptr, <4 x i1> %m, i32 zeroext ; CHECK-NEXT: vle64ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <4 x double>, i32 } @llvm.vp.load.ff.v4f64.p0(ptr %ptr, <4 x i1> %m, i32 %evl) + %load = call { <4 x double>, i32 } @llvm.experimental.vp.load.ff.v4f64.p0(ptr %ptr, <4 x i1> %m, i32 %evl) ret { <4 x double>, i32 } %load } @@ -482,7 +482,7 @@ define { <4 x double>, i32 } @vploadff_v4f64_allones_mask(ptr %ptr, i32 zeroext ; CHECK-NEXT: vle64ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <4 x double>, i32 } @llvm.vp.load.ff.v4f64.p0(ptr 
%ptr, <4 x i1> splat (i1 true), i32 %evl) + %load = call { <4 x double>, i32 } @llvm.experimental.vp.load.ff.v4f64.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl) ret { <4 x double>, i32 } %load } @@ -493,7 +493,7 @@ define { <8 x double>, i32 } @vploadff_v8f64(ptr %ptr, <8 x i1> %m, i32 zeroext ; CHECK-NEXT: vle64ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <8 x double>, i32 } @llvm.vp.load.ff.v8f64.p0(ptr %ptr, <8 x i1> %m, i32 %evl) + %load = call { <8 x double>, i32 } @llvm.experimental.vp.load.ff.v8f64.p0(ptr %ptr, <8 x i1> %m, i32 %evl) ret { <8 x double>, i32 } %load } @@ -504,7 +504,7 @@ define { <8 x double>, i32 } @vploadff_v8f64_allones_mask(ptr %ptr, i32 zeroext ; CHECK-NEXT: vle64ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <8 x double>, i32 } @llvm.vp.load.ff.v8f64.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl) + %load = call { <8 x double>, i32 } @llvm.experimental.vp.load.ff.v8f64.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl) ret { <8 x double>, i32 } %load } @@ -515,7 +515,7 @@ define { <2 x bfloat>, i32 } @vploadff_v2bf16(ptr %ptr, <2 x i1> %m, i32 zeroext ; CHECK-NEXT: vle16ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <2 x bfloat>, i32 } @llvm.vp.load.ff.v2bf16.p0(ptr %ptr, <2 x i1> %m, i32 %evl) + %load = call { <2 x bfloat>, i32 } @llvm.experimental.vp.load.ff.v2bf16.p0(ptr %ptr, <2 x i1> %m, i32 %evl) ret { <2 x bfloat>, i32 } %load } @@ -526,7 +526,7 @@ define { <2 x bfloat>, i32 } @vploadff_v2bf16_allones_mask(ptr %ptr, i32 zeroext ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <2 x bfloat>, i32 } @llvm.vp.load.ff.v2bf16.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl) + %load = call { <2 x bfloat>, i32 } @llvm.experimental.vp.load.ff.v2bf16.p0(ptr %ptr, <2 x i1> splat (i1 true), i32 %evl) ret { <2 x bfloat>, i32 } %load } @@ -537,7 +537,7 @@ define { <4 x bfloat>, i32 } @vploadff_v4bf16(ptr %ptr, <4 x i1> %m, i32 zeroext ; CHECK-NEXT: vle16ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <4 x bfloat>, i32 } @llvm.vp.load.ff.v4bf16.p0(ptr %ptr, <4 x i1> %m, i32 %evl) + %load = call { <4 x bfloat>, i32 } @llvm.experimental.vp.load.ff.v4bf16.p0(ptr %ptr, <4 x i1> %m, i32 %evl) ret { <4 x bfloat>, i32 } %load } @@ -548,7 +548,7 @@ define { <4 x bfloat>, i32 } @vploadff_v4bf16_allones_mask(ptr %ptr, i32 zeroext ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <4 x bfloat>, i32 } @llvm.vp.load.ff.v4bf16.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl) + %load = call { <4 x bfloat>, i32 } @llvm.experimental.vp.load.ff.v4bf16.p0(ptr %ptr, <4 x i1> splat (i1 true), i32 %evl) ret { <4 x bfloat>, i32 } %load } @@ -559,7 +559,7 @@ define { <8 x bfloat>, i32 } @vploadff_v8bf16(ptr %ptr, <8 x i1> %m, i32 zeroext ; CHECK-NEXT: vle16ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <8 x bfloat>, i32 } @llvm.vp.load.ff.v8bf16.p0(ptr %ptr, <8 x i1> %m, i32 %evl) + %load = call { <8 x bfloat>, i32 } @llvm.experimental.vp.load.ff.v8bf16.p0(ptr %ptr, <8 x i1> %m, i32 %evl) ret { <8 x bfloat>, i32 } %load } @@ -570,7 +570,7 @@ define { <8 x bfloat>, i32 } @vploadff_v8bf16_allones_mask(ptr %ptr, i32 zeroext ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <8 x bfloat>, i32 } @llvm.vp.load.ff.v8bf16.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl) + %load = call { <8 x bfloat>, i32 } 
@llvm.experimental.vp.load.ff.v8bf16.p0(ptr %ptr, <8 x i1> splat (i1 true), i32 %evl) ret { <8 x bfloat>, i32 } %load } @@ -581,6 +581,6 @@ define { <7 x i8>, i32 } @vploadff_v7i8(ptr %ptr, <7 x i1> %m, i32 zeroext %evl) ; CHECK-NEXT: vle8ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { <7 x i8>, i32 } @llvm.vp.load.ff.v7i8.p0(ptr %ptr, <7 x i1> %m, i32 %evl) + %load = call { <7 x i8>, i32 } @llvm.experimental.vp.load.ff.v7i8.p0(ptr %ptr, <7 x i1> %m, i32 %evl) ret { <7 x i8>, i32 } %load } diff --git a/llvm/test/CodeGen/RISCV/rvv/vploadff.ll b/llvm/test/CodeGen/RISCV/rvv/vploadff.ll index 9e08938a9fe6c..461cbf616fb84 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vploadff.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vploadff.ll @@ -15,7 +15,7 @@ define { , i32 } @vploadff_nxv1i8(ptr %ptr, % ; CHECK-NEXT: vle8ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv1i8.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv1i8.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -26,7 +26,7 @@ define { , i32 } @vploadff_nxv1i8_allones_mask(ptr %ptr, i32 ze ; CHECK-NEXT: vle8ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv1i8.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv1i8.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -37,7 +37,7 @@ define { , i32 } @vploadff_nxv2i8(ptr %ptr, % ; CHECK-NEXT: vle8ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv2i8.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -48,7 +48,7 @@ define { , i32 } @vploadff_nxv2i8_allones_mask(ptr %ptr, i32 ze ; CHECK-NEXT: vle8ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv2i8.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv2i8.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -59,7 +59,7 @@ define { , i32 } @vploadff_nxv4i8(ptr %ptr, % ; CHECK-NEXT: vle8ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv4i8.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -70,7 +70,7 @@ define { , i32 } @vploadff_nxv4i8_allones_mask(ptr %ptr, i32 ze ; CHECK-NEXT: vle8ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv4i8.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv4i8.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -81,7 +81,7 @@ define { , i32 } @vploadff_nxv8i8(ptr %ptr, % ; CHECK-NEXT: vle8ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv8i8.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -92,7 +92,7 @@ define { , i32 } @vploadff_nxv8i8_allones_mask(ptr %ptr, i32 ze ; CHECK-NEXT: vle8ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv8i8.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv8i8.p0(ptr %ptr, splat (i1 true), i32 %evl) ret 
{ , i32 } %load } @@ -103,7 +103,7 @@ define { , i32 } @vploadff_nxv16i8(ptr %ptr, , i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv16i8.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -114,7 +114,7 @@ define { , i32 } @vploadff_nxv16i8_allones_mask(ptr %ptr, i32 ; CHECK-NEXT: vle8ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv16i8.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv16i8.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -125,7 +125,7 @@ define { , i32 } @vploadff_nxv32i8(ptr %ptr, , i32 } @llvm.vp.load.ff.nxv32i8.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv32i8.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -136,7 +136,7 @@ define { , i32 } @vploadff_nxv32i8_allones_mask(ptr %ptr, i32 ; CHECK-NEXT: vle8ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv32i8.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv32i8.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -147,7 +147,7 @@ define { , i32 } @vploadff_nxv64i8(ptr %ptr, , i32 } @llvm.vp.load.ff.nxv64i8.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv64i8.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -158,7 +158,7 @@ define { , i32 } @vploadff_nxv64i8_allones_mask(ptr %ptr, i32 ; CHECK-NEXT: vle8ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv64i8.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv64i8.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -176,7 +176,7 @@ define @vploadff_nxv128i8(ptr %ptr, ptr %evl_out, , i32 } @llvm.vp.load.ff.nxv128i8.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv128i8.p0(ptr %ptr, %m, i32 %evl) %result0 = extractvalue { , i32 } %load, 0 %result1 = extractvalue { , i32 } %load, 1 store i32 %result1, ptr %evl_out @@ -197,7 +197,7 @@ define @vploadff_nxv128i8_allones_mask(ptr %ptr, ptr %evl_ou ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: sw a0, 0(a1) ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv128i8.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv128i8.p0(ptr %ptr, splat (i1 true), i32 %evl) %result0 = extractvalue { , i32 } %load, 0 %result1 = extractvalue { , i32 } %load, 1 store i32 %result1, ptr %evl_out @@ -211,7 +211,7 @@ define { , i32 } @vploadff_nxv1i16(ptr %ptr, ; CHECK-NEXT: vle16ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv1i16.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv1i16.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -222,7 +222,7 @@ define { , i32 } @vploadff_nxv1i16_allones_mask(ptr %ptr, i32 ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv1i16.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv1i16.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -233,7 +233,7 @@ define { , i32 } @vploadff_nxv2i16(ptr %ptr, ; CHECK-NEXT: vle16ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } 
@llvm.vp.load.ff.nxv2i16.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv2i16.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -244,7 +244,7 @@ define { , i32 } @vploadff_nxv2i16_allones_mask(ptr %ptr, i32 ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv2i16.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv2i16.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -255,7 +255,7 @@ define { , i32 } @vploadff_nxv4i16(ptr %ptr, ; CHECK-NEXT: vle16ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv4i16.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv4i16.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -266,7 +266,7 @@ define { , i32 } @vploadff_nxv4i16_allones_mask(ptr %ptr, i32 ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv4i16.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv4i16.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -277,7 +277,7 @@ define { , i32 } @vploadff_nxv8i16(ptr %ptr, ; CHECK-NEXT: vle16ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv8i16.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv8i16.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -288,7 +288,7 @@ define { , i32 } @vploadff_nxv8i16_allones_mask(ptr %ptr, i32 ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv8i16.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv8i16.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -299,7 +299,7 @@ define { , i32 } @vploadff_nxv16i16(ptr %ptr, , i32 } @llvm.vp.load.ff.nxv16i16.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv16i16.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -310,7 +310,7 @@ define { , i32 } @vploadff_nxv16i16_allones_mask(ptr %ptr, i3 ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv16i16.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv16i16.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -321,7 +321,7 @@ define { , i32 } @vploadff_nxv32i16(ptr %ptr, , i32 } @llvm.vp.load.ff.nxv32i16.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv32i16.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -332,7 +332,7 @@ define { , i32 } @vploadff_nxv32i16_allones_mask(ptr %ptr, i3 ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv32i16.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv32i16.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -343,7 +343,7 @@ define { , i32 } @vploadff_nxv1i32(ptr %ptr, ; CHECK-NEXT: vle32ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv1i32.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv1i32.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -354,7 +354,7 @@ 
define { , i32 } @vploadff_nxv1i32_allones_mask(ptr %ptr, i32 ; CHECK-NEXT: vle32ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv1i32.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv1i32.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -365,7 +365,7 @@ define { , i32 } @vploadff_nxv2i32(ptr %ptr, ; CHECK-NEXT: vle32ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv2i32.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv2i32.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -376,7 +376,7 @@ define { , i32 } @vploadff_nxv2i32_allones_mask(ptr %ptr, i32 ; CHECK-NEXT: vle32ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv2i32.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv2i32.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -387,7 +387,7 @@ define { , i32 } @vploadff_nxv4i32(ptr %ptr, ; CHECK-NEXT: vle32ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv4i32.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv4i32.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -398,7 +398,7 @@ define { , i32 } @vploadff_nxv4i32_allones_mask(ptr %ptr, i32 ; CHECK-NEXT: vle32ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv4i32.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv4i32.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -409,7 +409,7 @@ define { , i32 } @vploadff_nxv8i32(ptr %ptr, ; CHECK-NEXT: vle32ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv8i32.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv8i32.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -420,7 +420,7 @@ define { , i32 } @vploadff_nxv8i32_allones_mask(ptr %ptr, i32 ; CHECK-NEXT: vle32ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv8i32.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv8i32.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -431,7 +431,7 @@ define { , i32 } @vploadff_nxv16i32(ptr %ptr, , i32 } @llvm.vp.load.ff.nxv16i32.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv16i32.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -442,7 +442,7 @@ define { , i32 } @vploadff_nxv16i32_allones_mask(ptr %ptr, i3 ; CHECK-NEXT: vle32ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv16i32.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv16i32.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -453,7 +453,7 @@ define { , i32 } @vploadff_nxv1i64(ptr %ptr, ; CHECK-NEXT: vle64ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv1i64.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv1i64.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -464,7 +464,7 @@ define { , i32 } @vploadff_nxv1i64_allones_mask(ptr %ptr, i32 ; CHECK-NEXT: vle64ff.v v8, (a0) 
; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv1i64.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv1i64.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -475,7 +475,7 @@ define { , i32 } @vploadff_nxv2i64(ptr %ptr, ; CHECK-NEXT: vle64ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv2i64.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -486,7 +486,7 @@ define { , i32 } @vploadff_nxv2i64_allones_mask(ptr %ptr, i32 ; CHECK-NEXT: vle64ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv2i64.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv2i64.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -497,7 +497,7 @@ define { , i32 } @vploadff_nxv4i64(ptr %ptr, ; CHECK-NEXT: vle64ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv4i64.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -508,7 +508,7 @@ define { , i32 } @vploadff_nxv4i64_allones_mask(ptr %ptr, i32 ; CHECK-NEXT: vle64ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv4i64.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv4i64.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -519,7 +519,7 @@ define { , i32 } @vploadff_nxv8i64(ptr %ptr, ; CHECK-NEXT: vle64ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv8i64.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -530,7 +530,7 @@ define { , i32 } @vploadff_nxv8i64_allones_mask(ptr %ptr, i32 ; CHECK-NEXT: vle64ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv8i64.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv8i64.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -541,7 +541,7 @@ define { , i32 } @vploadff_nxv1f16(ptr %ptr, , i32 } @llvm.vp.load.ff.nxv1f16.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv1f16.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -552,7 +552,7 @@ define { , i32 } @vploadff_nxv1f16_allones_mask(ptr %ptr, i32 ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv1f16.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv1f16.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -563,7 +563,7 @@ define { , i32 } @vploadff_nxv2f16(ptr %ptr, , i32 } @llvm.vp.load.ff.nxv2f16.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv2f16.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -574,7 +574,7 @@ define { , i32 } @vploadff_nxv2f16_allones_mask(ptr %ptr, i32 ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv2f16.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } 
@llvm.experimental.vp.load.ff.nxv2f16.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -585,7 +585,7 @@ define { , i32 } @vploadff_nxv4f16(ptr %ptr, , i32 } @llvm.vp.load.ff.nxv4f16.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv4f16.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -596,7 +596,7 @@ define { , i32 } @vploadff_nxv4f16_allones_mask(ptr %ptr, i32 ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv4f16.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv4f16.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -607,7 +607,7 @@ define { , i32 } @vploadff_nxv8f16(ptr %ptr, , i32 } @llvm.vp.load.ff.nxv8f16.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv8f16.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -618,7 +618,7 @@ define { , i32 } @vploadff_nxv8f16_allones_mask(ptr %ptr, i32 ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv8f16.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv8f16.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -629,7 +629,7 @@ define { , i32 } @vploadff_nxv16f16(ptr %ptr, , i32 } @llvm.vp.load.ff.nxv16f16.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv16f16.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -640,7 +640,7 @@ define { , i32 } @vploadff_nxv16f16_allones_mask(ptr %ptr, i ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv16f16.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv16f16.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -651,7 +651,7 @@ define { , i32 } @vploadff_nxv32f16(ptr %ptr, , i32 } @llvm.vp.load.ff.nxv32f16.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv32f16.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -662,7 +662,7 @@ define { , i32 } @vploadff_nxv32f16_allones_mask(ptr %ptr, i ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv32f16.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv32f16.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -673,7 +673,7 @@ define { , i32 } @vploadff_nxv1f32(ptr %ptr, , i32 } @llvm.vp.load.ff.nxv1f32.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv1f32.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -684,7 +684,7 @@ define { , i32 } @vploadff_nxv1f32_allones_mask(ptr %ptr, i3 ; CHECK-NEXT: vle32ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv1f32.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv1f32.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -695,7 +695,7 @@ define { , i32 } @vploadff_nxv2f32(ptr %ptr, , i32 } @llvm.vp.load.ff.nxv2f32.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv2f32.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -706,7 +706,7 @@ define { , i32 } @vploadff_nxv2f32_allones_mask(ptr %ptr, i3 ; CHECK-NEXT: vle32ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: 
ret - %load = call { , i32 } @llvm.vp.load.ff.nxv2f32.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv2f32.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -717,7 +717,7 @@ define { , i32 } @vploadff_nxv4f32(ptr %ptr, , i32 } @llvm.vp.load.ff.nxv4f32.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv4f32.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -728,7 +728,7 @@ define { , i32 } @vploadff_nxv4f32_allones_mask(ptr %ptr, i3 ; CHECK-NEXT: vle32ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv4f32.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv4f32.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -739,7 +739,7 @@ define { , i32 } @vploadff_nxv8f32(ptr %ptr, , i32 } @llvm.vp.load.ff.nxv8f32.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv8f32.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -750,7 +750,7 @@ define { , i32 } @vploadff_nxv8f32_allones_mask(ptr %ptr, i3 ; CHECK-NEXT: vle32ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv8f32.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv8f32.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -761,7 +761,7 @@ define { , i32 } @vploadff_nxv16f32(ptr %ptr, , i32 } @llvm.vp.load.ff.nxv16f32.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv16f32.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -772,7 +772,7 @@ define { , i32 } @vploadff_nxv16f32_allones_mask(ptr %ptr, ; CHECK-NEXT: vle32ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv16f32.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv16f32.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -783,7 +783,7 @@ define { , i32 } @vploadff_nxv1f64(ptr %ptr, , i32 } @llvm.vp.load.ff.nxv1f64.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv1f64.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -794,7 +794,7 @@ define { , i32 } @vploadff_nxv1f64_allones_mask(ptr %ptr, i ; CHECK-NEXT: vle64ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv1f64.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv1f64.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -805,7 +805,7 @@ define { , i32 } @vploadff_nxv2f64(ptr %ptr, , i32 } @llvm.vp.load.ff.nxv2f64.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv2f64.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -816,7 +816,7 @@ define { , i32 } @vploadff_nxv2f64_allones_mask(ptr %ptr, i ; CHECK-NEXT: vle64ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv2f64.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv2f64.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -827,7 +827,7 @@ define { , i32 } @vploadff_nxv4f64(ptr %ptr, , i32 } @llvm.vp.load.ff.nxv4f64.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv4f64.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -838,7 +838,7 @@ define { , i32 } 
@vploadff_nxv4f64_allones_mask(ptr %ptr, i ; CHECK-NEXT: vle64ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv4f64.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv4f64.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -849,7 +849,7 @@ define { , i32 } @vploadff_nxv8f64(ptr %ptr, , i32 } @llvm.vp.load.ff.nxv8f64.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv8f64.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -860,7 +860,7 @@ define { , i32 } @vploadff_nxv8f64_allones_mask(ptr %ptr, i ; CHECK-NEXT: vle64ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv8f64.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv8f64.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -871,7 +871,7 @@ define { , i32 } @vploadff_nxv1bf16(ptr %ptr, , i32 } @llvm.vp.load.ff.nxv1bf16.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv1bf16.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -882,7 +882,7 @@ define { , i32 } @vploadff_nxv1bf16_allones_mask(ptr %ptr, ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv1bf16.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv1bf16.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -893,7 +893,7 @@ define { , i32 } @vploadff_nxv2bf16(ptr %ptr, , i32 } @llvm.vp.load.ff.nxv2bf16.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv2bf16.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -904,7 +904,7 @@ define { , i32 } @vploadff_nxv2bf16_allones_mask(ptr %ptr, ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv2bf16.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv2bf16.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -915,7 +915,7 @@ define { , i32 } @vploadff_nxv4bf16(ptr %ptr, , i32 } @llvm.vp.load.ff.nxv4bf16.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv4bf16.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -926,7 +926,7 @@ define { , i32 } @vploadff_nxv4bf16_allones_mask(ptr %ptr, ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv4bf16.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv4bf16.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -937,7 +937,7 @@ define { , i32 } @vploadff_nxv8bf16(ptr %ptr, , i32 } @llvm.vp.load.ff.nxv8bf16.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv8bf16.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -948,7 +948,7 @@ define { , i32 } @vploadff_nxv8bf16_allones_mask(ptr %ptr, ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv8bf16.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv8bf16.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -959,7 +959,7 @@ define { , i32 } @vploadff_nxv16bf16(ptr %ptr, , i32 } @llvm.vp.load.ff.nxv16bf16.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } 
@llvm.experimental.vp.load.ff.nxv16bf16.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -970,7 +970,7 @@ define { , i32 } @vploadff_nxv16bf16_allones_mask(ptr %ptr ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv16bf16.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv16bf16.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -981,7 +981,7 @@ define { , i32 } @vploadff_nxv32bf16(ptr %ptr, , i32 } @llvm.vp.load.ff.nxv32bf16.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv32bf16.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } @@ -992,7 +992,7 @@ define { , i32 } @vploadff_nxv32bf16_allones_mask(ptr %ptr ; CHECK-NEXT: vle16ff.v v8, (a0) ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv32bf16.p0(ptr %ptr, splat (i1 true), i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv32bf16.p0(ptr %ptr, splat (i1 true), i32 %evl) ret { , i32 } %load } @@ -1003,6 +1003,6 @@ define { , i32 } @vploadff_nxv3i8(ptr %ptr, % ; CHECK-NEXT: vle8ff.v v8, (a0), v0.t ; CHECK-NEXT: csrr a0, vl ; CHECK-NEXT: ret - %load = call { , i32 } @llvm.vp.load.ff.nxv3i8.p0(ptr %ptr, %m, i32 %evl) + %load = call { , i32 } @llvm.experimental.vp.load.ff.nxv3i8.p0(ptr %ptr, %m, i32 %evl) ret { , i32 } %load } diff --git a/llvm/unittests/IR/VPIntrinsicTest.cpp b/llvm/unittests/IR/VPIntrinsicTest.cpp index a101979ee6a4a..3644995b4054d 100644 --- a/llvm/unittests/IR/VPIntrinsicTest.cpp +++ b/llvm/unittests/IR/VPIntrinsicTest.cpp @@ -100,7 +100,8 @@ class VPIntrinsicTest : public testing::Test { "i32*>, <8 x i1>, i32) "; Str << " declare <8 x i32> @llvm.vp.load.v8i32.p0v8i32(<8 x i32>*, <8 x " "i1>, i32) "; - Str << " declare {<8 x i32>, i32} @llvm.vp.load.ff.v8i32.p0v8i32(<8 x " + Str << " declare {<8 x i32>, i32} " + "@llvm.experimental.vp.load.ff.v8i32.p0v8i32(<8 x " "i32>*, <8 x i1>, i32) "; Str << "declare <8 x i32> " "@llvm.experimental.vp.strided.load.v8i32.i32(i32*, i32, <8 " From 1b39f427f7a7b851118b83383a01f36d208d5566 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 13 Mar 2025 22:03:25 -0700 Subject: [PATCH 11/14] fixup! Address review comments --- llvm/docs/LangRef.rst | 4 ++-- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 3 +-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index a4254e249021a..33cddf65f12cf 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -23943,9 +23943,9 @@ Examples: %also.r = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr %ptr, i32 2, <8 x i1> %mask, <8 x i8> poison) -.. _int_vp_ff_load: +.. 
_int_experimental_vp_ff_load:

-'``llvm.vp.ff.load``' Intrinsic
+'``llvm.experimental.vp.load.ff``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Syntax:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 22e87046a4610..fbd85d59927ff 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -2237,9 +2237,8 @@ void DAGTypeLegalizer::SplitVecRes_VP_LOAD(VPLoadSDNode *LD, SDValue &Lo,
 
 void DAGTypeLegalizer::SplitVecRes_VP_LOAD_FF(VPLoadFFSDNode *LD, SDValue &Lo,
                                               SDValue &Hi) {
-  EVT LoVT, HiVT;
   SDLoc dl(LD);
-  std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0));
+  auto [LoVT, HiVT] = DAG.GetSplitDestVTs(LD->getValueType(0));
   SDValue Ch = LD->getChain();
   SDValue Ptr = LD->getBasePtr();

From cbf0bb0a1c73c854f38eb833339234f24c16613e Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Fri, 14 Mar 2025 12:57:46 -0700
Subject: [PATCH 12/14] fixup! address review comment

---
 llvm/docs/LangRef.rst                                 | 2 +-
 llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 33cddf65f12cf..033aa86d80310 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -24003,7 +24003,7 @@ second return value must be at least 1, even if the first lane is masked-off.
 
 The second result is usually less than ``evl`` when an exception would occur
 for reading that lane, but it can be reduced for any reason. This facilitates
-emulating this intrinsic when the hardware has only supports narrower vector
+emulating this intrinsic when the hardware only supports narrower vector
 types natively or when the hardware does not support fault-only-first loads.
 
 Masked-on lanes that are not inbounds of the allocation that contains the first
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 5cf13f16fec5d..6528001563576 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -8483,13 +8483,12 @@ void SelectionDAGBuilder::visitVPLoadFF(
   AAMDNodes AAInfo = VPIntrin.getAAMetadata();
   const MDNode *Ranges = VPIntrin.getMetadata(LLVMContext::MD_range);
   SDValue LD;
-  bool AddToChain = true;
   // Do not serialize variable-length loads of constant memory with
   // anything.
   if (!Alignment)
     Alignment = DAG.getEVTAlign(VT);
   MemoryLocation ML = MemoryLocation::getAfter(PtrOperand, AAInfo);
-  AddToChain = !BatchAA || !BatchAA->pointsToConstantMemory(ML);
+  bool AddToChain = !BatchAA || !BatchAA->pointsToConstantMemory(ML);
   SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
   MachineMemOperand *MMO = DAG.getMachineFunction().getMachineMemOperand(
       MachinePointerInfo(PtrOperand), MachineMemOperand::MOLoad,

From 249b585a42e8e1817ee9dc6a007aec13ca4176ca Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Fri, 14 Mar 2025 14:38:35 -0700
Subject: [PATCH 13/14] fixup! other doc fix I forgot to make.
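
As a usage sketch, not part of this patch: a minimal IR fragment showing the
pattern the LangRef text describes. The function name and the fixed VF of 16
are illustrative assumptions only.

  declare { <16 x i8>, i32 } @llvm.experimental.vp.load.ff.v16i8.p0(ptr, <16 x i1>, i32)

  define i32 @read_prefix(ptr %p) {
    ; Request up to 16 lanes. The second result reports how many lanes were
    ; actually read; per the semantics above it is at least 1 when evl != 0.
    %ld = call { <16 x i8>, i32 } @llvm.experimental.vp.load.ff.v16i8.p0(ptr %p, <16 x i1> splat (i1 true), i32 16)
    %chunk = extractvalue { <16 x i8>, i32 } %ld, 0
    %n = extractvalue { <16 x i8>, i32 } %ld, 1
    ; A strlen-style loop would scan only the first %n lanes of %chunk for a
    ; null terminator, then advance %p by %n and repeat; forward progress is
    ; guaranteed because %n >= 1.
    ret i32 %n
  }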
---
 llvm/docs/LangRef.rst | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 033aa86d80310..8eede401d0fa2 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -23983,11 +23983,11 @@ Semantics:
 The '``llvm.experimental.vp.load.ff``' intrinsic is designed for reading vector lanes in
 a single IR operation where the number of lanes that can be read is not known
 and can only be determined by looking at the data. This is useful for vectorizing
-strcmp or strlen like loops where the data contains a null terminator. This is
-useful for targets that have a fault-only-first load instruction. Other targets
-may support this intrinsic differently, for example by lowering to a single
-scalar load guarded by ``evl!=0`` and ``mask[0]==1`` and indicating only 1
-lane could be read.
+strcmp- or strlen-like loops where the data contains a null terminator. Some
+targets have a fault-only-first load instruction that this intrinsic can be
+lowered to. Other targets may support this intrinsic differently, for example by
+lowering to a single scalar load guarded by ``evl!=0`` and ``mask[0]==1`` and
+indicating only 1 lane could be read.
 
 Like '``llvm.vp.load``', this intrinsic reads memory based on a ``mask`` and an
 ``evl``. If ``evl`` is non-zero and the first lane is masked-on, then the

From 7e6a57e378e960ba104178e2701c2f7e04571dd0 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Fri, 14 Mar 2025 14:56:06 -0700
Subject: [PATCH 14/14] fixup! always use riscv_vleff_mask

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 27 +++++++++------------
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 26bc2b2e618d3..6a104683f2887 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -11987,29 +11987,24 @@ SDValue RISCVTargetLowering::lowerLoadFF(SDValue Op,
                                          SelectionDAG &DAG) const {
   SDValue Mask = VPLoadFF->getMask();
   SDValue VL = VPLoadFF->getVectorLength();
 
-  bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
-
   MVT XLenVT = Subtarget.getXLenVT();
 
   MVT ContainerVT = VT;
   if (VT.isFixedLengthVector()) {
     ContainerVT = getContainerForFixedLengthVector(VT);
-    if (!IsUnmasked) {
-      MVT MaskVT = getMaskTypeFor(ContainerVT);
-      Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
-    }
+    MVT MaskVT = getMaskTypeFor(ContainerVT);
+    Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
   }
 
-  unsigned IntID =
-      IsUnmasked ? Intrinsic::riscv_vleff : Intrinsic::riscv_vleff_mask;
-  SmallVector<SDValue, 8> Ops{Chain, DAG.getTargetConstant(IntID, DL, XLenVT)};
-  Ops.push_back(DAG.getUNDEF(ContainerVT));
-  Ops.push_back(BasePtr);
-  if (!IsUnmasked)
-    Ops.push_back(Mask);
-  Ops.push_back(VL);
-  if (!IsUnmasked)
-    Ops.push_back(DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT));
+  unsigned IntID = Intrinsic::riscv_vleff_mask;
+  SDValue Ops[] = {
+      Chain,
+      DAG.getTargetConstant(IntID, DL, XLenVT),
+      DAG.getUNDEF(ContainerVT),
+      BasePtr,
+      Mask,
+      VL,
+      DAG.getTargetConstant(RISCVVType::TAIL_AGNOSTIC, DL, XLenVT)};
 
   SDVTList VTs = DAG.getVTList({ContainerVT, Op->getValueType(1), MVT::Other});