diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f56097fdbb51a..2b8818482a333 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -2913,7 +2913,9 @@ SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) {
 }
 
 SDValue SelectionDAGLegalize::PromoteReduction(SDNode *Node) {
-  MVT VecVT = Node->getOperand(1).getSimpleValueType();
+  bool IsVPOpcode = ISD::isVPOpcode(Node->getOpcode());
+  MVT VecVT = IsVPOpcode ? Node->getOperand(1).getSimpleValueType()
+                         : Node->getOperand(0).getSimpleValueType();
   MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT);
   MVT ScalarVT = Node->getSimpleValueType(0);
   MVT NewScalarVT = NewVecVT.getVectorElementType();
@@ -2921,16 +2923,13 @@ SDValue SelectionDAGLegalize::PromoteReduction(SDNode *Node) {
   SDLoc DL(Node);
   SmallVector<SDValue, 4> Operands(Node->getNumOperands());
 
-  // promote the initial value.
   // FIXME: Support integer.
   assert(Node->getOperand(0).getValueType().isFloatingPoint() &&
          "Only FP promotion is supported");
-  Operands[0] =
-      DAG.getNode(ISD::FP_EXTEND, DL, NewScalarVT, Node->getOperand(0));
 
-  for (unsigned j = 1; j != Node->getNumOperands(); ++j)
+  for (unsigned j = 0; j != Node->getNumOperands(); ++j)
     if (Node->getOperand(j).getValueType().isVector() &&
-        !(ISD::isVPOpcode(Node->getOpcode()) &&
+        !(IsVPOpcode &&
           ISD::getVPMaskIdx(Node->getOpcode()) == j)) { // Skip mask operand.
       // promote the vector operand.
       // FIXME: Support integer.
@@ -2938,6 +2937,10 @@ SDValue SelectionDAGLegalize::PromoteReduction(SDNode *Node) {
              "Only FP promotion is supported");
       Operands[j] =
           DAG.getNode(ISD::FP_EXTEND, DL, NewVecVT, Node->getOperand(j));
+    } else if (Node->getOperand(j).getValueType().isFloatingPoint()) {
+      // promote the initial value.
+      Operands[j] =
+          DAG.getNode(ISD::FP_EXTEND, DL, NewScalarVT, Node->getOperand(j));
     } else {
       Operands[j] = Node->getOperand(j); // Skip VL operand.
     }
@@ -5049,7 +5052,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
       Node->getOpcode() == ISD::SINT_TO_FP ||
       Node->getOpcode() == ISD::SETCC ||
       Node->getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
-      Node->getOpcode() == ISD::INSERT_VECTOR_ELT) {
+      Node->getOpcode() == ISD::INSERT_VECTOR_ELT ||
+      Node->getOpcode() == ISD::VECREDUCE_FMAX ||
+      Node->getOpcode() == ISD::VECREDUCE_FMIN ||
+      Node->getOpcode() == ISD::VECREDUCE_FMAXIMUM ||
+      Node->getOpcode() == ISD::VECREDUCE_FMINIMUM) {
     OVT = Node->getOperand(0).getSimpleValueType();
   }
   if (Node->getOpcode() == ISD::ATOMIC_STORE ||
@@ -5796,6 +5803,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
                     DAG.getIntPtrConstant(0, dl, /*isTarget=*/true)));
     break;
   }
+  case ISD::VECREDUCE_FMAX:
+  case ISD::VECREDUCE_FMIN:
+  case ISD::VECREDUCE_FMAXIMUM:
+  case ISD::VECREDUCE_FMINIMUM:
   case ISD::VP_REDUCE_FMAX:
   case ISD::VP_REDUCE_FMIN:
   case ISD::VP_REDUCE_FMAXIMUM:
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index de4447fb0cf1a..27bde7b96c857 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -503,13 +503,19 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
   case ISD::VECREDUCE_UMIN:
   case ISD::VECREDUCE_FADD:
   case ISD::VECREDUCE_FMUL:
+  case ISD::VECTOR_FIND_LAST_ACTIVE:
+    Action = TLI.getOperationAction(Node->getOpcode(),
+                                    Node->getOperand(0).getValueType());
+    break;
   case ISD::VECREDUCE_FMAX:
   case ISD::VECREDUCE_FMIN:
   case ISD::VECREDUCE_FMAXIMUM:
   case ISD::VECREDUCE_FMINIMUM:
-  case ISD::VECTOR_FIND_LAST_ACTIVE:
     Action = TLI.getOperationAction(Node->getOpcode(),
                                     Node->getOperand(0).getValueType());
+    // Defer non-vector results to LegalizeDAG.
+    if (Action == TargetLowering::Promote)
+      Action = TargetLowering::Legal;
     break;
   case ISD::VECREDUCE_SEQ_FADD:
   case ISD::VECREDUCE_SEQ_FMUL:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 6076fe56416ad..759cf531b74b1 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -959,13 +959,35 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
 
   // TODO: support more ops.
   static const unsigned ZvfhminZvfbfminPromoteOps[] = {
-      ISD::FMINNUM,     ISD::FMAXNUM,     ISD::FADD,        ISD::FSUB,
-      ISD::FMUL,        ISD::FMA,         ISD::FDIV,        ISD::FSQRT,
-      ISD::FCEIL,       ISD::FTRUNC,      ISD::FFLOOR,      ISD::FROUND,
-      ISD::FROUNDEVEN,  ISD::FRINT,       ISD::FNEARBYINT,  ISD::IS_FPCLASS,
-      ISD::SETCC,       ISD::FMAXIMUM,    ISD::FMINIMUM,    ISD::STRICT_FADD,
-      ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FSQRT,
-      ISD::STRICT_FMA};
+      ISD::FMINNUM,
+      ISD::FMAXNUM,
+      ISD::FADD,
+      ISD::FSUB,
+      ISD::FMUL,
+      ISD::FMA,
+      ISD::FDIV,
+      ISD::FSQRT,
+      ISD::FCEIL,
+      ISD::FTRUNC,
+      ISD::FFLOOR,
+      ISD::FROUND,
+      ISD::FROUNDEVEN,
+      ISD::FRINT,
+      ISD::FNEARBYINT,
+      ISD::IS_FPCLASS,
+      ISD::SETCC,
+      ISD::FMAXIMUM,
+      ISD::FMINIMUM,
+      ISD::STRICT_FADD,
+      ISD::STRICT_FSUB,
+      ISD::STRICT_FMUL,
+      ISD::STRICT_FDIV,
+      ISD::STRICT_FSQRT,
+      ISD::STRICT_FMA,
+      ISD::VECREDUCE_FMIN,
+      ISD::VECREDUCE_FMAX,
+      ISD::VECREDUCE_FMINIMUM,
+      ISD::VECREDUCE_FMAXIMUM};
 
   // TODO: support more vp ops.
   static const unsigned ZvfhminZvfbfminPromoteVPOps[] = {
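
For context on the target side of the new promotion path: LegalizeDAG only reaches the VECREDUCE_* cases added above once the target has marked those opcodes as Promote on the narrow vector type and recorded the wider type returned by TLI.getTypeToPromoteTo(). A minimal C++ sketch of that registration, assuming it runs inside a TargetLowering subclass constructor with this patch's ZvfhminZvfbfminPromoteOps array in scope; the VT list in the loop is illustrative, not the exact RISC-V code:

    // Promote each listed op on the narrow bf16 vector type, recording the
    // f32 vector type with the same element count as the promotion target.
    // setOperationPromotedToType() both sets the Promote action and fills the
    // table that TLI.getTypeToPromoteTo() consults.
    for (MVT VT : {MVT::nxv1bf16, MVT::nxv2bf16, MVT::nxv4bf16}) {
      MVT F32VecVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
      setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
    }
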
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-bf16.ll
new file mode 100644
index 0000000000000..1c42cd29deca9
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-bf16.ll
@@ -0,0 +1,136 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s
+
+define bfloat @vreduce_fmin_nxv4bf16(<vscale x 4 x bfloat> %val) {
+; CHECK-LABEL: vreduce_fmin_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vfredmin.vs v8, v10, v10
+; CHECK-NEXT:    vfmv.f.s fa5, v8
+; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK-NEXT:    ret
+  %s = call bfloat @llvm.vector.reduce.fmin.nxv4bf16(<vscale x 4 x bfloat> %val)
+  ret bfloat %s
+}
+
+define bfloat @vreduce_fmax_nxv4bf16(<vscale x 4 x bfloat> %val) {
+; CHECK-LABEL: vreduce_fmax_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vfredmax.vs v8, v10, v10
+; CHECK-NEXT:    vfmv.f.s fa5, v8
+; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK-NEXT:    ret
+  %s = call bfloat @llvm.vector.reduce.fmax.nxv4bf16(<vscale x 4 x bfloat> %val)
+  ret bfloat %s
+}
+
+define bfloat @vreduce_fmin_nnan_nxv4bf16(<vscale x 4 x bfloat> %val) {
+; CHECK-LABEL: vreduce_fmin_nnan_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vfredmin.vs v8, v10, v10
+; CHECK-NEXT:    vfmv.f.s fa5, v8
+; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK-NEXT:    ret
+  %s = call nnan bfloat @llvm.vector.reduce.fmin.nxv4bf16(<vscale x 4 x bfloat> %val)
+  ret bfloat %s
+}
+
+define bfloat @vreduce_fmax_nnan_nxv4bf16(<vscale x 4 x bfloat> %val) {
+; CHECK-LABEL: vreduce_fmax_nnan_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vfredmax.vs v8, v10, v10
+; CHECK-NEXT:    vfmv.f.s fa5, v8
+; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK-NEXT:    ret
+  %s = call nnan bfloat @llvm.vector.reduce.fmax.nxv4bf16(<vscale x 4 x bfloat> %val)
+  ret bfloat %s
+}
+
+define bfloat @vreduce_fminimum_nxv4bf16(<vscale x 4 x bfloat> %val) {
+; CHECK-LABEL: vreduce_fminimum_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vmfne.vv v8, v10, v10
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    beqz a0, .LBB4_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, 523264
+; CHECK-NEXT:    fmv.w.x fa5, a0
+; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB4_2:
+; CHECK-NEXT:    vfredmin.vs v8, v10, v10
+; CHECK-NEXT:    vfmv.f.s fa5, v8
+; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK-NEXT:    ret
+  %s = call bfloat @llvm.vector.reduce.fminimum.nxv4bf16(<vscale x 4 x bfloat> %val)
+  ret bfloat %s
+}
+
+define bfloat @vreduce_fmaximum_nxv4bf16(<vscale x 4 x bfloat> %val) {
+; CHECK-LABEL: vreduce_fmaximum_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vmfne.vv v8, v10, v10
+; CHECK-NEXT:    vcpop.m a0, v8
+; CHECK-NEXT:    beqz a0, .LBB5_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, 523264
+; CHECK-NEXT:    fmv.w.x fa5, a0
+; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB5_2:
+; CHECK-NEXT:    vfredmax.vs v8, v10, v10
+; CHECK-NEXT:    vfmv.f.s fa5, v8
+; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK-NEXT:    ret
+  %s = call bfloat @llvm.vector.reduce.fmaximum.nxv4bf16(<vscale x 4 x bfloat> %val)
+  ret bfloat %s
+}
+
+define bfloat @vreduce_fminimum_nnan_nxv4bf16(<vscale x 4 x bfloat> %val) {
+; CHECK-LABEL: vreduce_fminimum_nnan_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vfredmin.vs v8, v10, v10
+; CHECK-NEXT:    vfmv.f.s fa5, v8
+; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK-NEXT:    ret
+  %s = call nnan bfloat @llvm.vector.reduce.fminimum.nxv4bf16(<vscale x 4 x bfloat> %val)
+  ret bfloat %s
+}
+
+define bfloat @vreduce_fmaximum_nnan_nxv4bf16(<vscale x 4 x bfloat> %val) {
+; CHECK-LABEL: vreduce_fmaximum_nnan_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vfredmax.vs v8, v10, v10
+; CHECK-NEXT:    vfmv.f.s fa5, v8
+; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK-NEXT:    ret
+  %s = call nnan bfloat @llvm.vector.reduce.fmaximum.nxv4bf16(<vscale x 4 x bfloat> %val)
+  ret bfloat %s
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll
new file mode 100644
index 0000000000000..e269b13137d44
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll
@@ -0,0 +1,212 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
+
+define half @vreduce_fmin_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fmin_nxv4f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfredmin.vs v8, v8, v8
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vreduce_fmin_nxv4f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfredmin.vs v8, v10, v10
+; ZVFHMIN-NEXT:    vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT:    fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT:    ret
+  %s = call half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %val)
+  ret half %s
+}
+
+define half @vreduce_fmax_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fmax_nxv4f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfredmax.vs v8, v8, v8
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vreduce_fmax_nxv4f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfredmax.vs v8, v10, v10
+; ZVFHMIN-NEXT:    vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT:    fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT:    ret
+  %s = call half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %val)
+  ret half %s
+}
+
+define half @vreduce_fmin_nnan_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fmin_nnan_nxv4f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfredmin.vs v8, v8, v8
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vreduce_fmin_nnan_nxv4f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfredmin.vs v8, v10, v10
+; ZVFHMIN-NEXT:    vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT:    fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT:    ret
+  %s = call nnan half @llvm.vector.reduce.fmin.nxv4f16(<vscale x 4 x half> %val)
+  ret half %s
+}
+
+define half @vreduce_fmax_nnan_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fmax_nnan_nxv4f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfredmax.vs v8, v8, v8
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vreduce_fmax_nnan_nxv4f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfredmax.vs v8, v10, v10
+; ZVFHMIN-NEXT:    vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT:    fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT:    ret
+  %s = call nnan half @llvm.vector.reduce.fmax.nxv4f16(<vscale x 4 x half> %val)
+  ret half %s
+}
+
+define half @vreduce_fminimum_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fminimum_nxv4f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vmfne.vv v9, v8, v8
+; ZVFH-NEXT:    vcpop.m a0, v9
+; ZVFH-NEXT:    beqz a0, .LBB4_2
+; ZVFH-NEXT:  # %bb.1:
+; ZVFH-NEXT:    lui a0, %hi(.LCPI4_0)
+; ZVFH-NEXT:    flh fa0, %lo(.LCPI4_0)(a0)
+; ZVFH-NEXT:    ret
+; ZVFH-NEXT:  .LBB4_2:
+; ZVFH-NEXT:    vfredmin.vs v8, v8, v8
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vreduce_fminimum_nxv4f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vmfne.vv v8, v10, v10
+; ZVFHMIN-NEXT:    vcpop.m a0, v8
+; ZVFHMIN-NEXT:    beqz a0, .LBB4_2
+; ZVFHMIN-NEXT:  # %bb.1:
+; ZVFHMIN-NEXT:    lui a0, 523264
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT:    ret
+; ZVFHMIN-NEXT:  .LBB4_2:
+; ZVFHMIN-NEXT:    vfredmin.vs v8, v10, v10
+; ZVFHMIN-NEXT:    vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT:    fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT:    ret
+  %s = call half @llvm.vector.reduce.fminimum.nxv4f16(<vscale x 4 x half> %val)
+  ret half %s
+}
+
+define half @vreduce_fmaximum_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fmaximum_nxv4f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vmfne.vv v9, v8, v8
+; ZVFH-NEXT:    vcpop.m a0, v9
+; ZVFH-NEXT:    beqz a0, .LBB5_2
+; ZVFH-NEXT:  # %bb.1:
+; ZVFH-NEXT:    lui a0, %hi(.LCPI5_0)
+; ZVFH-NEXT:    flh fa0, %lo(.LCPI5_0)(a0)
+; ZVFH-NEXT:    ret
+; ZVFH-NEXT:  .LBB5_2:
+; ZVFH-NEXT:    vfredmax.vs v8, v8, v8
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vreduce_fmaximum_nxv4f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vmfne.vv v8, v10, v10
+; ZVFHMIN-NEXT:    vcpop.m a0, v8
+; ZVFHMIN-NEXT:    beqz a0, .LBB5_2
+; ZVFHMIN-NEXT:  # %bb.1:
+; ZVFHMIN-NEXT:    lui a0, 523264
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT:    ret
+; ZVFHMIN-NEXT:  .LBB5_2:
+; ZVFHMIN-NEXT:    vfredmax.vs v8, v10, v10
+; ZVFHMIN-NEXT:    vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT:    fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT:    ret
+  %s = call half @llvm.vector.reduce.fmaximum.nxv4f16(<vscale x 4 x half> %val)
+  ret half %s
+}
+
+define half @vreduce_fminimum_nnan_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fminimum_nnan_nxv4f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfredmin.vs v8, v8, v8
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vreduce_fminimum_nnan_nxv4f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfredmin.vs v8, v10, v10
+; ZVFHMIN-NEXT:    vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT:    fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT:    ret
+  %s = call nnan half @llvm.vector.reduce.fminimum.nxv4f16(<vscale x 4 x half> %val)
+  ret half %s
+}
+
+define half @vreduce_fmaximum_nnan_nxv4f16(<vscale x 4 x half> %val) {
+; ZVFH-LABEL: vreduce_fmaximum_nnan_nxv4f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFH-NEXT:    vfredmax.vs v8, v8, v8
+; ZVFH-NEXT:    vfmv.f.s fa0, v8
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vreduce_fmaximum_nnan_nxv4f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfredmax.vs v8, v10, v10
+; ZVFHMIN-NEXT:    vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT:    fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT:    ret
+  %s = call nnan half @llvm.vector.reduce.fmaximum.nxv4f16(<vscale x 4 x half> %val)
+  ret half %s
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-bf16.ll
new file mode 100644
index 0000000000000..37bd0a0496dcf
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-bf16.ll
@@ -0,0 +1,167 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s
+
+define bfloat @vpreduce_fmin_nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_fmin_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; CHECK-NEXT:    vfmv.s.f v8, fa5
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vfredmin.vs v8, v10, v8, v0.t
+; CHECK-NEXT:    vfmv.f.s fa5, v8
+; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK-NEXT:    ret
+  %s = call bfloat @llvm.vp.reduce.fmin.nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 %evl)
+  ret bfloat %s
+}
+
+define bfloat @vpreduce_fmax_nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_fmax_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; CHECK-NEXT:    vfmv.s.f v8, fa5
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vfredmax.vs v8, v10, v8, v0.t
+; CHECK-NEXT:    vfmv.f.s fa5, v8
+; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK-NEXT:    ret
+  %s = call bfloat @llvm.vp.reduce.fmax.nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 %evl)
+  ret bfloat %s
+}
+
+define bfloat @vpreduce_fmin_nnan_nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_fmin_nnan_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; CHECK-NEXT:    vfmv.s.f v8, fa5
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vfredmin.vs v8, v10, v8, v0.t
+; CHECK-NEXT:    vfmv.f.s fa5, v8
+; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK-NEXT:    ret
+  %s = call nnan bfloat @llvm.vp.reduce.fmin.nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 %evl)
+  ret bfloat %s
+}
+
+define bfloat @vpreduce_fmax_nnan_nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_fmax_nnan_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; CHECK-NEXT:    vfmv.s.f v8, fa5
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vfredmax.vs v8, v10, v8, v0.t
+; CHECK-NEXT:    vfmv.f.s fa5, v8
+; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK-NEXT:    ret
+  %s = call nnan bfloat @llvm.vp.reduce.fmax.nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 %evl)
+  ret bfloat %s
+}
+
+define bfloat @vpreduce_fminimum_nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_fminimum_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vmfne.vv v8, v10, v10, v0.t
+; CHECK-NEXT:    feq.s a1, fa5, fa5
+; CHECK-NEXT:    vcpop.m a2, v8, v0.t
+; CHECK-NEXT:    xori a1, a1, 1
+; CHECK-NEXT:    or a1, a2, a1
+; CHECK-NEXT:    beqz a1, .LBB4_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, 523264
+; CHECK-NEXT:    fmv.w.x fa5, a0
+; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB4_2:
+; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; CHECK-NEXT:    vfmv.s.f v8, fa5
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vfredmin.vs v8, v10, v8, v0.t
+; CHECK-NEXT:    vfmv.f.s fa5, v8
+; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK-NEXT:    ret
+  %s = call bfloat @llvm.vp.reduce.fminimum.nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 %evl)
+  ret bfloat %s
+}
+
+define bfloat @vpreduce_fmaximum_nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_fmaximum_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; CHECK-NEXT:    vmfne.vv v8, v10, v10, v0.t
+; CHECK-NEXT:    feq.s a1, fa5, fa5
+; CHECK-NEXT:    vcpop.m a2, v8, v0.t
+; CHECK-NEXT:    xori a1, a1, 1
+; CHECK-NEXT:    or a1, a2, a1
+; CHECK-NEXT:    beqz a1, .LBB5_2
+; CHECK-NEXT:  # %bb.1:
+; CHECK-NEXT:    lui a0, 523264
+; CHECK-NEXT:    fmv.w.x fa5, a0
+; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK-NEXT:    ret
+; CHECK-NEXT:  .LBB5_2:
+; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; CHECK-NEXT:    vfmv.s.f v8, fa5
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vfredmax.vs v8, v10, v8, v0.t
+; CHECK-NEXT:    vfmv.f.s fa5, v8
+; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK-NEXT:    ret
+  %s = call bfloat @llvm.vp.reduce.fmaximum.nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 %evl)
+  ret bfloat %s
+}
+
+define bfloat @vpreduce_fminimum_nnan_nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_fminimum_nnan_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; CHECK-NEXT:    vfmv.s.f v8, fa5
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vfredmin.vs v8, v10, v8, v0.t
+; CHECK-NEXT:    vfmv.f.s fa5, v8
+; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK-NEXT:    ret
+  %s = call nnan bfloat @llvm.vp.reduce.fminimum.nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 %evl)
+  ret bfloat %s
+}
+
+define bfloat @vpreduce_fmaximum_nnan_nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vpreduce_fmaximum_nnan_nxv4bf16:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
+; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
+; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; CHECK-NEXT:    vfmv.s.f v8, fa5
+; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; CHECK-NEXT:    vfredmax.vs v8, v10, v8, v0.t
+; CHECK-NEXT:    vfmv.f.s fa5, v8
+; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
+; CHECK-NEXT:    ret
+  %s = call nnan bfloat @llvm.vp.reduce.fmaximum.nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 %evl)
+  ret bfloat %s
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll
new file mode 100644
index 0000000000000..8993bf8a767d8
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll
@@ -0,0 +1,269 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH
+; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
+; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \
+; RUN:     -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN
+
+define half @vpreduce_fmin_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fmin_nxv4f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT:    vfmv.s.f v9, fa0
+; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT:    vfredmin.vs v9, v8, v9, v0.t
+; ZVFH-NEXT:    vfmv.f.s fa0, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vpreduce_fmin_nxv4f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfredmin.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT:    fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT:    ret
+  %s = call half @llvm.vp.reduce.fmin.nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 %evl)
+  ret half %s
+}
+
+define half @vpreduce_fmax_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fmax_nxv4f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT:    vfmv.s.f v9, fa0
+; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT:    vfredmax.vs v9, v8, v9, v0.t
+; ZVFH-NEXT:    vfmv.f.s fa0, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vpreduce_fmax_nxv4f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfredmax.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT:    fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT:    ret
+  %s = call half @llvm.vp.reduce.fmax.nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 %evl)
+  ret half %s
+}
+
+define half @vpreduce_fmin_nnan_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fmin_nnan_nxv4f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT:    vfmv.s.f v9, fa0
+; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT:    vfredmin.vs v9, v8, v9, v0.t
+; ZVFH-NEXT:    vfmv.f.s fa0, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vpreduce_fmin_nnan_nxv4f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfredmin.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT:    fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT:    ret
+  %s = call nnan half @llvm.vp.reduce.fmin.nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 %evl)
+  ret half %s
+}
+
+define half @vpreduce_fmax_nnan_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fmax_nnan_nxv4f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT:    vfmv.s.f v9, fa0
+; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT:    vfredmax.vs v9, v8, v9, v0.t
+; ZVFH-NEXT:    vfmv.f.s fa0, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vpreduce_fmax_nnan_nxv4f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfredmax.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT:    fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT:    ret
+  %s = call nnan half @llvm.vp.reduce.fmax.nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 %evl)
+  ret half %s
+}
+
+define half @vpreduce_fminimum_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fminimum_nxv4f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT:    vmfne.vv v9, v8, v8, v0.t
+; ZVFH-NEXT:    fcvt.s.h fa5, fa0
+; ZVFH-NEXT:    vcpop.m a1, v9, v0.t
+; ZVFH-NEXT:    feq.s a2, fa5, fa5
+; ZVFH-NEXT:    xori a2, a2, 1
+; ZVFH-NEXT:    or a1, a1, a2
+; ZVFH-NEXT:    beqz a1, .LBB4_2
+; ZVFH-NEXT:  # %bb.1:
+; ZVFH-NEXT:    lui a0, %hi(.LCPI4_0)
+; ZVFH-NEXT:    flh fa0, %lo(.LCPI4_0)(a0)
+; ZVFH-NEXT:    ret
+; ZVFH-NEXT:  .LBB4_2:
+; ZVFH-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT:    vfmv.s.f v9, fa0
+; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT:    vfredmin.vs v9, v8, v9, v0.t
+; ZVFH-NEXT:    vfmv.f.s fa0, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vpreduce_fminimum_nxv4f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vmfne.vv v8, v10, v10, v0.t
+; ZVFHMIN-NEXT:    feq.s a1, fa5, fa5
+; ZVFHMIN-NEXT:    vcpop.m a2, v8, v0.t
+; ZVFHMIN-NEXT:    xori a1, a1, 1
+; ZVFHMIN-NEXT:    or a1, a2, a1
+; ZVFHMIN-NEXT:    beqz a1, .LBB4_2
+; ZVFHMIN-NEXT:  # %bb.1:
+; ZVFHMIN-NEXT:    lui a0, 523264
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT:    ret
+; ZVFHMIN-NEXT:  .LBB4_2:
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfredmin.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT:    fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT:    ret
+  %s = call half @llvm.vp.reduce.fminimum.nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 %evl)
+  ret half %s
+}
+
+define half @vpreduce_fmaximum_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fmaximum_nxv4f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT:    vmfne.vv v9, v8, v8, v0.t
+; ZVFH-NEXT:    fcvt.s.h fa5, fa0
+; ZVFH-NEXT:    vcpop.m a1, v9, v0.t
+; ZVFH-NEXT:    feq.s a2, fa5, fa5
+; ZVFH-NEXT:    xori a2, a2, 1
+; ZVFH-NEXT:    or a1, a1, a2
+; ZVFH-NEXT:    beqz a1, .LBB5_2
+; ZVFH-NEXT:  # %bb.1:
+; ZVFH-NEXT:    lui a0, %hi(.LCPI5_0)
+; ZVFH-NEXT:    flh fa0, %lo(.LCPI5_0)(a0)
+; ZVFH-NEXT:    ret
+; ZVFH-NEXT:  .LBB5_2:
+; ZVFH-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT:    vfmv.s.f v9, fa0
+; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT:    vfredmax.vs v9, v8, v9, v0.t
+; ZVFH-NEXT:    vfmv.f.s fa0, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vpreduce_fmaximum_nxv4f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vmfne.vv v8, v10, v10, v0.t
+; ZVFHMIN-NEXT:    feq.s a1, fa5, fa5
+; ZVFHMIN-NEXT:    vcpop.m a2, v8, v0.t
+; ZVFHMIN-NEXT:    xori a1, a1, 1
+; ZVFHMIN-NEXT:    or a1, a2, a1
+; ZVFHMIN-NEXT:    beqz a1, .LBB5_2
+; ZVFHMIN-NEXT:  # %bb.1:
+; ZVFHMIN-NEXT:    lui a0, 523264
+; ZVFHMIN-NEXT:    fmv.w.x fa5, a0
+; ZVFHMIN-NEXT:    fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT:    ret
+; ZVFHMIN-NEXT:  .LBB5_2:
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfredmax.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT:    fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT:    ret
+  %s = call half @llvm.vp.reduce.fmaximum.nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 %evl)
+  ret half %s
+}
+
+define half @vpreduce_fminimum_nnan_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fminimum_nnan_nxv4f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT:    vfmv.s.f v9, fa0
+; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT:    vfredmin.vs v9, v8, v9, v0.t
+; ZVFH-NEXT:    vfmv.f.s fa0, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vpreduce_fminimum_nnan_nxv4f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfredmin.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT:    fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT:    ret
+  %s = call nnan half @llvm.vp.reduce.fminimum.nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 %evl)
+  ret half %s
+}
+
+define half @vpreduce_fmaximum_nnan_nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
+; ZVFH-LABEL: vpreduce_fmaximum_nnan_nxv4f16:
+; ZVFH:       # %bb.0:
+; ZVFH-NEXT:    vsetivli zero, 1, e16, m1, ta, ma
+; ZVFH-NEXT:    vfmv.s.f v9, fa0
+; ZVFH-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFH-NEXT:    vfredmax.vs v9, v8, v9, v0.t
+; ZVFH-NEXT:    vfmv.f.s fa0, v9
+; ZVFH-NEXT:    ret
+;
+; ZVFHMIN-LABEL: vpreduce_fmaximum_nnan_nxv4f16:
+; ZVFHMIN:       # %bb.0:
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; ZVFHMIN-NEXT:    vfwcvt.f.f.v v10, v8
+; ZVFHMIN-NEXT:    fcvt.s.h fa5, fa0
+; ZVFHMIN-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfmv.s.f v8, fa5
+; ZVFHMIN-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
+; ZVFHMIN-NEXT:    vfredmax.vs v8, v10, v8, v0.t
+; ZVFHMIN-NEXT:    vfmv.f.s fa5, v8
+; ZVFHMIN-NEXT:    fcvt.h.s fa0, fa5
+; ZVFHMIN-NEXT:    ret
+  %s = call nnan half @llvm.vp.reduce.fmaximum.nxv4f16(half %start, <vscale x 4 x half> %val, <vscale x 4 x i1> %m, i32 %evl)
+  ret half %s
+}