From 50cc58c6078e722fe284f91eb29fcf80b3caf0e0 Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Mon, 24 Feb 2025 16:00:39 +0800 Subject: [PATCH 1/5] [SelectionDAG][RISCV] Promote VECREDUCE_{FMAX,FMIN,FMAXIMUM,FMINIMUM} This patch also adds the tests for VP_REDUCE_{FMAX,FMIN,FMAXIMUM,FMINIMUM}, which have been supported for a while. --- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 25 +- .../SelectionDAG/LegalizeVectorOps.cpp | 8 +- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 36 ++- .../RISCV/rvv/vreductions-fp-sdnode-f16.ll | 212 ++++++++++++++ .../RISCV/rvv/vreductions-fp-vp-f16.ll | 269 ++++++++++++++++++ 5 files changed, 535 insertions(+), 15 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index f56097fdbb51a..5f70588ab84e4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2913,7 +2913,9 @@ SDValue SelectionDAGLegalize::ExpandPARITY(SDValue Op, const SDLoc &dl) { } SDValue SelectionDAGLegalize::PromoteReduction(SDNode *Node) { - MVT VecVT = Node->getOperand(1).getSimpleValueType(); + bool IsVPOpcode = ISD::isVPOpcode(Node->getOpcode()); + MVT VecVT = IsVPOpcode ? Node->getOperand(1).getSimpleValueType() + : Node->getOperand(0).getSimpleValueType(); MVT NewVecVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VecVT); MVT ScalarVT = Node->getSimpleValueType(0); MVT NewScalarVT = NewVecVT.getVectorElementType(); @@ -2921,16 +2923,13 @@ SDValue SelectionDAGLegalize::PromoteReduction(SDNode *Node) { SDLoc DL(Node); SmallVector Operands(Node->getNumOperands()); - // promote the initial value. // FIXME: Support integer. assert(Node->getOperand(0).getValueType().isFloatingPoint() && "Only FP promotion is supported"); - Operands[0] = - DAG.getNode(ISD::FP_EXTEND, DL, NewScalarVT, Node->getOperand(0)); - for (unsigned j = 1; j != Node->getNumOperands(); ++j) + for (unsigned j = 0; j != Node->getNumOperands(); ++j) if (Node->getOperand(j).getValueType().isVector() && - !(ISD::isVPOpcode(Node->getOpcode()) && + !(IsVPOpcode && ISD::getVPMaskIdx(Node->getOpcode()) == j)) { // Skip mask operand. // promote the vector operand. // FIXME: Support integer. @@ -2938,6 +2937,10 @@ SDValue SelectionDAGLegalize::PromoteReduction(SDNode *Node) { "Only FP promotion is supported"); Operands[j] = DAG.getNode(ISD::FP_EXTEND, DL, NewVecVT, Node->getOperand(j)); + } else if (Node->getOperand(j).getValueType().isFloatingPoint()) { + // prmote the initial value. + Operands[j] = + DAG.getNode(ISD::FP_EXTEND, DL, NewScalarVT, Node->getOperand(j)); } else { Operands[j] = Node->getOperand(j); // Skip VL operand. } @@ -5049,7 +5052,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Node->getOpcode() == ISD::SINT_TO_FP || Node->getOpcode() == ISD::SETCC || Node->getOpcode() == ISD::EXTRACT_VECTOR_ELT || - Node->getOpcode() == ISD::INSERT_VECTOR_ELT) { + Node->getOpcode() == ISD::INSERT_VECTOR_ELT || + Node->getOpcode() == ISD::VECREDUCE_FMAX || + Node->getOpcode() == ISD::VECREDUCE_FMIN || + Node->getOpcode() == ISD::VECREDUCE_FMAXIMUM || + Node->getOpcode() == ISD::VECREDUCE_FMINIMUM) { OVT = Node->getOperand(0).getSimpleValueType(); } if (Node->getOpcode() == ISD::ATOMIC_STORE || @@ -5796,6 +5803,10 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { DAG.getIntPtrConstant(0, dl, /*isTarget=*/true))); break; } + case ISD::VECREDUCE_FMAX: + case ISD::VECREDUCE_FMIN: + case ISD::VECREDUCE_FMAXIMUM: + case ISD::VECREDUCE_FMINIMUM: case ISD::VP_REDUCE_FMAX: case ISD::VP_REDUCE_FMIN: case ISD::VP_REDUCE_FMAXIMUM: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index de4447fb0cf1a..13345b76e7e92 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -503,13 +503,19 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::VECREDUCE_UMIN: case ISD::VECREDUCE_FADD: case ISD::VECREDUCE_FMUL: + case ISD::VECTOR_FIND_LAST_ACTIVE: + Action = TLI.getOperationAction(Node->getOpcode(), + Node->getOperand(0).getValueType()); + break; case ISD::VECREDUCE_FMAX: case ISD::VECREDUCE_FMIN: case ISD::VECREDUCE_FMAXIMUM: case ISD::VECREDUCE_FMINIMUM: - case ISD::VECTOR_FIND_LAST_ACTIVE: Action = TLI.getOperationAction(Node->getOpcode(), Node->getOperand(0).getValueType()); + // Defer non-vector results to LegalizeDAG. + if (Action == TargetLowering::Promote) + Action = TargetLowering::Legal; break; case ISD::VECREDUCE_SEQ_FADD: case ISD::VECREDUCE_SEQ_FMUL: diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 6076fe56416ad..759cf531b74b1 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -959,13 +959,35 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, // TODO: support more ops. static const unsigned ZvfhminZvfbfminPromoteOps[] = { - ISD::FMINNUM, ISD::FMAXNUM, ISD::FADD, ISD::FSUB, - ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT, - ISD::FCEIL, ISD::FTRUNC, ISD::FFLOOR, ISD::FROUND, - ISD::FROUNDEVEN, ISD::FRINT, ISD::FNEARBYINT, ISD::IS_FPCLASS, - ISD::SETCC, ISD::FMAXIMUM, ISD::FMINIMUM, ISD::STRICT_FADD, - ISD::STRICT_FSUB, ISD::STRICT_FMUL, ISD::STRICT_FDIV, ISD::STRICT_FSQRT, - ISD::STRICT_FMA}; + ISD::FMINNUM, + ISD::FMAXNUM, + ISD::FADD, + ISD::FSUB, + ISD::FMUL, + ISD::FMA, + ISD::FDIV, + ISD::FSQRT, + ISD::FCEIL, + ISD::FTRUNC, + ISD::FFLOOR, + ISD::FROUND, + ISD::FROUNDEVEN, + ISD::FRINT, + ISD::FNEARBYINT, + ISD::IS_FPCLASS, + ISD::SETCC, + ISD::FMAXIMUM, + ISD::FMINIMUM, + ISD::STRICT_FADD, + ISD::STRICT_FSUB, + ISD::STRICT_FMUL, + ISD::STRICT_FDIV, + ISD::STRICT_FSQRT, + ISD::STRICT_FMA, + ISD::VECREDUCE_FMIN, + ISD::VECREDUCE_FMAX, + ISD::VECREDUCE_FMINIMUM, + ISD::VECREDUCE_FMAXIMUM}; // TODO: support more vp ops. static const unsigned ZvfhminZvfbfminPromoteVPOps[] = { diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll new file mode 100644 index 0000000000000..e269b13137d44 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-f16.ll @@ -0,0 +1,212 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN + +define half @vreduce_fmin_nxv4f16( %val) { +; ZVFH-LABEL: vreduce_fmin_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfredmin.vs v8, v8, v8 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vreduce_fmin_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v10 +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call half @llvm.vector.reduce.fmin.nxv4f16( %val) + ret half %s +} + +define half @vreduce_fmax_nxv4f16( %val) { +; ZVFH-LABEL: vreduce_fmax_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfredmax.vs v8, v8, v8 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vreduce_fmax_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v10 +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call half @llvm.vector.reduce.fmax.nxv4f16( %val) + ret half %s +} + +define half @vreduce_fmin_nnan_nxv4f16( %val) { +; ZVFH-LABEL: vreduce_fmin_nnan_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfredmin.vs v8, v8, v8 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vreduce_fmin_nnan_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v10 +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call nnan half @llvm.vector.reduce.fmin.nxv4f16( %val) + ret half %s +} + +define half @vreduce_fmax_nnan_nxv4f16( %val) { +; ZVFH-LABEL: vreduce_fmax_nnan_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfredmax.vs v8, v8, v8 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vreduce_fmax_nnan_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v10 +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call nnan half @llvm.vector.reduce.fmax.nxv4f16( %val) + ret half %s +} + +define half @vreduce_fminimum_nxv4f16( %val) { +; ZVFH-LABEL: vreduce_fminimum_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vmfne.vv v9, v8, v8 +; ZVFH-NEXT: vcpop.m a0, v9 +; ZVFH-NEXT: beqz a0, .LBB4_2 +; ZVFH-NEXT: # %bb.1: +; ZVFH-NEXT: lui a0, %hi(.LCPI4_0) +; ZVFH-NEXT: flh fa0, %lo(.LCPI4_0)(a0) +; ZVFH-NEXT: ret +; ZVFH-NEXT: .LBB4_2: +; ZVFH-NEXT: vfredmin.vs v8, v8, v8 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vreduce_fminimum_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10 +; ZVFHMIN-NEXT: vcpop.m a0, v8 +; ZVFHMIN-NEXT: beqz a0, .LBB4_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: lui a0, 523264 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret +; ZVFHMIN-NEXT: .LBB4_2: +; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v10 +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call half @llvm.vector.reduce.fminimum.nxv4f16( %val) + ret half %s +} + +define half @vreduce_fmaximum_nxv4f16( %val) { +; ZVFH-LABEL: vreduce_fmaximum_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vmfne.vv v9, v8, v8 +; ZVFH-NEXT: vcpop.m a0, v9 +; ZVFH-NEXT: beqz a0, .LBB5_2 +; ZVFH-NEXT: # %bb.1: +; ZVFH-NEXT: lui a0, %hi(.LCPI5_0) +; ZVFH-NEXT: flh fa0, %lo(.LCPI5_0)(a0) +; ZVFH-NEXT: ret +; ZVFH-NEXT: .LBB5_2: +; ZVFH-NEXT: vfredmax.vs v8, v8, v8 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vreduce_fmaximum_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10 +; ZVFHMIN-NEXT: vcpop.m a0, v8 +; ZVFHMIN-NEXT: beqz a0, .LBB5_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: lui a0, 523264 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret +; ZVFHMIN-NEXT: .LBB5_2: +; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v10 +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call half @llvm.vector.reduce.fmaximum.nxv4f16( %val) + ret half %s +} + +define half @vreduce_fminimum_nnan_nxv4f16( %val) { +; ZVFH-LABEL: vreduce_fminimum_nnan_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfredmin.vs v8, v8, v8 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vreduce_fminimum_nnan_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v10 +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call nnan half @llvm.vector.reduce.fminimum.nxv4f16( %val) + ret half %s +} + +define half @vreduce_fmaximum_nnan_nxv4f16( %val) { +; ZVFH-LABEL: vreduce_fmaximum_nnan_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFH-NEXT: vfredmax.vs v8, v8, v8 +; ZVFH-NEXT: vfmv.f.s fa0, v8 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vreduce_fmaximum_nnan_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v10 +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call nnan half @llvm.vector.reduce.fmaximum.nxv4f16( %val) + ret half %s +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll new file mode 100644 index 0000000000000..8993bf8a767d8 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-f16.ll @@ -0,0 +1,269 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH +; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFH +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfhmin,+zvfhmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=ZVFHMIN + +define half @vpreduce_fmin_nxv4f16(half %start, %val, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vpreduce_fmin_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.s.f v9, fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfredmin.vs v9, v8, v9, v0.t +; ZVFH-NEXT: vfmv.f.s fa0, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_fmin_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v8, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call half @llvm.vp.reduce.fmin.nxv4f16(half %start, %val, %m, i32 %evl) + ret half %s +} + +define half @vpreduce_fmax_nxv4f16(half %start, %val, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vpreduce_fmax_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.s.f v9, fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfredmax.vs v9, v8, v9, v0.t +; ZVFH-NEXT: vfmv.f.s fa0, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_fmax_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v8, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call half @llvm.vp.reduce.fmax.nxv4f16(half %start, %val, %m, i32 %evl) + ret half %s +} + +define half @vpreduce_fmin_nnan_nxv4f16(half %start, %val, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vpreduce_fmin_nnan_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.s.f v9, fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfredmin.vs v9, v8, v9, v0.t +; ZVFH-NEXT: vfmv.f.s fa0, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_fmin_nnan_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v8, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call nnan half @llvm.vp.reduce.fmin.nxv4f16(half %start, %val, %m, i32 %evl) + ret half %s +} + +define half @vpreduce_fmax_nnan_nxv4f16(half %start, %val, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vpreduce_fmax_nnan_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.s.f v9, fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfredmax.vs v9, v8, v9, v0.t +; ZVFH-NEXT: vfmv.f.s fa0, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_fmax_nnan_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v8, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call nnan half @llvm.vp.reduce.fmax.nxv4f16(half %start, %val, %m, i32 %evl) + ret half %s +} + +define half @vpreduce_fminimum_nxv4f16(half %start, %val, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vpreduce_fminimum_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vmfne.vv v9, v8, v8, v0.t +; ZVFH-NEXT: fcvt.s.h fa5, fa0 +; ZVFH-NEXT: vcpop.m a1, v9, v0.t +; ZVFH-NEXT: feq.s a2, fa5, fa5 +; ZVFH-NEXT: xori a2, a2, 1 +; ZVFH-NEXT: or a1, a1, a2 +; ZVFH-NEXT: beqz a1, .LBB4_2 +; ZVFH-NEXT: # %bb.1: +; ZVFH-NEXT: lui a0, %hi(.LCPI4_0) +; ZVFH-NEXT: flh fa0, %lo(.LCPI4_0)(a0) +; ZVFH-NEXT: ret +; ZVFH-NEXT: .LBB4_2: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.s.f v9, fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfredmin.vs v9, v8, v9, v0.t +; ZVFH-NEXT: vfmv.f.s fa0, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_fminimum_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10, v0.t +; ZVFHMIN-NEXT: feq.s a1, fa5, fa5 +; ZVFHMIN-NEXT: vcpop.m a2, v8, v0.t +; ZVFHMIN-NEXT: xori a1, a1, 1 +; ZVFHMIN-NEXT: or a1, a2, a1 +; ZVFHMIN-NEXT: beqz a1, .LBB4_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: lui a0, 523264 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret +; ZVFHMIN-NEXT: .LBB4_2: +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v8, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call half @llvm.vp.reduce.fminimum.nxv4f16(half %start, %val, %m, i32 %evl) + ret half %s +} + +define half @vpreduce_fmaximum_nxv4f16(half %start, %val, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vpreduce_fmaximum_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vmfne.vv v9, v8, v8, v0.t +; ZVFH-NEXT: fcvt.s.h fa5, fa0 +; ZVFH-NEXT: vcpop.m a1, v9, v0.t +; ZVFH-NEXT: feq.s a2, fa5, fa5 +; ZVFH-NEXT: xori a2, a2, 1 +; ZVFH-NEXT: or a1, a1, a2 +; ZVFH-NEXT: beqz a1, .LBB5_2 +; ZVFH-NEXT: # %bb.1: +; ZVFH-NEXT: lui a0, %hi(.LCPI5_0) +; ZVFH-NEXT: flh fa0, %lo(.LCPI5_0)(a0) +; ZVFH-NEXT: ret +; ZVFH-NEXT: .LBB5_2: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.s.f v9, fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfredmax.vs v9, v8, v9, v0.t +; ZVFH-NEXT: vfmv.f.s fa0, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_fmaximum_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; ZVFHMIN-NEXT: vmfne.vv v8, v10, v10, v0.t +; ZVFHMIN-NEXT: feq.s a1, fa5, fa5 +; ZVFHMIN-NEXT: vcpop.m a2, v8, v0.t +; ZVFHMIN-NEXT: xori a1, a1, 1 +; ZVFHMIN-NEXT: or a1, a2, a1 +; ZVFHMIN-NEXT: beqz a1, .LBB5_2 +; ZVFHMIN-NEXT: # %bb.1: +; ZVFHMIN-NEXT: lui a0, 523264 +; ZVFHMIN-NEXT: fmv.w.x fa5, a0 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret +; ZVFHMIN-NEXT: .LBB5_2: +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v8, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call half @llvm.vp.reduce.fmaximum.nxv4f16(half %start, %val, %m, i32 %evl) + ret half %s +} + +define half @vpreduce_fminimum_nnan_nxv4f16(half %start, %val, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vpreduce_fminimum_nnan_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.s.f v9, fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfredmin.vs v9, v8, v9, v0.t +; ZVFH-NEXT: vfmv.f.s fa0, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_fminimum_nnan_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmin.vs v8, v10, v8, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call nnan half @llvm.vp.reduce.fminimum.nxv4f16(half %start, %val, %m, i32 %evl) + ret half %s +} + +define half @vpreduce_fmaximum_nnan_nxv4f16(half %start, %val, %m, i32 zeroext %evl) { +; ZVFH-LABEL: vpreduce_fmaximum_nnan_nxv4f16: +; ZVFH: # %bb.0: +; ZVFH-NEXT: vsetivli zero, 1, e16, m1, ta, ma +; ZVFH-NEXT: vfmv.s.f v9, fa0 +; ZVFH-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFH-NEXT: vfredmax.vs v9, v8, v9, v0.t +; ZVFH-NEXT: vfmv.f.s fa0, v9 +; ZVFH-NEXT: ret +; +; ZVFHMIN-LABEL: vpreduce_fmaximum_nnan_nxv4f16: +; ZVFHMIN: # %bb.0: +; ZVFHMIN-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; ZVFHMIN-NEXT: vfwcvt.f.f.v v10, v8 +; ZVFHMIN-NEXT: fcvt.s.h fa5, fa0 +; ZVFHMIN-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfmv.s.f v8, fa5 +; ZVFHMIN-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; ZVFHMIN-NEXT: vfredmax.vs v8, v10, v8, v0.t +; ZVFHMIN-NEXT: vfmv.f.s fa5, v8 +; ZVFHMIN-NEXT: fcvt.h.s fa0, fa5 +; ZVFHMIN-NEXT: ret + %s = call nnan half @llvm.vp.reduce.fmaximum.nxv4f16(half %start, %val, %m, i32 %evl) + ret half %s +} From 496b064ccb0324f7a457c6549df9682495222a61 Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Wed, 26 Feb 2025 10:46:25 +0800 Subject: [PATCH 2/5] clang-format --- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 13345b76e7e92..27bde7b96c857 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -515,7 +515,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Node->getOperand(0).getValueType()); // Defer non-vector results to LegalizeDAG. if (Action == TargetLowering::Promote) - Action = TargetLowering::Legal; + Action = TargetLowering::Legal; break; case ISD::VECREDUCE_SEQ_FADD: case ISD::VECREDUCE_SEQ_FMUL: From e43532fae8da39f7307070fae4686c3b2ffcf90d Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Wed, 26 Feb 2025 14:38:53 +0800 Subject: [PATCH 3/5] prmote -> promote --- llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 5f70588ab84e4..2b8818482a333 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -2938,7 +2938,7 @@ SDValue SelectionDAGLegalize::PromoteReduction(SDNode *Node) { Operands[j] = DAG.getNode(ISD::FP_EXTEND, DL, NewVecVT, Node->getOperand(j)); } else if (Node->getOperand(j).getValueType().isFloatingPoint()) { - // prmote the initial value. + // promote the initial value. Operands[j] = DAG.getNode(ISD::FP_EXTEND, DL, NewScalarVT, Node->getOperand(j)); } else { From 2319ddeb247550267f5462747dd1ec749531f4cb Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Thu, 27 Feb 2025 14:40:30 +0800 Subject: [PATCH 4/5] Add tests for bf16/zvfbfmin --- .../RISCV/rvv/vreductions-fp-sdnode-bf16.ll | 136 ++++++++++++++ .../RISCV/rvv/vreductions-fp-vp-bf16.ll | 167 ++++++++++++++++++ 2 files changed, 303 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-bf16.ll create mode 100644 llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-bf16.ll diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-bf16.ll new file mode 100644 index 0000000000000..618d3d2a30b19 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-bf16.ll @@ -0,0 +1,136 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +define bfloat @vreduce_fmin_nxv4f16( %val) { +; CHECK-LABEL: vreduce_fmin_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfredmin.vs v8, v10, v10 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call bfloat @llvm.vector.reduce.fmin.nxv4f16( %val) + ret bfloat %s +} + +define bfloat @vreduce_fmax_nxv4f16( %val) { +; CHECK-LABEL: vreduce_fmax_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v10, v10 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call bfloat @llvm.vector.reduce.fmax.nxv4f16( %val) + ret bfloat %s +} + +define bfloat @vreduce_fmin_nnan_nxv4f16( %val) { +; CHECK-LABEL: vreduce_fmin_nnan_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfredmin.vs v8, v10, v10 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call nnan bfloat @llvm.vector.reduce.fmin.nxv4f16( %val) + ret bfloat %s +} + +define bfloat @vreduce_fmax_nnan_nxv4f16( %val) { +; CHECK-LABEL: vreduce_fmax_nnan_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v10, v10 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call nnan bfloat @llvm.vector.reduce.fmax.nxv4f16( %val) + ret bfloat %s +} + +define bfloat @vreduce_fminimum_nxv4f16( %val) { +; CHECK-LABEL: vreduce_fminimum_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmfne.vv v8, v10, v10 +; CHECK-NEXT: vcpop.m a0, v8 +; CHECK-NEXT: beqz a0, .LBB4_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: lui a0, 523264 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: vfredmin.vs v8, v10, v10 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call bfloat @llvm.vector.reduce.fminimum.nxv4f16( %val) + ret bfloat %s +} + +define bfloat @vreduce_fmaximum_nxv4f16( %val) { +; CHECK-LABEL: vreduce_fmaximum_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmfne.vv v8, v10, v10 +; CHECK-NEXT: vcpop.m a0, v8 +; CHECK-NEXT: beqz a0, .LBB5_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: lui a0, 523264 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: vfredmax.vs v8, v10, v10 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call bfloat @llvm.vector.reduce.fmaximum.nxv4f16( %val) + ret bfloat %s +} + +define bfloat @vreduce_fminimum_nnan_nxv4f16( %val) { +; CHECK-LABEL: vreduce_fminimum_nnan_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfredmin.vs v8, v10, v10 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call nnan bfloat @llvm.vector.reduce.fminimum.nxv4f16( %val) + ret bfloat %s +} + +define bfloat @vreduce_fmaximum_nnan_nxv4f16( %val) { +; CHECK-LABEL: vreduce_fmaximum_nnan_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v10, v10 +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call nnan bfloat @llvm.vector.reduce.fmaximum.nxv4f16( %val) + ret bfloat %s +} + diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-bf16.ll new file mode 100644 index 0000000000000..ecf0c97e0a6ea --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-bf16.ll @@ -0,0 +1,167 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ +; RUN: -verify-machineinstrs < %s | FileCheck %s + +define bfloat @vpreduce_fmin_nxv4f16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fmin_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmin.vs v8, v10, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call bfloat @llvm.vp.reduce.fmin.nxv4f16(bfloat %start, %val, %m, i32 %evl) + ret bfloat %s +} + +define bfloat @vpreduce_fmax_nxv4f16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fmax_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v10, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call bfloat @llvm.vp.reduce.fmax.nxv4f16(bfloat %start, %val, %m, i32 %evl) + ret bfloat %s +} + +define bfloat @vpreduce_fmin_nnan_nxv4f16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fmin_nnan_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmin.vs v8, v10, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call nnan bfloat @llvm.vp.reduce.fmin.nxv4f16(bfloat %start, %val, %m, i32 %evl) + ret bfloat %s +} + +define bfloat @vpreduce_fmax_nnan_nxv4f16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fmax_nnan_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v10, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call nnan bfloat @llvm.vp.reduce.fmax.nxv4f16(bfloat %start, %val, %m, i32 %evl) + ret bfloat %s +} + +define bfloat @vpreduce_fminimum_nxv4f16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fminimum_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmfne.vv v8, v10, v10, v0.t +; CHECK-NEXT: feq.s a1, fa5, fa5 +; CHECK-NEXT: vcpop.m a2, v8, v0.t +; CHECK-NEXT: xori a1, a1, 1 +; CHECK-NEXT: or a1, a2, a1 +; CHECK-NEXT: beqz a1, .LBB4_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: lui a0, 523264 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB4_2: +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmin.vs v8, v10, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call bfloat @llvm.vp.reduce.fminimum.nxv4f16(bfloat %start, %val, %m, i32 %evl) + ret bfloat %s +} + +define bfloat @vpreduce_fmaximum_nxv4f16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fmaximum_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma +; CHECK-NEXT: vmfne.vv v8, v10, v10, v0.t +; CHECK-NEXT: feq.s a1, fa5, fa5 +; CHECK-NEXT: vcpop.m a2, v8, v0.t +; CHECK-NEXT: xori a1, a1, 1 +; CHECK-NEXT: or a1, a2, a1 +; CHECK-NEXT: beqz a1, .LBB5_2 +; CHECK-NEXT: # %bb.1: +; CHECK-NEXT: lui a0, 523264 +; CHECK-NEXT: fmv.w.x fa5, a0 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret +; CHECK-NEXT: .LBB5_2: +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v10, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call bfloat @llvm.vp.reduce.fmaximum.nxv4f16(bfloat %start, %val, %m, i32 %evl) + ret bfloat %s +} + +define bfloat @vpreduce_fminimum_nnan_nxv4f16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fminimum_nnan_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmin.vs v8, v10, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call nnan bfloat @llvm.vp.reduce.fminimum.nxv4f16(bfloat %start, %val, %m, i32 %evl) + ret bfloat %s +} + +define bfloat @vpreduce_fmaximum_nnan_nxv4f16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fmaximum_nnan_nxv4f16: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma +; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 +; CHECK-NEXT: fcvt.s.bf16 fa5, fa0 +; CHECK-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; CHECK-NEXT: vfmv.s.f v8, fa5 +; CHECK-NEXT: vsetvli zero, a0, e32, m2, ta, ma +; CHECK-NEXT: vfredmax.vs v8, v10, v8, v0.t +; CHECK-NEXT: vfmv.f.s fa5, v8 +; CHECK-NEXT: fcvt.bf16.s fa0, fa5 +; CHECK-NEXT: ret + %s = call nnan bfloat @llvm.vp.reduce.fmaximum.nxv4f16(bfloat %start, %val, %m, i32 %evl) + ret bfloat %s +} From d1cff55fa333b8ddf947ed7668e2a99727e7b6a4 Mon Sep 17 00:00:00 2001 From: Jim Lin Date: Fri, 28 Feb 2025 21:28:00 +0800 Subject: [PATCH 5/5] nxv4f16 -> nxv4bf16 for bf16 tests --- .../RISCV/rvv/vreductions-fp-sdnode-bf16.ll | 48 +++++++++---------- .../RISCV/rvv/vreductions-fp-vp-bf16.ll | 48 +++++++++---------- 2 files changed, 48 insertions(+), 48 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-bf16.ll index 618d3d2a30b19..1c42cd29deca9 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-bf16.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-sdnode-bf16.ll @@ -4,8 +4,8 @@ ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -define bfloat @vreduce_fmin_nxv4f16( %val) { -; CHECK-LABEL: vreduce_fmin_nxv4f16: +define bfloat @vreduce_fmin_nxv4bf16( %val) { +; CHECK-LABEL: vreduce_fmin_nxv4bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 @@ -14,12 +14,12 @@ define bfloat @vreduce_fmin_nxv4f16( %val) { ; CHECK-NEXT: vfmv.f.s fa5, v8 ; CHECK-NEXT: fcvt.bf16.s fa0, fa5 ; CHECK-NEXT: ret - %s = call bfloat @llvm.vector.reduce.fmin.nxv4f16( %val) + %s = call bfloat @llvm.vector.reduce.fmin.nxv4bf16( %val) ret bfloat %s } -define bfloat @vreduce_fmax_nxv4f16( %val) { -; CHECK-LABEL: vreduce_fmax_nxv4f16: +define bfloat @vreduce_fmax_nxv4bf16( %val) { +; CHECK-LABEL: vreduce_fmax_nxv4bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 @@ -28,12 +28,12 @@ define bfloat @vreduce_fmax_nxv4f16( %val) { ; CHECK-NEXT: vfmv.f.s fa5, v8 ; CHECK-NEXT: fcvt.bf16.s fa0, fa5 ; CHECK-NEXT: ret - %s = call bfloat @llvm.vector.reduce.fmax.nxv4f16( %val) + %s = call bfloat @llvm.vector.reduce.fmax.nxv4bf16( %val) ret bfloat %s } -define bfloat @vreduce_fmin_nnan_nxv4f16( %val) { -; CHECK-LABEL: vreduce_fmin_nnan_nxv4f16: +define bfloat @vreduce_fmin_nnan_nxv4bf16( %val) { +; CHECK-LABEL: vreduce_fmin_nnan_nxv4bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 @@ -42,12 +42,12 @@ define bfloat @vreduce_fmin_nnan_nxv4f16( %val) { ; CHECK-NEXT: vfmv.f.s fa5, v8 ; CHECK-NEXT: fcvt.bf16.s fa0, fa5 ; CHECK-NEXT: ret - %s = call nnan bfloat @llvm.vector.reduce.fmin.nxv4f16( %val) + %s = call nnan bfloat @llvm.vector.reduce.fmin.nxv4bf16( %val) ret bfloat %s } -define bfloat @vreduce_fmax_nnan_nxv4f16( %val) { -; CHECK-LABEL: vreduce_fmax_nnan_nxv4f16: +define bfloat @vreduce_fmax_nnan_nxv4bf16( %val) { +; CHECK-LABEL: vreduce_fmax_nnan_nxv4bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 @@ -56,12 +56,12 @@ define bfloat @vreduce_fmax_nnan_nxv4f16( %val) { ; CHECK-NEXT: vfmv.f.s fa5, v8 ; CHECK-NEXT: fcvt.bf16.s fa0, fa5 ; CHECK-NEXT: ret - %s = call nnan bfloat @llvm.vector.reduce.fmax.nxv4f16( %val) + %s = call nnan bfloat @llvm.vector.reduce.fmax.nxv4bf16( %val) ret bfloat %s } -define bfloat @vreduce_fminimum_nxv4f16( %val) { -; CHECK-LABEL: vreduce_fminimum_nxv4f16: +define bfloat @vreduce_fminimum_nxv4bf16( %val) { +; CHECK-LABEL: vreduce_fminimum_nxv4bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 @@ -79,12 +79,12 @@ define bfloat @vreduce_fminimum_nxv4f16( %val) { ; CHECK-NEXT: vfmv.f.s fa5, v8 ; CHECK-NEXT: fcvt.bf16.s fa0, fa5 ; CHECK-NEXT: ret - %s = call bfloat @llvm.vector.reduce.fminimum.nxv4f16( %val) + %s = call bfloat @llvm.vector.reduce.fminimum.nxv4bf16( %val) ret bfloat %s } -define bfloat @vreduce_fmaximum_nxv4f16( %val) { -; CHECK-LABEL: vreduce_fmaximum_nxv4f16: +define bfloat @vreduce_fmaximum_nxv4bf16( %val) { +; CHECK-LABEL: vreduce_fmaximum_nxv4bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 @@ -102,12 +102,12 @@ define bfloat @vreduce_fmaximum_nxv4f16( %val) { ; CHECK-NEXT: vfmv.f.s fa5, v8 ; CHECK-NEXT: fcvt.bf16.s fa0, fa5 ; CHECK-NEXT: ret - %s = call bfloat @llvm.vector.reduce.fmaximum.nxv4f16( %val) + %s = call bfloat @llvm.vector.reduce.fmaximum.nxv4bf16( %val) ret bfloat %s } -define bfloat @vreduce_fminimum_nnan_nxv4f16( %val) { -; CHECK-LABEL: vreduce_fminimum_nnan_nxv4f16: +define bfloat @vreduce_fminimum_nnan_nxv4bf16( %val) { +; CHECK-LABEL: vreduce_fminimum_nnan_nxv4bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 @@ -116,12 +116,12 @@ define bfloat @vreduce_fminimum_nnan_nxv4f16( %val) { ; CHECK-NEXT: vfmv.f.s fa5, v8 ; CHECK-NEXT: fcvt.bf16.s fa0, fa5 ; CHECK-NEXT: ret - %s = call nnan bfloat @llvm.vector.reduce.fminimum.nxv4f16( %val) + %s = call nnan bfloat @llvm.vector.reduce.fminimum.nxv4bf16( %val) ret bfloat %s } -define bfloat @vreduce_fmaximum_nnan_nxv4f16( %val) { -; CHECK-LABEL: vreduce_fmaximum_nnan_nxv4f16: +define bfloat @vreduce_fmaximum_nnan_nxv4bf16( %val) { +; CHECK-LABEL: vreduce_fmaximum_nnan_nxv4bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli a0, zero, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 @@ -130,7 +130,7 @@ define bfloat @vreduce_fmaximum_nnan_nxv4f16( %val) { ; CHECK-NEXT: vfmv.f.s fa5, v8 ; CHECK-NEXT: fcvt.bf16.s fa0, fa5 ; CHECK-NEXT: ret - %s = call nnan bfloat @llvm.vector.reduce.fmaximum.nxv4f16( %val) + %s = call nnan bfloat @llvm.vector.reduce.fmaximum.nxv4bf16( %val) ret bfloat %s } diff --git a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-bf16.ll b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-bf16.ll index ecf0c97e0a6ea..37bd0a0496dcf 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-bf16.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vreductions-fp-vp-bf16.ll @@ -4,8 +4,8 @@ ; RUN: llc -mtriple=riscv64 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \ ; RUN: -verify-machineinstrs < %s | FileCheck %s -define bfloat @vpreduce_fmin_nxv4f16(bfloat %start, %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_fmin_nxv4f16: +define bfloat @vpreduce_fmin_nxv4bf16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fmin_nxv4bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 @@ -17,12 +17,12 @@ define bfloat @vpreduce_fmin_nxv4f16(bfloat %start, %val, ; CHECK-NEXT: vfmv.f.s fa5, v8 ; CHECK-NEXT: fcvt.bf16.s fa0, fa5 ; CHECK-NEXT: ret - %s = call bfloat @llvm.vp.reduce.fmin.nxv4f16(bfloat %start, %val, %m, i32 %evl) + %s = call bfloat @llvm.vp.reduce.fmin.nxv4bf16(bfloat %start, %val, %m, i32 %evl) ret bfloat %s } -define bfloat @vpreduce_fmax_nxv4f16(bfloat %start, %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_fmax_nxv4f16: +define bfloat @vpreduce_fmax_nxv4bf16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fmax_nxv4bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 @@ -34,12 +34,12 @@ define bfloat @vpreduce_fmax_nxv4f16(bfloat %start, %val, ; CHECK-NEXT: vfmv.f.s fa5, v8 ; CHECK-NEXT: fcvt.bf16.s fa0, fa5 ; CHECK-NEXT: ret - %s = call bfloat @llvm.vp.reduce.fmax.nxv4f16(bfloat %start, %val, %m, i32 %evl) + %s = call bfloat @llvm.vp.reduce.fmax.nxv4bf16(bfloat %start, %val, %m, i32 %evl) ret bfloat %s } -define bfloat @vpreduce_fmin_nnan_nxv4f16(bfloat %start, %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_fmin_nnan_nxv4f16: +define bfloat @vpreduce_fmin_nnan_nxv4bf16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fmin_nnan_nxv4bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 @@ -51,12 +51,12 @@ define bfloat @vpreduce_fmin_nnan_nxv4f16(bfloat %start, % ; CHECK-NEXT: vfmv.f.s fa5, v8 ; CHECK-NEXT: fcvt.bf16.s fa0, fa5 ; CHECK-NEXT: ret - %s = call nnan bfloat @llvm.vp.reduce.fmin.nxv4f16(bfloat %start, %val, %m, i32 %evl) + %s = call nnan bfloat @llvm.vp.reduce.fmin.nxv4bf16(bfloat %start, %val, %m, i32 %evl) ret bfloat %s } -define bfloat @vpreduce_fmax_nnan_nxv4f16(bfloat %start, %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_fmax_nnan_nxv4f16: +define bfloat @vpreduce_fmax_nnan_nxv4bf16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fmax_nnan_nxv4bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 @@ -68,12 +68,12 @@ define bfloat @vpreduce_fmax_nnan_nxv4f16(bfloat %start, % ; CHECK-NEXT: vfmv.f.s fa5, v8 ; CHECK-NEXT: fcvt.bf16.s fa0, fa5 ; CHECK-NEXT: ret - %s = call nnan bfloat @llvm.vp.reduce.fmax.nxv4f16(bfloat %start, %val, %m, i32 %evl) + %s = call nnan bfloat @llvm.vp.reduce.fmax.nxv4bf16(bfloat %start, %val, %m, i32 %evl) ret bfloat %s } -define bfloat @vpreduce_fminimum_nxv4f16(bfloat %start, %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_fminimum_nxv4f16: +define bfloat @vpreduce_fminimum_nxv4bf16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fminimum_nxv4bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 @@ -98,12 +98,12 @@ define bfloat @vpreduce_fminimum_nxv4f16(bfloat %start, %v ; CHECK-NEXT: vfmv.f.s fa5, v8 ; CHECK-NEXT: fcvt.bf16.s fa0, fa5 ; CHECK-NEXT: ret - %s = call bfloat @llvm.vp.reduce.fminimum.nxv4f16(bfloat %start, %val, %m, i32 %evl) + %s = call bfloat @llvm.vp.reduce.fminimum.nxv4bf16(bfloat %start, %val, %m, i32 %evl) ret bfloat %s } -define bfloat @vpreduce_fmaximum_nxv4f16(bfloat %start, %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_fmaximum_nxv4f16: +define bfloat @vpreduce_fmaximum_nxv4bf16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fmaximum_nxv4bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 @@ -128,12 +128,12 @@ define bfloat @vpreduce_fmaximum_nxv4f16(bfloat %start, %v ; CHECK-NEXT: vfmv.f.s fa5, v8 ; CHECK-NEXT: fcvt.bf16.s fa0, fa5 ; CHECK-NEXT: ret - %s = call bfloat @llvm.vp.reduce.fmaximum.nxv4f16(bfloat %start, %val, %m, i32 %evl) + %s = call bfloat @llvm.vp.reduce.fmaximum.nxv4bf16(bfloat %start, %val, %m, i32 %evl) ret bfloat %s } -define bfloat @vpreduce_fminimum_nnan_nxv4f16(bfloat %start, %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_fminimum_nnan_nxv4f16: +define bfloat @vpreduce_fminimum_nnan_nxv4bf16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fminimum_nnan_nxv4bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 @@ -145,12 +145,12 @@ define bfloat @vpreduce_fminimum_nnan_nxv4f16(bfloat %start, %val, %m, i32 %evl) + %s = call nnan bfloat @llvm.vp.reduce.fminimum.nxv4bf16(bfloat %start, %val, %m, i32 %evl) ret bfloat %s } -define bfloat @vpreduce_fmaximum_nnan_nxv4f16(bfloat %start, %val, %m, i32 zeroext %evl) { -; CHECK-LABEL: vpreduce_fmaximum_nnan_nxv4f16: +define bfloat @vpreduce_fmaximum_nnan_nxv4bf16(bfloat %start, %val, %m, i32 zeroext %evl) { +; CHECK-LABEL: vpreduce_fmaximum_nnan_nxv4bf16: ; CHECK: # %bb.0: ; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma ; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8 @@ -162,6 +162,6 @@ define bfloat @vpreduce_fmaximum_nnan_nxv4f16(bfloat %start, %val, %m, i32 %evl) + %s = call nnan bfloat @llvm.vp.reduce.fmaximum.nxv4bf16(bfloat %start, %val, %m, i32 %evl) ret bfloat %s }