diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 808f089914c9b..a0873beeaebd9 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -4937,6 +4937,11 @@ def msave_restore : Flag<["-"], "msave-restore">, Group, def mno_save_restore : Flag<["-"], "mno-save-restore">, Group, HelpText<"Disable using library calls for save and restore">; } // let Flags = [TargetSpecific] +def mload_store_pairs : Flag<["-"], "mload-store-pairs">, Group; +def mno_load_store_pairs : Flag<["-"], "mno-load-store-pairs">, Group; +def mccmov : Flag<["-"], "mccmov">, Group; +def mno_ccmov : Flag<["-"], "mno-ccmov">, Group; +def mremove_back_to_back_branches : Flag<["-"], "mremove_back_to_back_branches">, Group; let Flags = [TargetSpecific] in { def menable_experimental_extensions : Flag<["-"], "menable-experimental-extensions">, Group, HelpText<"Enable use of experimental RISC-V extensions.">; diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp index 6935904a24edb..7a2408477e2d7 100644 --- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp +++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp @@ -336,12 +336,14 @@ std::string riscv::getRISCVArch(const llvm::opt::ArgList &Args, // - On `riscv{XLEN}-unknown-elf` we default to `rv{XLEN}imac` // - On all other OSs we use `rv{XLEN}imafdc` (equivalent to `rv{XLEN}gc`) if (Triple.isRISCV32()) { - if (Triple.getOS() == llvm::Triple::UnknownOS) + if (Triple.getOS() == llvm::Triple::UnknownOS && + Triple.getVendor() != llvm::Triple::MipsTechnologies) return "rv32imac"; else return "rv32imafdc"; } else { - if (Triple.getOS() == llvm::Triple::UnknownOS) + if (Triple.getOS() == llvm::Triple::UnknownOS && + Triple.getVendor() != llvm::Triple::MipsTechnologies) return "rv64imac"; else if (Triple.isAndroid()) return "rv64imafdcv_zba_zbb_zbs"; @@ -364,5 +366,9 @@ std::string riscv::getRISCVTargetCPU(const llvm::opt::ArgList &Args, if 
(!CPU.empty()) return CPU; + if (Triple.getVendor() == llvm::Triple::MipsTechnologies && + Triple.isRISCV64()) + return "p8700"; + return Triple.isRISCV64() ? "generic-rv64" : "generic-rv32"; } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 217c1a845f0a4..dcf60f99e688a 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -2164,6 +2164,24 @@ void Clang::AddRISCVTargetArgs(const ArgList &Args, CmdArgs.push_back(A->getValue()); } + if (Arg *A = Args.getLastArg(options::OPT_mload_store_pairs, + options::OPT_mno_load_store_pairs)) { + if (A->getOption().matches(options::OPT_mload_store_pairs)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-riscv-load-store-pairs=1"); + } + } + + if (Arg *A = Args.getLastArg(options::OPT_mccmov, options::OPT_mno_ccmov)) { + if (A->getOption().matches(options::OPT_mno_ccmov)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-riscv-ccmov=0"); + } + } + if (Args.getLastArg(options::OPT_mremove_back_to_back_branches)) { + CmdArgs.push_back("-mllvm"); + CmdArgs.push_back("-riscv-remove-back-to-back-branches=1"); + } // Handle -mrvv-vector-bits= if (Arg *A = Args.getLastArg(options::OPT_mrvv_vector_bits_EQ)) { StringRef Val = A->getValue(); diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 8397f1121ec2c..5c3e23b1f7ff1 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -252,9 +252,17 @@ static const char *getLDMOption(const llvm::Triple &T, const ArgList &Args) { case llvm::Triple::ppc64le: return "elf64lppc"; case llvm::Triple::riscv32: - return "elf32lriscv"; - case llvm::Triple::riscv64: - return "elf64lriscv"; + case llvm::Triple::riscv64: { + bool IsBigEndian = false; + if (Arg *A = Args.getLastArg(options::OPT_mlittle_endian, + options::OPT_mbig_endian)) + IsBigEndian = A->getOption().matches(options::OPT_mbig_endian); + + if (T.getArch() == 
llvm::Triple::riscv32) + return IsBigEndian ? "elf32briscv" : "elf32lriscv"; + else + return IsBigEndian ? "elf64briscv" : "elf64lriscv"; + } case llvm::Triple::sparc: case llvm::Triple::sparcel: return "elf32_sparc"; @@ -402,6 +410,14 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back(Arch == llvm::Triple::aarch64_be ? "-EB" : "-EL"); } + if (Triple.isRISCV() && + Triple.getVendor() == llvm::Triple::MipsTechnologies) { + bool IsBigEndian = false; + if (Arg *A = Args.getLastArg(options::OPT_mlittle_endian, + options::OPT_mbig_endian)) + IsBigEndian = A->getOption().matches(options::OPT_mbig_endian); + CmdArgs.push_back(IsBigEndian ? "-EB" : "-EL"); + } // Most Android ARM64 targets should enable the linker fix for erratum // 843419. Only non-Cortex-A53 devices are allowed to skip this flag. if (Arch == llvm::Triple::aarch64 && (isAndroid || isOHOSFamily)) { @@ -765,7 +781,8 @@ void tools::gnutools::Assembler::ConstructJob(Compilation &C, } case llvm::Triple::riscv32: case llvm::Triple::riscv64: { - StringRef ABIName = riscv::getRISCVABI(Args, getToolChain().getTriple()); + const llvm::Triple &Triple = getToolChain().getTriple(); + StringRef ABIName = riscv::getRISCVABI(Args, Triple); CmdArgs.push_back("-mabi"); CmdArgs.push_back(ABIName.data()); std::string MArchName = @@ -774,6 +791,14 @@ void tools::gnutools::Assembler::ConstructJob(Compilation &C, CmdArgs.push_back(Args.MakeArgString(MArchName)); if (!Args.hasFlag(options::OPT_mrelax, options::OPT_mno_relax, true)) Args.addOptOutFlag(CmdArgs, options::OPT_mrelax, options::OPT_mno_relax); + + if (Triple.getVendor() == llvm::Triple::MipsTechnologies) { + bool IsBigEndian = false; + if (Arg *A = Args.getLastArg(options::OPT_mlittle_endian, + options::OPT_mbig_endian)) + IsBigEndian = A->getOption().matches(options::OPT_mbig_endian); + CmdArgs.push_back(IsBigEndian ? 
"-EB" : "-EL"); + } break; } case llvm::Triple::sparc: @@ -1873,9 +1898,18 @@ static void findRISCVBareMetalMultilibs(const Driver &D, .flag(Twine("-march=", Element.march).str()) .flag(Twine("-mabi=", Element.mabi).str())); } + SmallVector Endian; + if (TargetTriple.getVendor() == llvm::Triple::MipsTechnologies) { + Endian.push_back( + MultilibBuilder("/riscv").flag("-EL").flag("-EB", /*Disallow=*/true)); + Endian.push_back( + MultilibBuilder("/riscveb").flag("-EB").flag("-EL", /*Disallow=*/true)); + } MultilibSet RISCVMultilibs = MultilibSetBuilder() .Either(Ms) + .Either(Endian) + .Either(ArrayRef(Ms)) .makeMultilibSet() .FilterOut(NonExistent) .setFilePathsCallback([](const Multilib &M) { @@ -1899,6 +1933,19 @@ static void findRISCVBareMetalMultilibs(const Driver &D, } } + bool IsBigEndian = false; + if (Arg *A = Args.getLastArg(options::OPT_mlittle_endian, + options::OPT_mbig_endian)) + IsBigEndian = A->getOption().matches(options::OPT_mbig_endian); + + if (IsBigEndian) { + D.Diag(diag::err_drv_unsupported_opt_for_target) + << "-EB" << TargetTriple.str(); + } + + addMultilibFlag(IsBigEndian, "-EB", Flags); + addMultilibFlag(!IsBigEndian, "-EL", Flags); + if (selectRISCVMultilib(D, RISCVMultilibs, MArch, Flags, Result.SelectedMultilibs)) Result.Multilibs = RISCVMultilibs; @@ -1923,8 +1970,18 @@ static void findRISCVMultilibs(const Driver &D, MultilibBuilder("lib64/lp64f").flag("-m64").flag("-mabi=lp64f"); MultilibBuilder Lp64d = MultilibBuilder("lib64/lp64d").flag("-m64").flag("-mabi=lp64d"); + + SmallVector Endian; + if (TargetTriple.getVendor() == llvm::Triple::MipsTechnologies) { + Endian.push_back( + MultilibBuilder("/riscv").flag("-EL").flag("-EB", /*Disallow=*/true)); + Endian.push_back( + MultilibBuilder("/riscveb").flag("-EB").flag("-EL", /*Disallow=*/true)); + } + MultilibSet RISCVMultilibs = MultilibSetBuilder() + .Either(Endian) .Either({Ilp32, Ilp32f, Ilp32d, Lp64, Lp64f, Lp64d}) .makeMultilibSet() .FilterOut(NonExistent); @@ -1932,6 +1989,15 @@ 
static void findRISCVMultilibs(const Driver &D, Multilib::flags_list Flags; bool IsRV64 = TargetTriple.getArch() == llvm::Triple::riscv64; StringRef ABIName = tools::riscv::getRISCVABI(Args, TargetTriple); + bool IsBigEndian = false; + if (Arg *A = Args.getLastArg(options::OPT_mlittle_endian, + options::OPT_mbig_endian)) + IsBigEndian = A->getOption().matches(options::OPT_mbig_endian); + + if (IsBigEndian) { + D.Diag(diag::err_drv_unsupported_opt_for_target) + << "-EB" << TargetTriple.str(); + } addMultilibFlag(!IsRV64, "-m32", Flags); addMultilibFlag(IsRV64, "-m64", Flags); @@ -1941,6 +2007,8 @@ static void findRISCVMultilibs(const Driver &D, addMultilibFlag(ABIName == "lp64", "-mabi=lp64", Flags); addMultilibFlag(ABIName == "lp64f", "-mabi=lp64f", Flags); addMultilibFlag(ABIName == "lp64d", "-mabi=lp64d", Flags); + addMultilibFlag(IsBigEndian, "-EB", Flags); + addMultilibFlag(!IsBigEndian, "-EL", Flags); if (RISCVMultilibs.select(D, Flags, Result.SelectedMultilibs)) Result.Multilibs = RISCVMultilibs; @@ -2565,8 +2633,8 @@ void Generic_GCC::GCCInstallationDetector::AddDefaultGCCPrefixes( static const char *const RISCV32Triples[] = {"riscv32-unknown-linux-gnu", "riscv32-unknown-elf"}; static const char *const RISCV64LibDirs[] = {"/lib64", "/lib"}; - static const char *const RISCV64Triples[] = {"riscv64-unknown-linux-gnu", - "riscv64-unknown-elf"}; + static const char *const RISCV64Triples[] = { + "riscv64-unknown-linux-gnu", "riscv64-unknown-elf", "riscv64-mti-elf"}; static const char *const SPARCv8LibDirs[] = {"/lib32", "/lib"}; static const char *const SPARCv8Triples[] = {"sparc-linux-gnu", @@ -3137,6 +3205,45 @@ bool Generic_GCC::IsIntegratedAssemblerDefault() const { case llvm::Triple::nvptx64: case llvm::Triple::xcore: return false; + case llvm::Triple::aarch64: + case llvm::Triple::aarch64_be: + case llvm::Triple::amdgcn: + case llvm::Triple::arm: + case llvm::Triple::armeb: + case llvm::Triple::avr: + case llvm::Triple::bpfel: + case llvm::Triple::bpfeb: + 
case llvm::Triple::csky: + case llvm::Triple::hexagon: + case llvm::Triple::lanai: + case llvm::Triple::loongarch32: + case llvm::Triple::loongarch64: + case llvm::Triple::m68k: + case llvm::Triple::mips: + case llvm::Triple::mipsel: + case llvm::Triple::mips64: + case llvm::Triple::mips64el: + case llvm::Triple::msp430: + case llvm::Triple::ppc: + case llvm::Triple::ppcle: + case llvm::Triple::ppc64: + case llvm::Triple::ppc64le: + case llvm::Triple::r600: + case llvm::Triple::sparc: + case llvm::Triple::sparcel: + case llvm::Triple::sparcv9: + case llvm::Triple::systemz: + case llvm::Triple::thumb: + case llvm::Triple::thumbeb: + case llvm::Triple::ve: + case llvm::Triple::x86: + case llvm::Triple::x86_64: + return true; + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: + if (getTriple().getVendor() != llvm::Triple::MipsTechnologies) + return true; + return false; default: return true; } diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp index d1cb625613415..cf10f7a1918b3 100644 --- a/clang/lib/Driver/ToolChains/Linux.cpp +++ b/clang/lib/Driver/ToolChains/Linux.cpp @@ -274,11 +274,14 @@ Linux::Linux(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) const bool IsHexagon = Arch == llvm::Triple::hexagon; const bool IsRISCV = Triple.isRISCV(); const bool IsCSKY = Triple.isCSKY(); + const bool IsMipsSysRoot = + IsMips || + (IsRISCV && Triple.getVendor() == llvm::Triple::MipsTechnologies); if (IsCSKY && !SelectedMultilibs.empty()) SysRoot = SysRoot + SelectedMultilibs.back().osSuffix(); - if ((IsMips || IsCSKY) && !SysRoot.empty()) + if ((IsMipsSysRoot || IsCSKY) && !SysRoot.empty()) ExtraOpts.push_back("--sysroot=" + SysRoot); // Do not use 'gnu' hash style for Mips targets because .gnu.hash @@ -412,7 +415,12 @@ std::string Linux::computeSysRoot() const { return std::string(); } - if (!GCCInstallation.isValid() || !getTriple().isMIPS()) + const bool IsMipsSysRoot = + getTriple().isMIPS() || + 
(getTriple().isRISCV() && + getTriple().getVendor() == llvm::Triple::MipsTechnologies); + + if (!GCCInstallation.isValid() || !IsMipsSysRoot) return std::string(); // Standalone MIPS toolchains use different names for sysroot folder @@ -422,8 +430,19 @@ std::string Linux::computeSysRoot() const { const StringRef InstallDir = GCCInstallation.getInstallPath(); const StringRef TripleStr = GCCInstallation.getTriple().str(); const Multilib &Multilib = GCCInstallation.getMultilib(); + std::string Path; + if (getTriple().isRISCV()) { + Path = + (InstallDir + "/../../../../sysroot" + Multilib.osSuffix() + "/../..") + .str(); + + if (getVFS().exists(Path)) + return Path; + + return std::string(); + } - std::string Path = + Path = (InstallDir + "/../../../../" + TripleStr + "/libc" + Multilib.osSuffix()) .str(); diff --git a/clang/lib/Driver/ToolChains/RISCVToolchain.cpp b/clang/lib/Driver/ToolChains/RISCVToolchain.cpp index 624099d21ae12..72b4fd04220e2 100644 --- a/clang/lib/Driver/ToolChains/RISCVToolchain.cpp +++ b/clang/lib/Driver/ToolChains/RISCVToolchain.cpp @@ -77,8 +77,8 @@ Tool *RISCVToolChain::buildLinker() const { } ToolChain::RuntimeLibType RISCVToolChain::GetDefaultRuntimeLibType() const { - return GCCInstallation.isValid() ? - ToolChain::RLT_Libgcc : ToolChain::RLT_CompilerRT; + return GCCInstallation.isValid() ? ToolChain::RLT_Libgcc + : ToolChain::RLT_CompilerRT; } ToolChain::UnwindLibType @@ -173,6 +173,14 @@ void RISCV::Linker::ConstructJob(Compilation &C, const JobAction &JA, } CmdArgs.push_back("-X"); + if (ToolChain.getTriple().getVendor() == llvm::Triple::MipsTechnologies) { + bool IsBigEndian = false; + if (Arg *A = Args.getLastArg(options::OPT_mlittle_endian, + options::OPT_mbig_endian)) + IsBigEndian = A->getOption().matches(options::OPT_mbig_endian); + CmdArgs.push_back(IsBigEndian ? 
"-EB" : "-EL"); + } + std::string Linker = getToolChain().GetLinkerPath(); bool WantCRTs = @@ -229,4 +237,10 @@ void RISCV::Linker::ConstructJob(Compilation &C, const JobAction &JA, JA, *this, ResponseFileSupport::AtFileCurCP(), Args.MakeArgString(Linker), CmdArgs, Inputs, Output)); } + +bool RISCVToolChain::IsIntegratedAssemblerDefault() const { + if (getTriple().getVendor() != llvm::Triple::MipsTechnologies) + return true; + return false; +} // RISCV tools end. diff --git a/clang/lib/Driver/ToolChains/RISCVToolchain.h b/clang/lib/Driver/ToolChains/RISCVToolchain.h index fa0aa265d842b..c189772cc0d0c 100644 --- a/clang/lib/Driver/ToolChains/RISCVToolchain.h +++ b/clang/lib/Driver/ToolChains/RISCVToolchain.h @@ -22,6 +22,7 @@ class LLVM_LIBRARY_VISIBILITY RISCVToolChain : public Generic_ELF { const llvm::opt::ArgList &Args); static bool hasGCCToolchain(const Driver &D, const llvm::opt::ArgList &Args); + bool IsIntegratedAssemblerDefault() const override; void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args, Action::OffloadKind) const override; diff --git a/clang/test/Misc/target-invalid-cpu-note/riscv.c b/clang/test/Misc/target-invalid-cpu-note/riscv.c index 8c5df5884cd79..fc8536d99cb80 100644 --- a/clang/test/Misc/target-invalid-cpu-note/riscv.c +++ b/clang/test/Misc/target-invalid-cpu-note/riscv.c @@ -25,6 +25,7 @@ // RISCV64: error: unknown target CPU 'not-a-cpu' // RISCV64-NEXT: note: valid target CPU values are: // RISCV64-SAME: {{^}} generic-rv64 +// RISCV64-SAME: {{^}}, mips-p8700 // RISCV64-SAME: {{^}}, rocket-rv64 // RISCV64-SAME: {{^}}, sifive-p450 // RISCV64-SAME: {{^}}, sifive-p470 @@ -72,6 +73,7 @@ // TUNE-RISCV64: error: unknown target CPU 'not-a-cpu' // TUNE-RISCV64-NEXT: note: valid target CPU values are: // TUNE-RISCV64-SAME: {{^}} generic-rv64 +// TUNE-RISCV64-SAME: {{^}}, mips-p8700 // TUNE-RISCV64-SAME: {{^}}, rocket-rv64 // TUNE-RISCV64-SAME: {{^}}, sifive-p450 // TUNE-RISCV64-SAME: {{^}}, 
sifive-p470 diff --git a/llvm/docs/RISCVUsage.rst b/llvm/docs/RISCVUsage.rst index bac267591e015..ad4e7bca11b65 100644 --- a/llvm/docs/RISCVUsage.rst +++ b/llvm/docs/RISCVUsage.rst @@ -426,6 +426,12 @@ The current vendor extensions supported are: ``Xwchc`` LLVM implements `the custom compressed opcodes present in some QingKe cores` by WCH / Nanjing Qinheng Microelectronics. The vendor refers to these opcodes by the name "XW". +``xmipscmove`` + LLVM implements conditional move for the `p8700 processor ` by MIPS. + +``xmipslsp`` + LLVM implements load/store pair instructions for the `p8700 processor ` by MIPS. + Experimental C Intrinsics ========================= diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp index b843bb5ae4310..4edeb07a0fc98 100644 --- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp +++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp @@ -855,6 +855,16 @@ struct RISCVOperand final : public MCParsedAsmOperand { VK == RISCVMCExpr::VK_RISCV_None; } + bool isUImm7Lsb000() const { + if (!isImm()) + return false; + int64_t Imm; + RISCVMCExpr::VariantKind VK = RISCVMCExpr::VK_RISCV_None; + bool IsConstantImm = evaluateConstantImm(getImm(), Imm, VK); + return IsConstantImm && isShiftedUInt<4, 3>(Imm) && + VK == RISCVMCExpr::VK_RISCV_None; + } + bool isUImm8Lsb00() const { if (!isImm()) return false; diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt index fd049d1a57860..2f63d7f1e4c39 100644 --- a/llvm/lib/Target/RISCV/CMakeLists.txt +++ b/llvm/lib/Target/RISCV/CMakeLists.txt @@ -47,6 +47,7 @@ add_llvm_target(RISCVCodeGen RISCVISelLowering.cpp RISCVLandingPadSetup.cpp RISCVMachineFunctionInfo.cpp + RISCVLoadStoreOptimizer.cpp RISCVMergeBaseOffset.cpp RISCVOptWInstrs.cpp RISCVPostRAExpandPseudoInsts.cpp @@ -54,6 +55,7 @@ add_llvm_target(RISCVCodeGen RISCVMoveMerger.cpp RISCVPushPopOptimizer.cpp RISCVRegisterInfo.cpp + 
RISCVRemoveBackToBackBranches.cpp RISCVSubtarget.cpp RISCVTargetMachine.cpp RISCVTargetObjectFile.cpp diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h index ca2f868cd4e76..79889fba75e80 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVBaseInfo.h @@ -306,6 +306,7 @@ enum OperandType : unsigned { OPERAND_UIMM6_LSB0, OPERAND_UIMM7, OPERAND_UIMM7_LSB00, + OPERAND_UIMM7_LSB000, OPERAND_UIMM8_LSB00, OPERAND_UIMM8, OPERAND_UIMM8_LSB000, diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp index 7b927522d3954..30ad5c84e996b 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp @@ -45,3 +45,11 @@ const MCExpr *RISCVMCAsmInfo::getExprForFDESymbol(const MCSymbol *Sym, assert(Encoding & dwarf::DW_EH_PE_sdata4 && "Unexpected encoding"); return RISCVMCExpr::create(ME, RISCVMCExpr::VK_RISCV_32_PCREL, Ctx); } + +void RISCVMCAsmInfo::setUseIntegratedAssembler(bool Value) { + UseIntegratedAssembler = Value; + if (!UseIntegratedAssembler) { + // gas doesn't handle non-constant leb128 + HasLEB128Directives = false; + } +} diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h index bceeb1256471d..ea444add6e415 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.h @@ -26,6 +26,8 @@ class RISCVMCAsmInfo : public MCAsmInfoELF { const MCExpr *getExprForFDESymbol(const MCSymbol *Sym, unsigned Encoding, MCStreamer &Streamer) const override; + /// Set whether assembly (inline or otherwise) should be parsed. 
+ void setUseIntegratedAssembler(bool Value) override; }; } // namespace llvm diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h index d7bab601d545c..1f12f77e7dc1a 100644 --- a/llvm/lib/Target/RISCV/RISCV.h +++ b/llvm/lib/Target/RISCV/RISCV.h @@ -84,6 +84,8 @@ void initializeRISCVMoveMergePass(PassRegistry &); FunctionPass *createRISCVPushPopOptimizationPass(); void initializeRISCVPushPopOptPass(PassRegistry &); +FunctionPass *createRISCVLoadStoreOptPass(); +void initializeRISCVLoadStoreOptPass(PassRegistry &); FunctionPass *createRISCVZacasABIFixPass(); void initializeRISCVZacasABIFixPass(PassRegistry &); @@ -94,6 +96,9 @@ createRISCVInstructionSelector(const RISCVTargetMachine &, const RISCVRegisterBankInfo &); void initializeRISCVDAGToDAGISelLegacyPass(PassRegistry &); +FunctionPass *createRISCVRemoveBackToBackBranches(); +void initializeRISCVRemoveBackToBackBranchesPass(PassRegistry &); + FunctionPass *createRISCVPostLegalizerCombiner(); void initializeRISCVPostLegalizerCombinerPass(PassRegistry &); diff --git a/llvm/lib/Target/RISCV/RISCV.td b/llvm/lib/Target/RISCV/RISCV.td index 00c3d702e12a2..1df6f9ae1944c 100644 --- a/llvm/lib/Target/RISCV/RISCV.td +++ b/llvm/lib/Target/RISCV/RISCV.td @@ -46,6 +46,7 @@ include "RISCVMacroFusion.td" // RISC-V Scheduling Models //===----------------------------------------------------------------------===// +include "RISCVSchedMIPSP8700.td" include "RISCVSchedRocket.td" include "RISCVSchedSiFive7.td" include "RISCVSchedSiFiveP400.td" diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td index 26e96cf831af7..6ede46d6fa31a 100644 --- a/llvm/lib/Target/RISCV/RISCVFeatures.td +++ b/llvm/lib/Target/RISCV/RISCVFeatures.td @@ -1447,6 +1447,23 @@ def TuneConditionalCompressedMoveFusion def HasConditionalMoveFusion : Predicate<"Subtarget->hasConditionalMoveFusion()">; def NoConditionalMoveFusion : Predicate<"!Subtarget->hasConditionalMoveFusion()">; +def TuneMIPSP8700 + : 
SubtargetFeature<"mips-p8700", "RISCVProcFamily", "Others", + "MIPS p8700 processor">; +def FeatureMIPSCMov : SubtargetFeature<"xmipscmov", "HasMIPSCMov", + "true", "Using CCMov", + [Feature64Bit]>; +def UsesMIPSCMov + : Predicate<"Subtarget->useCCMovInsn()">, + AssemblerPredicate<(all_of FeatureMIPSCMov), "'ccmov' instruction">; +def FeatureMIPSLoadStorePairs + : SubtargetFeature<"xmipslsp", "HasMIPSLSP", "true", + "Optimize for hardware load-store bonding">; +def UsesMIPSLoadStorePairs + : Predicate<"Subtarget->useLoadStorePairs()">, + AssemblerPredicate<(all_of FeatureMIPSLoadStorePairs), + "load and store pair instructions">; + def TuneSiFive7 : SubtargetFeature<"sifive7", "RISCVProcFamily", "SiFive7", "SiFive 7-Series processors">; diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 329b42d621cee..9d013c47b1deb 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -408,7 +408,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM, setOperationAction(ISD::ABS, MVT::i32, Custom); } - if (!Subtarget.hasVendorXTHeadCondMov()) + if (Subtarget.hasMIPSCMov()) + setOperationAction(ISD::SELECT, XLenVT, Legal); + else if (!Subtarget.hasVendorXTHeadCondMov()) setOperationAction(ISD::SELECT, XLenVT, Custom); static const unsigned FPLegalNodeTypes[] = { diff --git a/llvm/lib/Target/RISCV/RISCVInstrFormats.td b/llvm/lib/Target/RISCV/RISCVInstrFormats.td index 013c26c72bfd5..9ffed2c80ad6d 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrFormats.td +++ b/llvm/lib/Target/RISCV/RISCVInstrFormats.td @@ -514,6 +514,78 @@ class RVInstJ + : RVInst { + bits<7> imm7; + bits<5> rs1; + bits<5> rd1; + bits<5> rd2; + + let Inst{31-27} = rd2; + let Inst{26-23} = imm7{6-3}; + let Inst{22-20} = 0b000; + let Inst{19-15} = rs1; + let Inst{14-12} = 0b100; + let Inst{11-7} = rd1; + let Inst{6-0} = 0b0001011; +} + +// Load word pair format. 
+class LWPFormat + : RVInst { + bits<7> imm7; + bits<5> rs1; + bits<5> rd1; + bits<5> rd2; + + let Inst{31-27} = rd2; + let Inst{26-22} = imm7{6-2}; + let Inst{21-20} = 0b01; + let Inst{19-15} = rs1; + let Inst{14-12} = 0b100; + let Inst{11-7} = rd1; + let Inst{6-0} = 0b0001011; +} + +// Store double pair format. +class SDPFormat + : RVInst { + bits<7> imm7; + bits<5> rs3; + bits<5> rs2; + bits<5> rs1; + + let Inst{31-27} = rs3; + let Inst{26-25} = imm7{6-5}; + let Inst{24-20} = rs2; + let Inst{19-15} = rs1; + let Inst{14-12} = 0b101; + let Inst{11-10} = imm7{4-3}; + let Inst{9-0} = 0b0000001011; +} + +// Store word pair format. +class SWPFormat + : RVInst { + bits<7> imm7; + bits<5> rs3; + bits<5> rs2; + bits<5> rs1; + + let Inst{31-27} = rs3; + let Inst{26-25} = imm7{6-5}; + let Inst{24-20} = rs2; + let Inst{19-15} = rs1; + let Inst{14-12} = 0b101; + let Inst{11-9} = imm7{4-2}; + let Inst{8-0} = 0b010001011; +} + //===----------------------------------------------------------------------===// // Instruction classes for .insn directives //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp index 47273d6bc06d6..ad7abf4f2770a 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp @@ -2464,6 +2464,9 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI, case RISCVOp::OPERAND_UIMM7_LSB00: Ok = isShiftedUInt<5, 2>(Imm); break; + case RISCVOp::OPERAND_UIMM7_LSB000: + Ok = isShiftedUInt<4, 3>(Imm); + break; case RISCVOp::OPERAND_UIMM8_LSB00: Ok = isShiftedUInt<6, 2>(Imm); break; @@ -2710,6 +2713,45 @@ MachineInstr *RISCVInstrInfo::emitLdStWithAddr(MachineInstr &MemI, .setMemRefs(MemI.memoperands()) .setMIFlags(MemI.getFlags()); } +bool RISCVInstrInfo::isPairableLdStInstOpc(unsigned Opc) { + switch (Opc) { + default: + return false; + case RISCV::SH: + case RISCV::LH: + case RISCV::LHU: + case 
RISCV::SW: + case RISCV::FSW: + case RISCV::LW: + case RISCV::FLW: + case RISCV::SD: + case RISCV::FSD: + case RISCV::LD: + case RISCV::FLD: + return true; + } +} + +bool RISCVInstrInfo::isLdStSafeToPair(const MachineInstr &LdSt, + const TargetRegisterInfo *TRI) { + // If this is a volatile load/store, don't mess with it. + if (LdSt.hasOrderedMemoryRef() || LdSt.getNumExplicitOperands() != 3) + return false; + + if (LdSt.getOperand(1).isFI()) + return true; + + assert(LdSt.getOperand(1).isReg() && "Expected a reg operand."); + // Can't cluster if the instruction modifies the base register + // or it is update form. e.g. ld x5,8(x5) + if (LdSt.modifiesRegister(LdSt.getOperand(1).getReg(), TRI)) + return false; + + if (!LdSt.getOperand(2).isImm()) + return false; + + return true; +} bool RISCVInstrInfo::getMemOperandsWithOffsetWidth( const MachineInstr &LdSt, SmallVectorImpl &BaseOps, diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.h b/llvm/lib/Target/RISCV/RISCVInstrInfo.h index 005cba5d35610..3daa6fdceffaf 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.h +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.h @@ -298,6 +298,15 @@ class RISCVInstrInfo : public RISCVGenInstrInfo { unsigned getTailDuplicateSize(CodeGenOptLevel OptLevel) const override; + /// Return true if pairing the given load or store may be paired with another. + static bool isPairableLdStInstOpc(unsigned Opc); + + static bool isLdStSafeToPair(const MachineInstr &LdSt, + const TargetRegisterInfo *TRI); + + std::optional> + isRVVSpillForZvlsseg(unsigned Opcode) const; + protected: const RISCVSubtarget &STI; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td index 5747f05ffafd4..cc4bfa95981ed 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td @@ -251,6 +251,147 @@ def simm12 : RISCVSImmLeafOp<12> { }]; } +// A 7-bit unsigned immediate where the least significant two bits are zero. 
+def uimm7_lsb00 : RISCVOp, + ImmLeaf(Imm);}]> { + let ParserMatchClass = UImmAsmOperand<7, "Lsb00">; + let EncoderMethod = "getImmOpValue"; + let DecoderMethod = "decodeUImmOperand<7>"; + let OperandType = "OPERAND_UIMM7_LSB00"; + let MCOperandPredicate = [{ + int64_t Imm; + if (!MCOp.evaluateAsConstantImm(Imm)) + return false; + return isShiftedUInt<5, 2>(Imm); + }]; +} + +// A 7-bit unsigned immediate where the least significant three bits are zero. +def uimm7_lsb000 : RISCVOp, + ImmLeaf(Imm);}]> { + let ParserMatchClass = UImmAsmOperand<7, "Lsb000">; + let EncoderMethod = "getImmOpValue"; + let DecoderMethod = "decodeUImmOperand<7>"; + let OperandType = "OPERAND_UIMM7_LSB000"; + let MCOperandPredicate = [{ + int64_t Imm; + if (!MCOp.evaluateAsConstantImm(Imm)) + return false; + return isShiftedUInt<4, 3>(Imm); + }]; +} + +// A 8-bit unsigned immediate where the least significant two bits are zero. +def uimm8_lsb00 : RISCVOp, + ImmLeaf(Imm);}]> { + let ParserMatchClass = UImmAsmOperand<8, "Lsb00">; + let EncoderMethod = "getImmOpValue"; + let DecoderMethod = "decodeUImmOperand<8>"; + let OperandType = "OPERAND_UIMM8_LSB00"; + let MCOperandPredicate = [{ + int64_t Imm; + if (!MCOp.evaluateAsConstantImm(Imm)) + return false; + return isShiftedUInt<6, 2>(Imm); + }]; +} + +// A 8-bit unsigned immediate where the least significant three bits are zero. +def uimm8_lsb000 : RISCVOp, + ImmLeaf(Imm);}]> { + let ParserMatchClass = UImmAsmOperand<8, "Lsb000">; + let EncoderMethod = "getImmOpValue"; + let DecoderMethod = "decodeUImmOperand<8>"; + let OperandType = "OPERAND_UIMM8_LSB000"; + let MCOperandPredicate = [{ + int64_t Imm; + if (!MCOp.evaluateAsConstantImm(Imm)) + return false; + return isShiftedUInt<5, 3>(Imm); + }]; +} + +// A 9-bit signed immediate where the least significant bit is zero. 
+def simm9_lsb0 : Operand, + ImmLeaf(Imm);}]> { + let ParserMatchClass = SImmAsmOperand<9, "Lsb0">; + let PrintMethod = "printBranchOperand"; + let EncoderMethod = "getImmOpValueAsr1"; + let DecoderMethod = "decodeSImmOperandAndLsl1<9>"; + let MCOperandPredicate = [{ + int64_t Imm; + if (MCOp.evaluateAsConstantImm(Imm)) + return isShiftedInt<8, 1>(Imm); + return MCOp.isBareSymbolRef(); + + }]; + let OperandType = "OPERAND_PCREL"; +} + +// A 9-bit unsigned immediate where the least significant three bits are zero. +def uimm9_lsb000 : RISCVOp, + ImmLeaf(Imm);}]> { + let ParserMatchClass = UImmAsmOperand<9, "Lsb000">; + let EncoderMethod = "getImmOpValue"; + let DecoderMethod = "decodeUImmOperand<9>"; + let OperandType = "OPERAND_UIMM9_LSB000"; + let MCOperandPredicate = [{ + int64_t Imm; + if (!MCOp.evaluateAsConstantImm(Imm)) + return false; + return isShiftedUInt<6, 3>(Imm); + }]; +} + +// A 10-bit unsigned immediate where the least significant two bits are zero +// and the immediate can't be zero. +def uimm10_lsb00nonzero : RISCVOp, + ImmLeaf(Imm) && (Imm != 0);}]> { + let ParserMatchClass = UImmAsmOperand<10, "Lsb00NonZero">; + let EncoderMethod = "getImmOpValue"; + let DecoderMethod = "decodeUImmNonZeroOperand<10>"; + let OperandType = "OPERAND_UIMM10_LSB00_NONZERO"; + let MCOperandPredicate = [{ + int64_t Imm; + if (!MCOp.evaluateAsConstantImm(Imm)) + return false; + return isShiftedUInt<8, 2>(Imm) && (Imm != 0); + }]; +} + +// A 10-bit signed immediate where the least significant four bits are zero. 
+def simm10_lsb0000nonzero : RISCVOp, + ImmLeaf(Imm);}]> { + let ParserMatchClass = SImmAsmOperand<10, "Lsb0000NonZero">; + let EncoderMethod = "getImmOpValue"; + let DecoderMethod = "decodeSImmNonZeroOperand<10>"; + let OperandType = "OPERAND_SIMM10_LSB0000_NONZERO"; + let MCOperandPredicate = [{ + int64_t Imm; + if (!MCOp.evaluateAsConstantImm(Imm)) + return false; + return isShiftedInt<6, 4>(Imm) && (Imm != 0); + }]; +} + +// A 12-bit signed immediate where the least significant bit is zero. +def simm12_lsb0 : Operand, + ImmLeaf(Imm);}]> { + let ParserMatchClass = SImmAsmOperand<12, "Lsb0">; + let PrintMethod = "printBranchOperand"; + let EncoderMethod = "getImmOpValueAsr1"; + let DecoderMethod = "decodeSImmOperandAndLsl1<12>"; + let MCOperandPredicate = [{ + int64_t Imm; + if (MCOp.evaluateAsConstantImm(Imm)) + return isShiftedInt<11, 1>(Imm); + return MCOp.isBareSymbolRef(); + }]; + let OperandType = "OPERAND_PCREL"; +} + // A 12-bit signed immediate which cannot fit in 6-bit signed immediate, // but even negative value fit in 12-bit. def simm12_no6 : ImmLeaf { // Standalone (codegen-only) immleaf patterns. +// A 12-bit signed immediate plus one where the imm range will be -2047~2048. +def simm12_plus1 : ImmLeaf(Imm) && Imm != -2048) || Imm == 2048;}]>; + // A 6-bit constant greater than 32. 
def uimm6gt32 : ImmLeaf(Imm) && Imm > 32; @@ -800,6 +945,13 @@ def SRAW : ALUW_rr<0b0100000, 0b101, "sraw">, } // IsSignExtendingOpW = 1 } // Predicates = [IsRV64] +let Predicates = [UsesMIPSCMov], hasSideEffects = 0, mayLoad = 0, mayStore = 0 in { +def CCMOV : RVInstR4<0b11, 0b011, OPC_CUSTOM_0, (outs GPR:$rd), + (ins GPR:$rs1, GPR:$rs2, GPR:$rs3), + "ccmov", "$rd, $rs2, $rs1, $rs3">, + Sched<[]>; +} // Predicates = [UsesMIPSCMov] + //===----------------------------------------------------------------------===// // Privileged instructions //===----------------------------------------------------------------------===// @@ -2017,6 +2169,74 @@ def : Pat<(binop_allwusers GPR:$rs1, (AddiPair:$rs2)), } //===----------------------------------------------------------------------===// + +// MIPS extensions +//===----------------------------------------------------------------------===// + +let Predicates = [UsesMIPSCMov] in { +def : Pat<(select (XLenVT (setne (XLenVT GPR:$rs2), (XLenVT 0))), + (XLenVT GPR:$rs1), (XLenVT GPR:$rs3)), + (CCMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>; +def : Pat<(select (XLenVT (seteq (XLenVT GPR:$rs2), (XLenVT 0))), + (XLenVT GPR:$rs3), (XLenVT GPR:$rs1)), + (CCMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>; +def : Pat<(select (XLenVT (setne (XLenVT GPR:$x), (XLenVT simm12_plus1:$y))), + (XLenVT GPR:$rs1), (XLenVT GPR:$rs3)), + (CCMOV GPR:$rs1, (ADDI GPR:$x, (NegImm simm12_plus1:$y)), GPR:$rs3)>; +def : Pat<(select (XLenVT (seteq (XLenVT GPR:$x), (XLenVT simm12_plus1:$y))), + (XLenVT GPR:$rs3), (XLenVT GPR:$rs1)), + (CCMOV GPR:$rs1, (ADDI GPR:$x, (NegImm simm12_plus1:$y)), GPR:$rs3)>; +def : Pat<(select (XLenVT (setne (XLenVT GPR:$x), (XLenVT GPR:$y))), + (XLenVT GPR:$rs1), (XLenVT GPR:$rs3)), + (CCMOV GPR:$rs1, (XOR GPR:$x, GPR:$y), GPR:$rs3)>; +def : Pat<(select (XLenVT (seteq (XLenVT GPR:$x), (XLenVT GPR:$y))), + (XLenVT GPR:$rs3), (XLenVT GPR:$rs1)), + (CCMOV GPR:$rs1, (XOR GPR:$x, GPR:$y), GPR:$rs3)>; +def : Pat<(select (XLenVT (setuge (XLenVT GPR:$x), 
(XLenVT GPR:$y))), + (XLenVT GPR:$rs3), (XLenVT GPR:$rs1)), + (CCMOV GPR:$rs1, (SLTU GPR:$x, GPR:$y), GPR:$rs3)>; +def : Pat<(select (XLenVT (setule (XLenVT GPR:$y), (XLenVT GPR:$x))), + (XLenVT GPR:$rs3), (XLenVT GPR:$rs1)), + (CCMOV GPR:$rs1, (SLTU GPR:$x, GPR:$y), GPR:$rs3)>; +def : Pat<(select (XLenVT (setge (XLenVT GPR:$x), (XLenVT GPR:$y))), + (XLenVT GPR:$rs3), (XLenVT GPR:$rs1)), + (CCMOV GPR:$rs1, (SLT GPR:$x, GPR:$y), GPR:$rs3)>; +def : Pat<(select (XLenVT (setle (XLenVT GPR:$y), (XLenVT GPR:$x))), + (XLenVT GPR:$rs3), (XLenVT GPR:$rs1)), + (CCMOV GPR:$rs1, (SLT GPR:$x, GPR:$y), GPR:$rs3)>; +def : Pat<(select (XLenVT GPR:$rs2), (XLenVT GPR:$rs1), (XLenVT GPR:$rs3)), + (CCMOV GPR:$rs1, GPR:$rs2, GPR:$rs3)>; +} // Predicates = [UsesMIPSCMov] + +let Predicates = [UsesMIPSLoadStorePairs], hasSideEffects = 0 in { +def LWP : LWPFormat<(outs GPR:$rd1, GPR:$rd2), (ins GPR:$rs1, uimm7_lsb00:$imm7), + "lwp", "$rd1, $rd2, ${imm7}(${rs1})">, + Sched<[WriteLDW, WriteLDW, ReadMemBase]> { +let mayLoad = 1; +let mayStore = 0; +} +def LDP : LDPFormat<(outs GPR:$rd1, GPR:$rd2), (ins GPR:$rs1, uimm7_lsb000:$imm7), + "ldp", "$rd1, $rd2, ${imm7}(${rs1})">, + Sched<[WriteLDD, WriteLDD, ReadMemBase]> { +let mayLoad = 1; +let mayStore = 0; +} +def SWP : SWPFormat<(outs), (ins GPR:$rs2, GPR:$rs3, GPR:$rs1, uimm7_lsb00:$imm7), + "swp", "$rs2, $rs3, ${imm7}(${rs1})">, + Sched<[WriteSTW, ReadStoreData, ReadStoreData, ReadMemBase]> { +let mayLoad = 0; +let mayStore = 1; +} +def SDP : SDPFormat<(outs), (ins GPR:$rs2, GPR:$rs3, GPR:$rs1, uimm7_lsb000:$imm7), + "sdp", "$rs2, $rs3, ${imm7}(${rs1})">, + Sched<[WriteSTD, ReadStoreData, ReadStoreData, ReadMemBase]> { +let mayLoad = 0; +let mayStore = 1; +} +} + +//===----------------------------------------------------------------------===// + // Standard extensions //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td 
index e5a5f60f9fec1..c297e83f4be2c 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoC.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoC.td @@ -94,131 +94,6 @@ def c_lui_imm : RISCVOp, }]; } -// A 7-bit unsigned immediate where the least significant two bits are zero. -def uimm7_lsb00 : RISCVOp, - ImmLeaf(Imm);}]> { - let ParserMatchClass = UImmAsmOperand<7, "Lsb00">; - let EncoderMethod = "getImmOpValue"; - let DecoderMethod = "decodeUImmOperand<7>"; - let OperandType = "OPERAND_UIMM7_LSB00"; - let MCOperandPredicate = [{ - int64_t Imm; - if (!MCOp.evaluateAsConstantImm(Imm)) - return false; - return isShiftedUInt<5, 2>(Imm); - }]; -} - -// A 8-bit unsigned immediate where the least significant two bits are zero. -def uimm8_lsb00 : RISCVOp, - ImmLeaf(Imm);}]> { - let ParserMatchClass = UImmAsmOperand<8, "Lsb00">; - let EncoderMethod = "getImmOpValue"; - let DecoderMethod = "decodeUImmOperand<8>"; - let OperandType = "OPERAND_UIMM8_LSB00"; - let MCOperandPredicate = [{ - int64_t Imm; - if (!MCOp.evaluateAsConstantImm(Imm)) - return false; - return isShiftedUInt<6, 2>(Imm); - }]; -} - -// A 8-bit unsigned immediate where the least significant three bits are zero. -def uimm8_lsb000 : RISCVOp, - ImmLeaf(Imm);}]> { - let ParserMatchClass = UImmAsmOperand<8, "Lsb000">; - let EncoderMethod = "getImmOpValue"; - let DecoderMethod = "decodeUImmOperand<8>"; - let OperandType = "OPERAND_UIMM8_LSB000"; - let MCOperandPredicate = [{ - int64_t Imm; - if (!MCOp.evaluateAsConstantImm(Imm)) - return false; - return isShiftedUInt<5, 3>(Imm); - }]; -} - -// A 9-bit signed immediate where the least significant bit is zero. 
-def simm9_lsb0 : Operand, - ImmLeaf(Imm);}]> { - let ParserMatchClass = SImmAsmOperand<9, "Lsb0">; - let PrintMethod = "printBranchOperand"; - let EncoderMethod = "getImmOpValueAsr1"; - let DecoderMethod = "decodeSImmOperandAndLsl1<9>"; - let MCOperandPredicate = [{ - int64_t Imm; - if (MCOp.evaluateAsConstantImm(Imm)) - return isShiftedInt<8, 1>(Imm); - return MCOp.isBareSymbolRef(); - - }]; - let OperandType = "OPERAND_PCREL"; -} - -// A 9-bit unsigned immediate where the least significant three bits are zero. -def uimm9_lsb000 : RISCVOp, - ImmLeaf(Imm);}]> { - let ParserMatchClass = UImmAsmOperand<9, "Lsb000">; - let EncoderMethod = "getImmOpValue"; - let DecoderMethod = "decodeUImmOperand<9>"; - let OperandType = "OPERAND_UIMM9_LSB000"; - let MCOperandPredicate = [{ - int64_t Imm; - if (!MCOp.evaluateAsConstantImm(Imm)) - return false; - return isShiftedUInt<6, 3>(Imm); - }]; -} - -// A 10-bit unsigned immediate where the least significant two bits are zero -// and the immediate can't be zero. -def uimm10_lsb00nonzero : RISCVOp, - ImmLeaf(Imm) && (Imm != 0);}]> { - let ParserMatchClass = UImmAsmOperand<10, "Lsb00NonZero">; - let EncoderMethod = "getImmOpValue"; - let DecoderMethod = "decodeUImmNonZeroOperand<10>"; - let OperandType = "OPERAND_UIMM10_LSB00_NONZERO"; - let MCOperandPredicate = [{ - int64_t Imm; - if (!MCOp.evaluateAsConstantImm(Imm)) - return false; - return isShiftedUInt<8, 2>(Imm) && (Imm != 0); - }]; -} - -// A 10-bit signed immediate where the least significant four bits are zero. 
-def simm10_lsb0000nonzero : RISCVOp, - ImmLeaf(Imm);}]> { - let ParserMatchClass = SImmAsmOperand<10, "Lsb0000NonZero">; - let EncoderMethod = "getImmOpValue"; - let DecoderMethod = "decodeSImmNonZeroOperand<10>"; - let OperandType = "OPERAND_SIMM10_LSB0000_NONZERO"; - let MCOperandPredicate = [{ - int64_t Imm; - if (!MCOp.evaluateAsConstantImm(Imm)) - return false; - return isShiftedInt<6, 4>(Imm) && (Imm != 0); - }]; -} - -// A 12-bit signed immediate where the least significant bit is zero. -def simm12_lsb0 : Operand, - ImmLeaf(Imm);}]> { - let ParserMatchClass = SImmAsmOperand<12, "Lsb0">; - let PrintMethod = "printBranchOperand"; - let EncoderMethod = "getImmOpValueAsr1"; - let DecoderMethod = "decodeSImmOperandAndLsl1<12>"; - let MCOperandPredicate = [{ - int64_t Imm; - if (MCOp.evaluateAsConstantImm(Imm)) - return isShiftedInt<11, 1>(Imm); - return MCOp.isBareSymbolRef(); - }]; - let OperandType = "OPERAND_PCREL"; -} def InsnCDirectiveOpcode : AsmOperandClass { let Name = "InsnCDirectiveOpcode"; diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td index c57e7af3c5614..23713b6d49758 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZb.td @@ -427,7 +427,7 @@ def UNZIP_RV32 : Unary_r<0b000010001111, 0b101, "unzip">, // Pseudo Instructions //===----------------------------------------------------------------------===// -let Predicates = [HasStdExtZba, IsRV64] in { +let Predicates = [HasStdExtZba, IsRV64], EmitPriority = 0 in { def : InstAlias<"zext.w $rd, $rs", (ADD_UW GPR:$rd, GPR:$rs, X0)>; } // Predicates = [HasStdExtZba, IsRV64] diff --git a/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp new file mode 100644 index 0000000000000..52dc97664d983 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVLoadStoreOptimizer.cpp @@ -0,0 +1,371 @@ +//===----- RISCVLoadStoreOptimizer.cpp ------------------------------------===// 
+// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Bundle loads and stores that operate on consecutive memory locations to take +// advantage of hardware load/store bonding. +// +//===----------------------------------------------------------------------===// + +#include "RISCV.h" +#include "RISCVTargetMachine.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/Debug.h" +#include "llvm/Target/TargetOptions.h" + +using namespace llvm; + +#define DEBUG_TYPE "riscv-load-store-opt" +#define RISCV_LOAD_STORE_OPT_NAME "RISCV Load / Store Optimizer" +namespace { + +struct RISCVLoadStoreOpt : public MachineFunctionPass { + static char ID; + bool runOnMachineFunction(MachineFunction &Fn) override; + + RISCVLoadStoreOpt() : MachineFunctionPass(ID) {} + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + StringRef getPassName() const override { return RISCV_LOAD_STORE_OPT_NAME; } + + // Find and pair load/store instructions. + bool tryToPairLdStInst(MachineBasicBlock::iterator &MBBI); + + // Convert load/store pairs to single instructions. + bool tryConvertToLdStPair(MachineBasicBlock::iterator First, + MachineBasicBlock::iterator Second); + + // Scan the instructions looking for a load/store that can be combined + // with the current instruction into a load/store pair. + // Return the matching instruction if one is found, else MBB->end(). 
+ MachineBasicBlock::iterator findMatchingInsn(MachineBasicBlock::iterator I, + bool &MergeForward); + + MachineBasicBlock::iterator + mergePairedInsns(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator Paired, bool MergeForward); + +private: + AliasAnalysis *AA; + MachineRegisterInfo *MRI; + const RISCVInstrInfo *TII; + const RISCVRegisterInfo *TRI; + LiveRegUnits ModifiedRegUnits, UsedRegUnits; + bool UseLoadStorePair = false; +}; +} // end anonymous namespace + +char RISCVLoadStoreOpt::ID = 0; +INITIALIZE_PASS(RISCVLoadStoreOpt, DEBUG_TYPE, RISCV_LOAD_STORE_OPT_NAME, false, + false) + +bool RISCVLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { + if (skipFunction(Fn.getFunction())) + return false; + const RISCVSubtarget &Subtarget = Fn.getSubtarget(); + + if (!Subtarget.useLoadStorePairs()) + return false; + + bool MadeChange = false; + TII = Subtarget.getInstrInfo(); + TRI = Subtarget.getRegisterInfo(); + MRI = &Fn.getRegInfo(); + AA = &getAnalysis().getAAResults(); + ModifiedRegUnits.init(*TRI); + UsedRegUnits.init(*TRI); + UseLoadStorePair = Subtarget.useLoadStorePairs(); + + for (MachineBasicBlock &MBB : Fn) { + LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n"); + + for (MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); + MBBI != E;) { + if (TII->isPairableLdStInstOpc(MBBI->getOpcode()) && + tryToPairLdStInst(MBBI)) + MadeChange = true; + else + ++MBBI; + } + } + return MadeChange; +} + +// Find loads and stores that can be merged into a single load or store pair +// instruction. +bool RISCVLoadStoreOpt::tryToPairLdStInst(MachineBasicBlock::iterator &MBBI) { + MachineInstr &MI = *MBBI; + MachineBasicBlock::iterator E = MI.getParent()->end(); + + if (!TII->isLdStSafeToPair(MI, TRI)) + return false; + + // Look ahead for a pairable instruction. 
+ bool MergeForward; + MachineBasicBlock::iterator Paired = findMatchingInsn(MBBI, MergeForward); + if (Paired != E) { + MBBI = mergePairedInsns(MBBI, Paired, MergeForward); + return true; + } + return false; +} + +bool RISCVLoadStoreOpt::tryConvertToLdStPair( + MachineBasicBlock::iterator First, MachineBasicBlock::iterator Second) { + if (!UseLoadStorePair) + return false; + + unsigned PairOpc; + switch (First->getOpcode()) { + default: + return false; + case RISCV::SW: + PairOpc = RISCV::SWP; + break; + case RISCV::LW: + PairOpc = RISCV::LWP; + break; + case RISCV::SD: + PairOpc = RISCV::SDP; + break; + case RISCV::LD: + PairOpc = RISCV::LDP; + break; + } + + MachineFunction *MF = First->getMF(); + const MachineMemOperand *MMO = *First->memoperands_begin(); + Align MMOAlign = MMO->getAlign(); + if (const PseudoSourceValue *Source = MMO->getPseudoValue()) + if (Source->kind() == PseudoSourceValue::FixedStack) + MMOAlign = MF->getSubtarget().getFrameLowering()->getStackAlign(); + + if (MMOAlign < Align(MMO->getSize().getValue() * 2)) + return false; + int64_t Offset = First->getOperand(2).getImm(); + if (!isUInt<7>(Offset) || + !isAligned(Align(MMO->getSize().getValue()), Offset)) + return false; + MachineInstrBuilder MIB = BuildMI( + *MF, + First->getDebugLoc().get() ? 
First->getDebugLoc() : Second->getDebugLoc(), + TII->get(PairOpc)); + MIB.add(First->getOperand(0)) + .add(Second->getOperand(0)) + .add(First->getOperand(1)) + .add(First->getOperand(2)) + .cloneMergedMemRefs({&*First, &*Second}); + + First->getParent()->insert(First, MIB); + + First->removeFromParent(); + Second->removeFromParent(); + + return true; +} + +/// TODO: Move to lambda +static bool mayAlias(MachineInstr &MIa, + SmallVectorImpl &MemInsns, + AliasAnalysis *AA) { + for (MachineInstr *MIb : MemInsns) + if (MIa.mayAlias(AA, *MIb, /*UseTBAA*/ false)) + return true; + + return false; +} + +/// Scan the instructions looking for a load/store that can be combined with the +/// current instruction into a wider equivalent or a load/store pair. +MachineBasicBlock::iterator +RISCVLoadStoreOpt::findMatchingInsn(MachineBasicBlock::iterator I, + bool &MergeForward) { + MachineBasicBlock::iterator E = I->getParent()->end(); + MachineBasicBlock::iterator MBBI = I; + MachineInstr &FirstMI = *I; + MBBI = next_nodbg(MBBI, E); + + bool MayLoad = FirstMI.mayLoad(); + Register Reg = FirstMI.getOperand(0).getReg(); + Register BaseReg = FirstMI.getOperand(1).getReg(); + int Offset = FirstMI.getOperand(2).getImm(); + int OffsetStride = (*FirstMI.memoperands_begin())->getSize().getValue(); + + LiveRegUnits UsedInBetween; + UsedInBetween.init(*TRI); + + MergeForward = false; + + // Track which register units have been modified and used between the first + // insn (inclusive) and the second insn. + ModifiedRegUnits.clear(); + UsedRegUnits.clear(); + + // Remember any instructions that read/write memory between FirstMI and MI. + SmallVector MemInsns; + + for (unsigned Count = 0; MBBI != E && Count < 128; + MBBI = next_nodbg(MBBI, E)) { + MachineInstr &MI = *MBBI; + + UsedInBetween.accumulate(MI); + + // Don't count transient instructions towards the search limit since there + // may be different numbers of them if e.g. debug information is present. 
+ if (!MI.isTransient()) + ++Count; + + if (MI.getOpcode() == FirstMI.getOpcode() && + TII->isLdStSafeToPair(MI, TRI)) { + Register MIBaseReg = MI.getOperand(1).getReg(); + int MIOffset = MI.getOperand(2).getImm(); + + if (BaseReg == MIBaseReg) { + + if ((Offset != MIOffset + OffsetStride) && + (Offset + OffsetStride != MIOffset)) { + LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, + TRI); + MemInsns.push_back(&MI); + continue; + } + + // If the destination register of one load is the same register or a + // sub/super register of the other load, bail and keep looking. + if (MayLoad && + TRI->isSuperOrSubRegisterEq(Reg, MI.getOperand(0).getReg())) { + LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, + TRI); + MemInsns.push_back(&MI); + continue; + } + + // If the BaseReg has been modified, then we cannot do the optimization. + if (!ModifiedRegUnits.available(BaseReg)) + return E; + + // If the Rt of the second instruction was not modified or used between + // the two instructions and none of the instructions between the second + // and first alias with the second, we can combine the second into the + // first. + if (ModifiedRegUnits.available(MI.getOperand(0).getReg()) && + !(MI.mayLoad() && + !UsedRegUnits.available(MI.getOperand(0).getReg())) && + !mayAlias(MI, MemInsns, AA)) { + + MergeForward = false; + return MBBI; + } + + // Likewise, if the Rt of the first instruction is not modified or used + // between the two instructions and none of the instructions between the + // first and the second alias with the first, we can combine the first + // into the second. + if (!(MayLoad && + !UsedRegUnits.available(FirstMI.getOperand(0).getReg())) && + !mayAlias(FirstMI, MemInsns, AA)) { + + if (ModifiedRegUnits.available(FirstMI.getOperand(0).getReg())) { + MergeForward = true; + return MBBI; + } + } + // Unable to combine these instructions due to interference in between. + // Keep looking. 
+ } + } + + // If the instruction wasn't a matching load or store. Stop searching if we + // encounter a call instruction that might modify memory. + if (MI.isCall()) + return E; + + // Update modified / uses register units. + LiveRegUnits::accumulateUsedDefed(MI, ModifiedRegUnits, UsedRegUnits, TRI); + + // Otherwise, if the base register is modified, we have no match, so + // return early. + if (!ModifiedRegUnits.available(BaseReg)) + return E; + + // Update list of instructions that read/write memory. + if (MI.mayLoadOrStore()) + MemInsns.push_back(&MI); + } + return E; +} + +MachineBasicBlock::iterator __attribute__((noinline)) +RISCVLoadStoreOpt::mergePairedInsns(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator Paired, + bool MergeForward) { + MachineBasicBlock::iterator E = I->getParent()->end(); + MachineBasicBlock::iterator NextI = next_nodbg(I, E); + if (NextI == Paired) + NextI = next_nodbg(NextI, E); + + // Insert our new paired instruction after whichever of the paired + // instructions MergeForward indicates. + MachineBasicBlock::iterator InsertionPoint = MergeForward ? Paired : I; + MachineBasicBlock::iterator DeletionPoint = MergeForward ? I : Paired; + int Offset = I->getOperand(2).getImm(); + int PairedOffset = Paired->getOperand(2).getImm(); + bool InsertAfter = (Offset < PairedOffset) ^ MergeForward; + + if (!MergeForward) + Paired->getOperand(1).setIsKill(false); + + // Kill flags may become invalid when moving stores for pairing. + if (I->getOperand(0).isUse()) { + if (!MergeForward) { + // Clear kill flags on store if moving upwards. + I->getOperand(0).setIsKill(false); + Paired->getOperand(0).setIsKill(false); + } else { + // Clear kill flags of the first stores register. 
+ Register Reg = I->getOperand(0).getReg(); + for (MachineInstr &MI : make_range(std::next(I), Paired)) + MI.clearRegisterKills(Reg, TRI); + } + } + + MachineInstr *ToInsert = DeletionPoint->removeFromParent(); + MachineBasicBlock &MBB = *InsertionPoint->getParent(); + MachineBasicBlock::iterator First, Second; + + if (!InsertAfter) { + First = MBB.insert(InsertionPoint, ToInsert); + Second = InsertionPoint; + } else { + Second = MBB.insertAfter(InsertionPoint, ToInsert); + First = InsertionPoint; + } + + if (!tryConvertToLdStPair(First, Second)) + finalizeBundle(MBB, First.getInstrIterator(), + std::next(Second).getInstrIterator()); + + LLVM_DEBUG(dbgs() << "Bonding pair load/store:\n "); + LLVM_DEBUG(prev_nodbg(NextI, MBB.begin())->print(dbgs())); + return NextI; +} + +/// Returns an instance of the Load / Store Optimization pass. +FunctionPass *llvm::createRISCVLoadStoreOptPass() { + return new RISCVLoadStoreOpt(); +} diff --git a/llvm/lib/Target/RISCV/RISCVProcessors.td b/llvm/lib/Target/RISCV/RISCVProcessors.td index 03a48ff3c1758..8a5a9b4f19ecb 100644 --- a/llvm/lib/Target/RISCV/RISCVProcessors.td +++ b/llvm/lib/Target/RISCV/RISCVProcessors.td @@ -78,6 +78,21 @@ def GENERIC_RV64 : RISCVProcessorModel<"generic-rv64", // to change to the appropriate rv32/rv64 version. 
def GENERIC : RISCVTuneProcessorModel<"generic", NoSchedModel>, GenericTuneInfo; +def MIPS_P8700 : RISCVProcessorModel<"mips-p8700", + MIPSP8700Model, + [Feature64Bit, + FeatureStdExtI, + FeatureStdExtM, + FeatureStdExtA, + FeatureStdExtF, + FeatureStdExtD, + FeatureStdExtC, + FeatureStdExtZba, + FeatureStdExtZbb, + FeatureMIPSCMov, + FeatureMIPSLoadStorePairs], + [TuneMIPSP8700]>; + def ROCKET_RV32 : RISCVProcessorModel<"rocket-rv32", RocketModel, [Feature32Bit, @@ -279,7 +294,6 @@ def SIFIVE_P470 : RISCVProcessorModel<"sifive-p470", SiFiveP400Model, !listconcat(SiFiveP400TuneFeatures, [TuneNoSinkSplatOperands])>; - def SIFIVE_P670 : RISCVProcessorModel<"sifive-p670", SiFiveP600Model, !listconcat(RVA22U64Features, [FeatureStdExtV, diff --git a/llvm/lib/Target/RISCV/RISCVRemoveBackToBackBranches.cpp b/llvm/lib/Target/RISCV/RISCVRemoveBackToBackBranches.cpp new file mode 100644 index 0000000000000..55b8d263f6f11 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVRemoveBackToBackBranches.cpp @@ -0,0 +1,158 @@ +//===----------------------- RISCVRemoveBackToBackBranches.cpp ------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "RISCV.h" +#include "RISCVInstrInfo.h" +#include "RISCVSubtarget.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/TargetSubtargetInfo.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +#define DEBUG_TYPE "riscv-remove-back-to-back-branches" + +STATISTIC(NumInsertedAligments, "Number of aligments set"); + +namespace { + +// According to the MIPS specification, there shouldn't be two conditional +// branches in the same 8-byte aligned region of code. +constexpr unsigned NumberOfBytesOfCodeRegion = 8; + +class RISCVRemoveBackToBackBranches : public MachineFunctionPass { +public: + static char ID; + + RISCVRemoveBackToBackBranches() : MachineFunctionPass(ID) { + initializeRISCVRemoveBackToBackBranchesPass( + *PassRegistry::getPassRegistry()); + } + + StringRef getPassName() const override { + return "RISCV Remove Back To Back Branches Pass"; + } + + bool runOnMachineFunction(MachineFunction &F) override; + + MachineFunctionProperties getRequiredProperties() const override { + return MachineFunctionProperties().set( + MachineFunctionProperties::Property::NoVRegs); + } + +private: + const RISCVSubtarget *STI; + const RISCVInstrInfo *TII; +}; + +} // end of anonymous namespace + +char RISCVRemoveBackToBackBranches::ID = 0; + +INITIALIZE_PASS(RISCVRemoveBackToBackBranches, DEBUG_TYPE, + "Fix hazards by removing back to back branches", false, false) + +/// Returns a pass that clears pipeline hazards. 
+FunctionPass *llvm::createRISCVRemoveBackToBackBranches() { + return new RISCVRemoveBackToBackBranches(); +} + +static bool CheckCompressedISA(MachineBasicBlock *MBB, + const RISCVInstrInfo *TII) { + unsigned SizeInBytes = 0; + for (auto &I : *MBB) { + // Skip some 0-sized meta instructions, such as debug ones. + if (!TII->getInstSizeInBytes(I)) + continue; + + SizeInBytes += TII->getInstSizeInBytes(I); + + // This means that there is something other than the conditional branch + // here. + if (!I.isConditionalBranch()) + continue; + + // If it is a conditional branch, make sure it is the last one + // in this MBB and the cumulative size in bytes of other instructions in the + // block is <= 6 (since there potentially could be space for the two + // branches in the same 8-byte aligned code region, when compressed version + // of the instructions (16-bit size) is being used). + if (&I == &*MBB->getLastNonDebugInstr()) { + if (SizeInBytes <= 6) + return true; + return false; + } + } + + return false; +} + +static bool CheckNonCompressedISA(MachineBasicBlock *MBB, + const RISCVInstrInfo *TII) { + for (auto &I : *MBB) { + // Skip some 0-sized meta instructions, such as debug ones. + if (!TII->getInstSizeInBytes(I)) + continue; + + // This means that there is something other than the conditional branch + // here. + if (!I.isConditionalBranch()) + return false; + + // If it is a conditional branch, make sure it is the last one + // in this MBB. 
+ if (&I == &*MBB->getLastNonDebugInstr()) + return true; + return false; + } + return false; +} + +bool RISCVRemoveBackToBackBranches::runOnMachineFunction(MachineFunction &MF) { + STI = &static_cast(MF.getSubtarget()); + TII = static_cast(STI->getInstrInfo()); + + if (!STI->shouldRemoveBackToBackBranches()) { + LLVM_DEBUG(llvm::dbgs() + << "Ignoring RISCV Remove Back To Back Branches Pass\n"); + return false; + } + + bool Changed = false; + for (auto &MBB : MF) { + auto BBTerminator = MBB.getFirstTerminator(); + // If it is not a conditional branch, we are not interested. + if (BBTerminator == MBB.end() || + &*BBTerminator != &*MBB.getLastNonDebugInstr() || + !BBTerminator->isConditionalBranch()) + continue; + + for (auto &Successor : MBB.successors()) { + // Set up alignment in order to avoid hazards. No 2 conditional branches + // should be in the same 8-byte aligned region of code. Similar to MIPS + // forbidden slots problem. We may want to insert a NOP only, but we + // need to think of Compressed ISA, so it is more safe to just set up + // alignment to the successor block if it meets requirements. + bool ShouldSetAligment = STI->getFeatureBits()[RISCV::FeatureStdExtC] + ? CheckCompressedISA(Successor, TII) + : CheckNonCompressedISA(Successor, TII); + if (ShouldSetAligment) { + Successor->setAlignment(Align(NumberOfBytesOfCodeRegion)); + Changed = true; + ++NumInsertedAligments; + } + } + } + + return Changed; +} diff --git a/llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td b/llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td new file mode 100644 index 0000000000000..75f015d97df19 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVSchedMIPSP8700.td @@ -0,0 +1,279 @@ +//===-- RISCVSchedMIPSP8700.td - MIPS RISC-V Processor -----*- tablegen -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// RISC-V processor by MIPS. +//===----------------------------------------------------------------------===// + +def MIPSP8700Model : SchedMachineModel { + int IssueWidth = 4; + int MicroOpBufferSize = 96; // as per the specification + int LoadLatency = 4; + int MispredictPenalty = 8; // TODO: Estimated + let CompleteModel = 0; +} + +let SchedModel = MIPSP8700Model in { + +// Handle ALQ Pipelines. +def p8700ALQ : ProcResource<1> { let BufferSize = 16; } +def p8700IssueALU : ProcResource<1> { let Super = p8700ALQ; } + + +// Handle AGQ Pipelines. +def p8700AGQ : ProcResource<3> { let BufferSize = 16; } +def p8700IssueAL2 : ProcResource<1> { let Super = p8700AGQ; } +def p8700IssueCTISTD : ProcResource<1> { let Super = p8700AGQ; } +def p8700IssueLDST : ProcResource<1> { let Super = p8700AGQ; } +def p8700GpDiv : ProcResource<1>; +def p8700GpMul : ProcResource<1>; +def p8700WriteEitherALU : ProcResGroup<[p8700IssueALU, p8700IssueAL2]>; + +let Latency = 1 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +// Handle zba. +def : WriteRes; +def : WriteRes; + +// Handle zbb. 
+def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +let Latency = 0 in { +def : WriteRes; +} + +let Latency = 4 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +let Latency = 8 in { +def : WriteRes; +def : WriteRes; +} + +let Latency = 3 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; +} + +let Latency = 1 in { +def : WriteRes; +def : WriteRes; +} + +let Latency = 7 in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +let Latency = 4 in { +def : WriteRes; +def : WriteRes; +} + +let Latency = 8, ReleaseAtCycles = [5] in { +def : WriteRes; +def : WriteRes; +} + +def : WriteRes; +def : WriteRes; + +// Handle CTISTD Pipeline. +let Latency = 1 in { +def : WriteRes; +def : WriteRes; +} + +let Latency = 2 in { +def : WriteRes; +def : WriteRes; +} + +// Handle FPU Pipelines. 
+def p8700FPQ : ProcResource<3> { let BufferSize = 16; } +def p8700IssueFPUS : ProcResource<1> { let Super = p8700FPQ; } +def p8700IssueFPUL : ProcResource<1> { let Super = p8700FPQ; } +def p8700IssueFPULoad : ProcResource<1> { let Super = p8700FPQ; } +def p8700FpuApu : ProcResource<1>; +def p8700FpuLong : ProcResource<1>; + +let Latency = 4, ReleaseAtCycles = [1, 1] in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; +} + +let Latency = 2, ReleaseAtCycles = [1, 1] in { +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; +def : WriteRes; +def : WriteRes; +} + +let Latency = 8, ReleaseAtCycles = [1, 1] in { +def : WriteRes; +def : WriteRes; +} + +let Latency = 5, ReleaseAtCycles = [1, 1] in { +def : WriteRes; +def : WriteRes; +} + +let Latency = 17, ReleaseAtCycles = [1, 17] in { +def : WriteRes; +def : WriteRes; + +def : WriteRes; +def : WriteRes; +} + +def : WriteRes; + +// Bypass and advance. 
+def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// Unsupported extensions. 
+defm : UnsupportedSchedV; +defm : UnsupportedSchedZbc; +defm : UnsupportedSchedZbs; +defm : UnsupportedSchedZbkb; +defm : UnsupportedSchedZbkx; +defm : UnsupportedSchedZfa; +defm : UnsupportedSchedZfh; +defm : UnsupportedSchedSFB; +defm : UnsupportedSchedZabha; +defm : UnsupportedSchedXsfvcp; +defm : UnsupportedSchedZvk; +defm : UnsupportedSchedZvkned; +} diff --git a/llvm/lib/Target/RISCV/RISCVSchedule.td b/llvm/lib/Target/RISCV/RISCVSchedule.td index 1fdbc7cbcbaf4..114261b0b7680 100644 --- a/llvm/lib/Target/RISCV/RISCVSchedule.td +++ b/llvm/lib/Target/RISCV/RISCVSchedule.td @@ -22,6 +22,7 @@ def WriteIMul32 : SchedWrite; // 32-bit multiply on RV64I def WriteJmp : SchedWrite; // Jump def WriteJal : SchedWrite; // Jump and link def WriteJalr : SchedWrite; // Jump and link register +def WriteJmpReg : SchedWrite; // Jump register def WriteNop : SchedWrite; def WriteLDB : SchedWrite; // Load byte def WriteLDH : SchedWrite; // Load half-word diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp index 426d368204904..96ff5975d4897 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp @@ -62,6 +62,20 @@ static cl::opt RISCVMinimumJumpTableEntries( "riscv-min-jump-table-entries", cl::Hidden, cl::desc("Set minimum number of entries to use a jump table on RISCV")); +static cl::opt + UseLoadStorePairsOpt("riscv-load-store-pairs", + cl::desc("RISCV: Optimize for load-store bonding"), + cl::init(false), cl::Hidden); + +static cl::opt UseCCMovInsn("riscv-ccmov", + cl::desc("RISCV: Use 'ccmov' instruction"), + cl::init(true), cl::Hidden); + +static cl::opt RISCVRemoveBackToBackBranches( + "riscv-remove-back-to-back-branches", + cl::desc("RISCV: Insert nops to clear pipeline hazards."), cl::init(false), + cl::Hidden); + void RISCVSubtarget::anchor() {} RISCVSubtarget & @@ -70,8 +84,17 @@ RISCVSubtarget::initializeSubtargetDependencies(const Triple &TT, StringRef CPU, StringRef ABIName) { // 
Determine default and user-specified characteristics bool Is64Bit = TT.isArch64Bit(); - if (CPU.empty() || CPU == "generic") - CPU = Is64Bit ? "generic-rv64" : "generic-rv32"; + if (CPU.empty() || CPU == "generic") { + if (Is64Bit) { + if (TT.getVendor() == llvm::Triple::MipsTechnologies) { + CPU = "p8700"; + } else { + CPU = "generic-rv64"; + } + } else { + CPU = "generic-rv32"; + } + } if (TuneCPU.empty()) TuneCPU = CPU; @@ -207,3 +230,15 @@ void RISCVSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy, // register-pressure tracking. This will increase compile time. Policy.ShouldTrackPressure = true; } + +bool RISCVSubtarget::useLoadStorePairs() const { + return UseLoadStorePairsOpt && HasMIPSLSP; +} + +bool RISCVSubtarget::useCCMovInsn() const { + return UseCCMovInsn && HasMIPSCMov; +} + +bool RISCVSubtarget::shouldRemoveBackToBackBranches() const { + return RISCVRemoveBackToBackBranches && hasFeature(RISCV::TuneMIPSP8700); +} diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index 043838e13b964..cbe28a0416ff6 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -169,9 +169,10 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo { MVT getXLenVT() const { return is64Bit() ? MVT::i64 : MVT::i32; } - unsigned getXLen() const { - return is64Bit() ? 64 : 32; - } + unsigned getXLen() const { return is64Bit() ? 
64 : 32; } + bool shouldRemoveBackToBackBranches() const; + bool useLoadStorePairs() const; + bool useCCMovInsn() const; unsigned getFLen() const { if (HasStdExtD) return 64; diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index fa507653264cc..57169161a8a3f 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -81,6 +81,10 @@ static cl::opt EnableRISCVCopyPropagation( "riscv-enable-copy-propagation", cl::desc("Enable the copy propagation with RISC-V copy instr"), cl::init(true), cl::Hidden); +static cl::opt + EnableGEPOpt("riscv-enable-gep-opt", cl::Hidden, + cl::desc("Enable optimizations on complex GEPs"), + cl::init(false)); static cl::opt EnableRISCVDeadRegisterElimination( "riscv-enable-dead-defs", cl::Hidden, @@ -140,6 +144,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() { initializeRISCVDAGToDAGISelLegacyPass(*PR); initializeRISCVMoveMergePass(*PR); initializeRISCVPushPopOptPass(*PR); + initializeRISCVLoadStoreOptPass(*PR); } static StringRef computeDataLayout(const Triple &TT, @@ -367,6 +372,16 @@ class RISCVPassConfig : public TargetPassConfig { DAG->addMutation(createStoreClusterDAGMutation( DAG->TII, DAG->TRI, /*ReorderWhileClustering=*/true)); } + + const RISCVSubtarget &ST = C->MF->getSubtarget(); + if (!ST.getMacroFusions().empty()) { + DAG = DAG ? 
DAG : createGenericSchedLive(C); + + if (ST.useLoadStorePairs()) { + DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); + DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); + } + } return DAG; } @@ -454,6 +469,16 @@ void RISCVPassConfig::addIRPasses() { addPass(createAtomicExpandLegacyPass()); addPass(createRISCVZacasABIFixPass()); + if (TM->getOptLevel() == CodeGenOptLevel::Aggressive && EnableGEPOpt) { + addPass(createSeparateConstOffsetFromGEPPass(false)); + // Call EarlyCSE pass to find and remove subexpressions in the lowered + // result. + addPass(createEarlyCSEPass()); + // Do loop invariant code motion in case part of the lowered result is + // invariant. + addPass(createLICMPass()); + } + if (getOptLevel() != CodeGenOptLevel::None) { if (EnableLoopDataPrefetch) addPass(createLoopDataPrefetchPass()); @@ -540,6 +565,9 @@ void RISCVPassConfig::addPreSched2() { // Emit KCFI checks for indirect calls. addPass(createKCFIPass()); + if (TM->getOptLevel() != CodeGenOptLevel::None) { + addPass(createRISCVLoadStoreOptPass()); + } } void RISCVPassConfig::addPreEmitPass() { @@ -551,8 +579,14 @@ void RISCVPassConfig::addPreEmitPass() { if (TM->getOptLevel() >= CodeGenOptLevel::Default && EnableRISCVCopyPropagation) addPass(createMachineCopyPropagationPass(true)); - addPass(&BranchRelaxationPassID); addPass(createRISCVMakeCompressibleOptPass()); + + // LoadStoreOptimizer creates bundles for load-store bonding. 
+  addPass(createUnpackMachineBundles([](const MachineFunction &MF) { +    return MF.getSubtarget<RISCVSubtarget>().useLoadStorePairs(); +  })); +  addPass(&BranchRelaxationPassID); +  addPass(createRISCVRemoveBackToBackBranches()); }  void RISCVPassConfig::addPreEmitPass2() { diff --git a/llvm/test/CodeGen/MIR/RISCV/riscv-remove-back-to-back-branches.mir b/llvm/test/CodeGen/MIR/RISCV/riscv-remove-back-to-back-branches.mir new file mode 100644 index 0000000000000..448414678fa06 --- /dev/null +++ b/llvm/test/CodeGen/MIR/RISCV/riscv-remove-back-to-back-branches.mir @@ -0,0 +1,108 @@ +# RUN: llc -mtriple=riscv64 -mattr=-c -riscv-remove-back-to-back-branches=1 -o - %s | FileCheck %s + +# CHECK: %bb.0: +# CHECK: blez +# CHECK: .p2align 3 +# CHECK: %bb.1: +# CHECK: blez + +--- | + ; ModuleID = 'hazard.c' + source_filename = "hazard.c" + target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" + target triple = "riscv64-unknown-linux-gnu" + + ; Function Attrs: nounwind optsize + define dso_local void @test(i32 noundef signext %a, i32 noundef signext %b) local_unnamed_addr #0 { + entry: + %cmp = icmp sgt i32 %a, 0 + br i1 %cmp, label %if.then, label %if.end3 + + if.then: ; preds = %entry + %cmp1 = icmp slt i32 %b, 1 + br i1 %cmp1, label %if.then2, label %if.end3 + + if.then2: ; preds = %if.then + tail call void asm sideeffect "nop", ""() #1, !srcloc !4 + ret void + + if.end3: ; preds = %if.then, %entry + ret void + } + + attributes #0 = { nounwind optsize "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="mips-p8700" "target-features"="+64bit,+a,+c,+d,+f,+m" } + attributes #1 = { nounwind } + + !llvm.module.flags = !{!0, !1, !2} + !llvm.ident = !{!3} + + !0 = !{i32 1, !"wchar_size", i32 4} + !1 = !{i32 1, !"target-abi", !"lp64d"} + !2 = !{i32 1, !"SmallDataLimit", i32 8} + !3 = !{!"clang version 14.0.0 (git@github.com:MIPS/llvm.git ae54cf4034587fab977092097c9772c7a275ddc8)"} + !4 = !{i64 88} + +...
+--- +name: test +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +failsVerification: false +tracksDebugUserValues: true +registers: [] +liveins: + - { reg: '$x10', virtual-reg: '' } + - { reg: '$x11', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 0 + offsetAdjustment: 0 + maxAlignment: 1 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + hasTailCall: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: [] +callSites: [] +debugValueSubstitutions: [] +constants: [] +machineFunctionInfo: {} +body: | + bb.0.entry: + successors: %bb.1(0x50000000), %bb.2(0x30000000) + liveins: $x10, $x11 + + BGE $x0, killed renamable $x10, %bb.2 + + bb.1.if.then: + successors: %bb.3(0x30000000), %bb.2(0x50000000) + liveins: $x11 + + BGE $x0, killed renamable $x11, %bb.3 + + bb.2.if.end3: + PseudoRET + + bb.3.if.then2: + INLINEASM &nop, 1 /* sideeffect attdialect */, !4 + PseudoRET + +... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zba.ll b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zba.ll index 9584270d8e66f..f3d6c01f35e55 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/GlobalISel/rv64zba.ll @@ -96,7 +96,7 @@ define i64 @zextw_i64(i64 %a) nounwind { ; ; RV64ZBA-LABEL: zextw_i64: ; RV64ZBA: # %bb.0: -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %and = and i64 %a, 4294967295 ret i64 %and @@ -117,7 +117,7 @@ define i64 @zextw_demandedbits_i64(i64 %0) { ; RV64ZBA-LABEL: zextw_demandedbits_i64: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: li a1, -2 -; RV64ZBA-NEXT: zext.w a1, a1 +; RV64ZBA-NEXT: add.uw a1, a1, zero ; RV64ZBA-NEXT: and a0, a0, a1 ; RV64ZBA-NEXT: ori a0, a0, 1 ; RV64ZBA-NEXT: ret @@ -396,14 +396,14 @@ define zeroext i32 @sext_ashr_zext_i8(i8 %a) nounwind { ; RV64ZBANOZBB: # %bb.0: ; RV64ZBANOZBB-NEXT: slli a0, a0, 56 ; RV64ZBANOZBB-NEXT: srai a0, a0, 63 -; RV64ZBANOZBB-NEXT: zext.w a0, a0 +; RV64ZBANOZBB-NEXT: add.uw a0, a0, zero ; RV64ZBANOZBB-NEXT: ret ; ; RV64ZBAZBB-LABEL: sext_ashr_zext_i8: ; RV64ZBAZBB: # %bb.0: ; RV64ZBAZBB-NEXT: sext.b a0, a0 ; RV64ZBAZBB-NEXT: srai a0, a0, 9 -; RV64ZBAZBB-NEXT: zext.w a0, a0 +; RV64ZBAZBB-NEXT: add.uw a0, a0, zero ; RV64ZBAZBB-NEXT: ret %ext = sext i8 %a to i32 %1 = ashr i32 %ext, 9 @@ -516,14 +516,14 @@ define zeroext i32 @sext_ashr_zext_i16(i16 %a) nounwind { ; RV64ZBANOZBB: # %bb.0: ; RV64ZBANOZBB-NEXT: slli a0, a0, 48 ; RV64ZBANOZBB-NEXT: srai a0, a0, 57 -; RV64ZBANOZBB-NEXT: zext.w a0, a0 +; RV64ZBANOZBB-NEXT: add.uw a0, a0, zero ; RV64ZBANOZBB-NEXT: ret ; ; RV64ZBAZBB-LABEL: sext_ashr_zext_i16: ; RV64ZBAZBB: # %bb.0: ; RV64ZBAZBB-NEXT: sext.h a0, a0 ; RV64ZBAZBB-NEXT: srai a0, a0, 9 -; RV64ZBAZBB-NEXT: zext.w a0, a0 +; RV64ZBAZBB-NEXT: add.uw a0, a0, zero ; RV64ZBAZBB-NEXT: ret %ext = sext i16 %a to i32 %1 = ashr i32 %ext, 9 @@ -1011,7 +1011,7 @@ define i64 @pack_i64(i64 %a, i64 %b) nounwind { ; ; RV64ZBA-LABEL: 
pack_i64: ; RV64ZBA: # %bb.0: -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: slli a1, a1, 32 ; RV64ZBA-NEXT: or a0, a1, a0 ; RV64ZBA-NEXT: ret @@ -1034,8 +1034,8 @@ define i64 @pack_i64_2(i32 signext %a, i32 signext %b) nounwind { ; ; RV64ZBA-LABEL: pack_i64_2: ; RV64ZBA: # %bb.0: -; RV64ZBA-NEXT: zext.w a0, a0 -; RV64ZBA-NEXT: zext.w a1, a1 +; RV64ZBA-NEXT: add.uw a0, a0, zero +; RV64ZBA-NEXT: add.uw a1, a1, zero ; RV64ZBA-NEXT: slli a1, a1, 32 ; RV64ZBA-NEXT: or a0, a1, a0 ; RV64ZBA-NEXT: ret @@ -1056,7 +1056,7 @@ define i64 @pack_i64_disjoint(i64 %a, i64 %b) nounwind { ; ; RV64ZBA-LABEL: pack_i64_disjoint: ; RV64ZBA: # %bb.0: -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: or a0, a1, a0 ; RV64ZBA-NEXT: ret %shl = and i64 %a, 4294967295 @@ -1074,7 +1074,7 @@ define i64 @pack_i64_disjoint_2(i32 signext %a, i64 %b) nounwind { ; ; RV64ZBA-LABEL: pack_i64_disjoint_2: ; RV64ZBA: # %bb.0: -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: or a0, a1, a0 ; RV64ZBA-NEXT: ret %zexta = zext i32 %a to i64 @@ -1450,7 +1450,7 @@ define ptr @gep_lshr_i32(ptr %0, i64 %1) { ; RV64ZBA-LABEL: gep_lshr_i32: ; RV64ZBA: # %bb.0: # %entry ; RV64ZBA-NEXT: srli a1, a1, 2 -; RV64ZBA-NEXT: zext.w a1, a1 +; RV64ZBA-NEXT: add.uw a1, a1, zero ; RV64ZBA-NEXT: li a2, 80 ; RV64ZBA-NEXT: mul a1, a1, a2 ; RV64ZBA-NEXT: add a0, a0, a1 @@ -1671,9 +1671,9 @@ define i64 @add_u32simm32_zextw(i64 %x) nounwind { ; RV64ZBA-LABEL: add_u32simm32_zextw: ; RV64ZBA: # %bb.0: # %entry ; RV64ZBA-NEXT: li a1, -2 -; RV64ZBA-NEXT: zext.w a1, a1 +; RV64ZBA-NEXT: add.uw a1, a1, zero ; RV64ZBA-NEXT: add a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret entry: %add = add i64 %x, 4294967294 diff --git a/llvm/test/CodeGen/RISCV/O0-pipeline.ll b/llvm/test/CodeGen/RISCV/O0-pipeline.ll index f60def9d546f8..083b39b881243 100644 --- a/llvm/test/CodeGen/RISCV/O0-pipeline.ll 
+++ b/llvm/test/CodeGen/RISCV/O0-pipeline.ll @@ -61,8 +61,10 @@ ; CHECK-NEXT: Insert fentry calls ; CHECK-NEXT: Insert XRay ops ; CHECK-NEXT: Implement the 'patchable-function' attribute -; CHECK-NEXT: Branch relaxation pass ; CHECK-NEXT: RISC-V Make Compressible +; CHECK-NEXT: Unpack machine instruction bundles +; CHECK-NEXT: Branch relaxation pass +; CHECK-NEXT: RISCV Remove Back To Back Branches Pass ; CHECK-NEXT: Contiguously Lay Out Funclets ; CHECK-NEXT: Remove Loads Into Fake Uses ; CHECK-NEXT: StackMap Liveness Analysis diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll index 8fd9ae9850366..1f91071e7331e 100644 --- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll +++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll @@ -176,6 +176,7 @@ ; CHECK-NEXT: Post-RA pseudo instruction expansion pass ; CHECK-NEXT: RISC-V post-regalloc pseudo instruction expansion pass ; CHECK-NEXT: Insert KCFI indirect call checks +; CHECK-NEXT: RISCV Load / Store Optimizer ; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: Machine Natural Loop Construction ; CHECK-NEXT: PostRA Machine Instruction Scheduler @@ -187,8 +188,10 @@ ; CHECK-NEXT: Insert XRay ops ; CHECK-NEXT: Implement the 'patchable-function' attribute ; CHECK-NEXT: Machine Copy Propagation Pass -; CHECK-NEXT: Branch relaxation pass ; CHECK-NEXT: RISC-V Make Compressible +; CHECK-NEXT: Unpack machine instruction bundles +; CHECK-NEXT: Branch relaxation pass +; CHECK-NEXT: RISCV Remove Back To Back Branches Pass ; CHECK-NEXT: Contiguously Lay Out Funclets ; CHECK-NEXT: Remove Loads Into Fake Uses ; CHECK-NEXT: StackMap Liveness Analysis diff --git a/llvm/test/CodeGen/RISCV/div-by-constant.ll b/llvm/test/CodeGen/RISCV/div-by-constant.ll index 844fa0d1e6ad6..598992d362f47 100644 --- a/llvm/test/CodeGen/RISCV/div-by-constant.ll +++ b/llvm/test/CodeGen/RISCV/div-by-constant.ll @@ -62,7 +62,7 @@ define i32 @udiv_constant_add(i32 %a) nounwind { ; ; RV64IMZB-LABEL: udiv_constant_add: ; 
RV64IMZB: # %bb.0: -; RV64IMZB-NEXT: zext.w a1, a0 +; RV64IMZB-NEXT: add.uw a1, a0, zero ; RV64IMZB-NEXT: lui a2, 149797 ; RV64IMZB-NEXT: addiw a2, a2, -1755 ; RV64IMZB-NEXT: mul a1, a1, a2 diff --git a/llvm/test/CodeGen/RISCV/load-store-pair.ll b/llvm/test/CodeGen/RISCV/load-store-pair.ll new file mode 100644 index 0000000000000..ac62e80334e1e --- /dev/null +++ b/llvm/test/CodeGen/RISCV/load-store-pair.ll @@ -0,0 +1,509 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv32 -target-abi ilp32d -mattr=+d -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32D +; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I +; RUN: llc -mtriple=riscv64 -target-abi lp64d -mattr=+d -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64D +; RUN: llc -mtriple=riscv32 -mattr=+xmipslsp -riscv-load-store-pairs=1 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32I_PAIR +; RUN: llc -mtriple=riscv32 -target-abi ilp32d -mattr=+d,+xmipslsp -riscv-load-store-pairs=1 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV32D_PAIR +; RUN: llc -mtriple=riscv64 -mattr=+xmipslsp -riscv-load-store-pairs=1 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64I_PAIR +; RUN: llc -mtriple=riscv64 -mcpu mips-p8700 -mattr=+xmipslsp -riscv-load-store-pairs=1 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64P_8700 +; RUN: llc -mtriple=riscv64 -target-abi lp64d -mattr=+d,+xmipslsp -riscv-load-store-pairs=1 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64D_PAIR +; RUN: llc -mtriple=riscv64 -target-abi lp64d -mattr=+d -mcpu mips-p8700 -verify-machineinstrs < %s \ +; RUN: | FileCheck %s -check-prefix=RV64D_8700 + +define dso_local void @testi(i8** nocapture noundef readonly %a) local_unnamed_addr #0 {
+; RV32I-LABEL: testi: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: .cfi_def_cfa_offset 16 +; RV32I-NEXT: sw s2, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s3, 8(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s4, 4(sp) # 4-byte Folded Spill +; RV32I-NEXT: sw s5, 0(sp) # 4-byte Folded Spill +; RV32I-NEXT: .cfi_offset s2, -4 +; RV32I-NEXT: .cfi_offset s3, -8 +; RV32I-NEXT: .cfi_offset s4, -12 +; RV32I-NEXT: .cfi_offset s5, -16 +; RV32I-NEXT: lw s3, 0(a0) +; RV32I-NEXT: lw s2, 4(a0) +; RV32I-NEXT: lw s5, 8(a0) +; RV32I-NEXT: lw s4, 12(a0) +; RV32I-NEXT: #APP +; RV32I-NEXT: #NO_APP +; RV32I-NEXT: lw s2, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s3, 8(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s4, 4(sp) # 4-byte Folded Reload +; RV32I-NEXT: lw s5, 0(sp) # 4-byte Folded Reload +; RV32I-NEXT: .cfi_restore s2 +; RV32I-NEXT: .cfi_restore s3 +; RV32I-NEXT: .cfi_restore s4 +; RV32I-NEXT: .cfi_restore s5 +; RV32I-NEXT: addi sp, sp, 16 +; RV32I-NEXT: .cfi_def_cfa_offset 0 +; RV32I-NEXT: ret +; +; RV32D-LABEL: testi: +; RV32D: # %bb.0: # %entry +; RV32D-NEXT: addi sp, sp, -16 +; RV32D-NEXT: .cfi_def_cfa_offset 16 +; RV32D-NEXT: sw s2, 12(sp) # 4-byte Folded Spill +; RV32D-NEXT: sw s3, 8(sp) # 4-byte Folded Spill +; RV32D-NEXT: sw s4, 4(sp) # 4-byte Folded Spill +; RV32D-NEXT: sw s5, 0(sp) # 4-byte Folded Spill +; RV32D-NEXT: .cfi_offset s2, -4 +; RV32D-NEXT: .cfi_offset s3, -8 +; RV32D-NEXT: .cfi_offset s4, -12 +; RV32D-NEXT: .cfi_offset s5, -16 +; RV32D-NEXT: lw s3, 0(a0) +; RV32D-NEXT: lw s2, 4(a0) +; RV32D-NEXT: lw s5, 8(a0) +; RV32D-NEXT: lw s4, 12(a0) +; RV32D-NEXT: #APP +; RV32D-NEXT: #NO_APP +; RV32D-NEXT: lw s2, 12(sp) # 4-byte Folded Reload +; RV32D-NEXT: lw s3, 8(sp) # 4-byte Folded Reload +; RV32D-NEXT: lw s4, 4(sp) # 4-byte Folded Reload +; RV32D-NEXT: lw s5, 0(sp) # 4-byte Folded Reload +; RV32D-NEXT: .cfi_restore s2 +; RV32D-NEXT: .cfi_restore s3 +; RV32D-NEXT: .cfi_restore s4 +; RV32D-NEXT: .cfi_restore s5 +; RV32D-NEXT: addi 
sp, sp, 16 +; RV32D-NEXT: .cfi_def_cfa_offset 0 +; RV32D-NEXT: ret +; +; RV64I-LABEL: testi: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi sp, sp, -32 +; RV64I-NEXT: .cfi_def_cfa_offset 32 +; RV64I-NEXT: sd s2, 24(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s3, 16(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s4, 8(sp) # 8-byte Folded Spill +; RV64I-NEXT: sd s5, 0(sp) # 8-byte Folded Spill +; RV64I-NEXT: .cfi_offset s2, -8 +; RV64I-NEXT: .cfi_offset s3, -16 +; RV64I-NEXT: .cfi_offset s4, -24 +; RV64I-NEXT: .cfi_offset s5, -32 +; RV64I-NEXT: ld s3, 0(a0) +; RV64I-NEXT: ld s2, 8(a0) +; RV64I-NEXT: ld s5, 16(a0) +; RV64I-NEXT: ld s4, 24(a0) +; RV64I-NEXT: #APP +; RV64I-NEXT: #NO_APP +; RV64I-NEXT: ld s2, 24(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s3, 16(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s4, 8(sp) # 8-byte Folded Reload +; RV64I-NEXT: ld s5, 0(sp) # 8-byte Folded Reload +; RV64I-NEXT: .cfi_restore s2 +; RV64I-NEXT: .cfi_restore s3 +; RV64I-NEXT: .cfi_restore s4 +; RV64I-NEXT: .cfi_restore s5 +; RV64I-NEXT: addi sp, sp, 32 +; RV64I-NEXT: .cfi_def_cfa_offset 0 +; RV64I-NEXT: ret +; +; RV64D-LABEL: testi: +; RV64D: # %bb.0: # %entry +; RV64D-NEXT: addi sp, sp, -32 +; RV64D-NEXT: .cfi_def_cfa_offset 32 +; RV64D-NEXT: sd s2, 24(sp) # 8-byte Folded Spill +; RV64D-NEXT: sd s3, 16(sp) # 8-byte Folded Spill +; RV64D-NEXT: sd s4, 8(sp) # 8-byte Folded Spill +; RV64D-NEXT: sd s5, 0(sp) # 8-byte Folded Spill +; RV64D-NEXT: .cfi_offset s2, -8 +; RV64D-NEXT: .cfi_offset s3, -16 +; RV64D-NEXT: .cfi_offset s4, -24 +; RV64D-NEXT: .cfi_offset s5, -32 +; RV64D-NEXT: ld s3, 0(a0) +; RV64D-NEXT: ld s2, 8(a0) +; RV64D-NEXT: ld s5, 16(a0) +; RV64D-NEXT: ld s4, 24(a0) +; RV64D-NEXT: #APP +; RV64D-NEXT: #NO_APP +; RV64D-NEXT: ld s2, 24(sp) # 8-byte Folded Reload +; RV64D-NEXT: ld s3, 16(sp) # 8-byte Folded Reload +; RV64D-NEXT: ld s4, 8(sp) # 8-byte Folded Reload +; RV64D-NEXT: ld s5, 0(sp) # 8-byte Folded Reload +; RV64D-NEXT: .cfi_restore s2 +; RV64D-NEXT: .cfi_restore s3 +; 
RV64D-NEXT: .cfi_restore s4 +; RV64D-NEXT: .cfi_restore s5 +; RV64D-NEXT: addi sp, sp, 32 +; RV64D-NEXT: .cfi_def_cfa_offset 0 +; RV64D-NEXT: ret +; +; RV32I_PAIR-LABEL: testi: +; RV32I_PAIR: # %bb.0: # %entry +; RV32I_PAIR-NEXT: addi sp, sp, -16 +; RV32I_PAIR-NEXT: .cfi_def_cfa_offset 16 +; RV32I_PAIR-NEXT: swp s3, s2, 8(sp) # 8-byte Folded Spill +; RV32I_PAIR-NEXT: swp s5, s4, 0(sp) # 8-byte Folded Spill +; RV32I_PAIR-NEXT: .cfi_offset s2, -4 +; RV32I_PAIR-NEXT: .cfi_offset s3, -8 +; RV32I_PAIR-NEXT: .cfi_offset s4, -12 +; RV32I_PAIR-NEXT: .cfi_offset s5, -16 +; RV32I_PAIR-NEXT: lwp s3, s2, 0(a0) +; RV32I_PAIR-NEXT: lwp s5, s4, 8(a0) +; RV32I_PAIR-NEXT: #APP +; RV32I_PAIR-NEXT: #NO_APP +; RV32I_PAIR-NEXT: lwp s3, s2, 8(sp) # 8-byte Folded Reload +; RV32I_PAIR-NEXT: lwp s5, s4, 0(sp) # 8-byte Folded Reload +; RV32I_PAIR-NEXT: .cfi_restore s2 +; RV32I_PAIR-NEXT: .cfi_restore s3 +; RV32I_PAIR-NEXT: .cfi_restore s4 +; RV32I_PAIR-NEXT: .cfi_restore s5 +; RV32I_PAIR-NEXT: addi sp, sp, 16 +; RV32I_PAIR-NEXT: .cfi_def_cfa_offset 0 +; RV32I_PAIR-NEXT: ret +; +; RV32D_PAIR-LABEL: testi: +; RV32D_PAIR: # %bb.0: # %entry +; RV32D_PAIR-NEXT: addi sp, sp, -16 +; RV32D_PAIR-NEXT: .cfi_def_cfa_offset 16 +; RV32D_PAIR-NEXT: swp s3, s2, 8(sp) # 8-byte Folded Spill +; RV32D_PAIR-NEXT: swp s5, s4, 0(sp) # 8-byte Folded Spill +; RV32D_PAIR-NEXT: .cfi_offset s2, -4 +; RV32D_PAIR-NEXT: .cfi_offset s3, -8 +; RV32D_PAIR-NEXT: .cfi_offset s4, -12 +; RV32D_PAIR-NEXT: .cfi_offset s5, -16 +; RV32D_PAIR-NEXT: lwp s3, s2, 0(a0) +; RV32D_PAIR-NEXT: lwp s5, s4, 8(a0) +; RV32D_PAIR-NEXT: #APP +; RV32D_PAIR-NEXT: #NO_APP +; RV32D_PAIR-NEXT: lwp s3, s2, 8(sp) # 8-byte Folded Reload +; RV32D_PAIR-NEXT: lwp s5, s4, 0(sp) # 8-byte Folded Reload +; RV32D_PAIR-NEXT: .cfi_restore s2 +; RV32D_PAIR-NEXT: .cfi_restore s3 +; RV32D_PAIR-NEXT: .cfi_restore s4 +; RV32D_PAIR-NEXT: .cfi_restore s5 +; RV32D_PAIR-NEXT: addi sp, sp, 16 +; RV32D_PAIR-NEXT: .cfi_def_cfa_offset 0 +; RV32D_PAIR-NEXT: ret +; +; 
RV64I_PAIR-LABEL: testi: +; RV64I_PAIR: # %bb.0: # %entry +; RV64I_PAIR-NEXT: addi sp, sp, -32 +; RV64I_PAIR-NEXT: .cfi_def_cfa_offset 32 +; RV64I_PAIR-NEXT: sdp s3, s2, 16(sp) # 16-byte Folded Spill +; RV64I_PAIR-NEXT: sdp s5, s4, 0(sp) # 16-byte Folded Spill +; RV64I_PAIR-NEXT: .cfi_offset s2, -8 +; RV64I_PAIR-NEXT: .cfi_offset s3, -16 +; RV64I_PAIR-NEXT: .cfi_offset s4, -24 +; RV64I_PAIR-NEXT: .cfi_offset s5, -32 +; RV64I_PAIR-NEXT: ld s3, 0(a0) +; RV64I_PAIR-NEXT: ld s2, 8(a0) +; RV64I_PAIR-NEXT: ld s5, 16(a0) +; RV64I_PAIR-NEXT: ld s4, 24(a0) +; RV64I_PAIR-NEXT: #APP +; RV64I_PAIR-NEXT: #NO_APP +; RV64I_PAIR-NEXT: ldp s3, s2, 16(sp) # 16-byte Folded Reload +; RV64I_PAIR-NEXT: ldp s5, s4, 0(sp) # 16-byte Folded Reload +; RV64I_PAIR-NEXT: .cfi_restore s2 +; RV64I_PAIR-NEXT: .cfi_restore s3 +; RV64I_PAIR-NEXT: .cfi_restore s4 +; RV64I_PAIR-NEXT: .cfi_restore s5 +; RV64I_PAIR-NEXT: addi sp, sp, 32 +; RV64I_PAIR-NEXT: .cfi_def_cfa_offset 0 +; RV64I_PAIR-NEXT: ret +; +; RV64P_8700-LABEL: testi: +; RV64P_8700: # %bb.0: # %entry +; RV64P_8700-NEXT: addi sp, sp, -32 +; RV64P_8700-NEXT: .cfi_def_cfa_offset 32 +; RV64P_8700-NEXT: sdp s3, s2, 16(sp) # 16-byte Folded Spill +; RV64P_8700-NEXT: sdp s5, s4, 0(sp) # 16-byte Folded Spill +; RV64P_8700-NEXT: .cfi_offset s2, -8 +; RV64P_8700-NEXT: .cfi_offset s3, -16 +; RV64P_8700-NEXT: .cfi_offset s4, -24 +; RV64P_8700-NEXT: .cfi_offset s5, -32 +; RV64P_8700-NEXT: ld s3, 0(a0) +; RV64P_8700-NEXT: ld s2, 8(a0) +; RV64P_8700-NEXT: ld s5, 16(a0) +; RV64P_8700-NEXT: ld s4, 24(a0) +; RV64P_8700-NEXT: #APP +; RV64P_8700-NEXT: #NO_APP +; RV64P_8700-NEXT: ldp s3, s2, 16(sp) # 16-byte Folded Reload +; RV64P_8700-NEXT: ldp s5, s4, 0(sp) # 16-byte Folded Reload +; RV64P_8700-NEXT: .cfi_restore s2 +; RV64P_8700-NEXT: .cfi_restore s3 +; RV64P_8700-NEXT: .cfi_restore s4 +; RV64P_8700-NEXT: .cfi_restore s5 +; RV64P_8700-NEXT: addi sp, sp, 32 +; RV64P_8700-NEXT: .cfi_def_cfa_offset 0 +; RV64P_8700-NEXT: ret +; +; RV64D_PAIR-LABEL: testi: +; 
RV64D_PAIR: # %bb.0: # %entry +; RV64D_PAIR-NEXT: addi sp, sp, -32 +; RV64D_PAIR-NEXT: .cfi_def_cfa_offset 32 +; RV64D_PAIR-NEXT: sdp s3, s2, 16(sp) # 16-byte Folded Spill +; RV64D_PAIR-NEXT: sdp s5, s4, 0(sp) # 16-byte Folded Spill +; RV64D_PAIR-NEXT: .cfi_offset s2, -8 +; RV64D_PAIR-NEXT: .cfi_offset s3, -16 +; RV64D_PAIR-NEXT: .cfi_offset s4, -24 +; RV64D_PAIR-NEXT: .cfi_offset s5, -32 +; RV64D_PAIR-NEXT: ld s3, 0(a0) +; RV64D_PAIR-NEXT: ld s2, 8(a0) +; RV64D_PAIR-NEXT: ld s5, 16(a0) +; RV64D_PAIR-NEXT: ld s4, 24(a0) +; RV64D_PAIR-NEXT: #APP +; RV64D_PAIR-NEXT: #NO_APP +; RV64D_PAIR-NEXT: ldp s3, s2, 16(sp) # 16-byte Folded Reload +; RV64D_PAIR-NEXT: ldp s5, s4, 0(sp) # 16-byte Folded Reload +; RV64D_PAIR-NEXT: .cfi_restore s2 +; RV64D_PAIR-NEXT: .cfi_restore s3 +; RV64D_PAIR-NEXT: .cfi_restore s4 +; RV64D_PAIR-NEXT: .cfi_restore s5 +; RV64D_PAIR-NEXT: addi sp, sp, 32 +; RV64D_PAIR-NEXT: .cfi_def_cfa_offset 0 +; RV64D_PAIR-NEXT: ret +; +; RV64D_8700-LABEL: testi: +; RV64D_8700: # %bb.0: # %entry +; RV64D_8700-NEXT: addi sp, sp, -32 +; RV64D_8700-NEXT: .cfi_def_cfa_offset 32 +; RV64D_8700-NEXT: sd s2, 24(sp) # 8-byte Folded Spill +; RV64D_8700-NEXT: sd s3, 16(sp) # 8-byte Folded Spill +; RV64D_8700-NEXT: sd s4, 8(sp) # 8-byte Folded Spill +; RV64D_8700-NEXT: sd s5, 0(sp) # 8-byte Folded Spill +; RV64D_8700-NEXT: .cfi_offset s2, -8 +; RV64D_8700-NEXT: .cfi_offset s3, -16 +; RV64D_8700-NEXT: .cfi_offset s4, -24 +; RV64D_8700-NEXT: .cfi_offset s5, -32 +; RV64D_8700-NEXT: ld s3, 0(a0) +; RV64D_8700-NEXT: ld s2, 8(a0) +; RV64D_8700-NEXT: ld s5, 16(a0) +; RV64D_8700-NEXT: ld s4, 24(a0) +; RV64D_8700-NEXT: #APP +; RV64D_8700-NEXT: #NO_APP +; RV64D_8700-NEXT: ld s2, 24(sp) # 8-byte Folded Reload +; RV64D_8700-NEXT: ld s3, 16(sp) # 8-byte Folded Reload +; RV64D_8700-NEXT: ld s4, 8(sp) # 8-byte Folded Reload +; RV64D_8700-NEXT: ld s5, 0(sp) # 8-byte Folded Reload +; RV64D_8700-NEXT: .cfi_restore s2 +; RV64D_8700-NEXT: .cfi_restore s3 +; RV64D_8700-NEXT: .cfi_restore s4 +; 
RV64D_8700-NEXT: .cfi_restore s5 +; RV64D_8700-NEXT: addi sp, sp, 32 +; RV64D_8700-NEXT: .cfi_def_cfa_offset 0 +; RV64D_8700-NEXT: ret +entry: + %arrayidx = getelementptr inbounds i8*, i8** %a, i64 1 + %0 = load i8*, i8** %arrayidx, align 8 + %1 = load i8*, i8** %a, align 8 + %arrayidx2 = getelementptr inbounds i8*, i8** %a, i64 3 + %2 = load i8*, i8** %arrayidx2, align 8 + %arrayidx3 = getelementptr inbounds i8*, i8** %a, i64 2 + %3 = load i8*, i8** %arrayidx3, align 8 + tail call void asm sideeffect "", "{x18},{x19},{x20},{x21}"(i8* %0, i8* %1, i8* %2, i8* %3) + ret void +} + + +define dso_local void @testf(float* nocapture noundef readonly %a) local_unnamed_addr #0 { +; RV32I-LABEL: testf: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lw a3, 0(a0) +; RV32I-NEXT: lw a4, 4(a0) +; RV32I-NEXT: lw a2, 8(a0) +; RV32I-NEXT: lw a1, 12(a0) +; RV32I-NEXT: mv a0, a4 +; RV32I-NEXT: tail sinkf +; +; RV32D-LABEL: testf: +; RV32D: # %bb.0: # %entry +; RV32D-NEXT: flw fa3, 0(a0) +; RV32D-NEXT: flw fa0, 4(a0) +; RV32D-NEXT: flw fa2, 8(a0) +; RV32D-NEXT: flw fa1, 12(a0) +; RV32D-NEXT: tail sinkf +; +; RV64I-LABEL: testf: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: lw a3, 0(a0) +; RV64I-NEXT: lw a4, 4(a0) +; RV64I-NEXT: lw a2, 8(a0) +; RV64I-NEXT: lw a1, 12(a0) +; RV64I-NEXT: mv a0, a4 +; RV64I-NEXT: tail sinkf +; +; RV64D-LABEL: testf: +; RV64D: # %bb.0: # %entry +; RV64D-NEXT: flw fa3, 0(a0) +; RV64D-NEXT: flw fa0, 4(a0) +; RV64D-NEXT: flw fa2, 8(a0) +; RV64D-NEXT: flw fa1, 12(a0) +; RV64D-NEXT: tail sinkf +; +; RV32I_PAIR-LABEL: testf: +; RV32I_PAIR: # %bb.0: # %entry +; RV32I_PAIR-NEXT: lw a3, 0(a0) +; RV32I_PAIR-NEXT: lw a4, 4(a0) +; RV32I_PAIR-NEXT: lw a2, 8(a0) +; RV32I_PAIR-NEXT: lw a1, 12(a0) +; RV32I_PAIR-NEXT: mv a0, a4 +; RV32I_PAIR-NEXT: tail sinkf +; +; RV32D_PAIR-LABEL: testf: +; RV32D_PAIR: # %bb.0: # %entry +; RV32D_PAIR-NEXT: flw fa3, 0(a0) +; RV32D_PAIR-NEXT: flw fa0, 4(a0) +; RV32D_PAIR-NEXT: flw fa2, 8(a0) +; RV32D_PAIR-NEXT: flw fa1, 12(a0) +; RV32D_PAIR-NEXT: 
tail sinkf +; +; RV64I_PAIR-LABEL: testf: +; RV64I_PAIR: # %bb.0: # %entry +; RV64I_PAIR-NEXT: lw a3, 0(a0) +; RV64I_PAIR-NEXT: lw a4, 4(a0) +; RV64I_PAIR-NEXT: lw a2, 8(a0) +; RV64I_PAIR-NEXT: lw a1, 12(a0) +; RV64I_PAIR-NEXT: mv a0, a4 +; RV64I_PAIR-NEXT: tail sinkf +; +; RV64P_8700-LABEL: testf: +; RV64P_8700: # %bb.0: # %entry +; RV64P_8700-NEXT: flw fa3, 0(a0) +; RV64P_8700-NEXT: flw fa0, 4(a0) +; RV64P_8700-NEXT: flw fa2, 8(a0) +; RV64P_8700-NEXT: flw fa1, 12(a0) +; RV64P_8700-NEXT: tail sinkf +; +; RV64D_PAIR-LABEL: testf: +; RV64D_PAIR: # %bb.0: # %entry +; RV64D_PAIR-NEXT: flw fa3, 0(a0) +; RV64D_PAIR-NEXT: flw fa0, 4(a0) +; RV64D_PAIR-NEXT: flw fa2, 8(a0) +; RV64D_PAIR-NEXT: flw fa1, 12(a0) +; RV64D_PAIR-NEXT: tail sinkf +; +; RV64D_8700-LABEL: testf: +; RV64D_8700: # %bb.0: # %entry +; RV64D_8700-NEXT: flw fa3, 0(a0) +; RV64D_8700-NEXT: flw fa0, 4(a0) +; RV64D_8700-NEXT: flw fa2, 8(a0) +; RV64D_8700-NEXT: flw fa1, 12(a0) +; RV64D_8700-NEXT: tail sinkf +entry: + %arrayidx = getelementptr inbounds float, float* %a, i64 1 + %0 = load float, float* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, float* %a, i64 3 + %1 = load float, float* %arrayidx1, align 4 + %arrayidx2 = getelementptr inbounds float, float* %a, i64 2 + %2 = load float, float* %arrayidx2, align 4 + %3 = load float, float* %a, align 4 + tail call void @sinkf(float noundef %0, float noundef %1, float noundef %2, float noundef %3) + ret void +} + +declare dso_local void @sinkf(float noundef, float noundef, float noundef, float noundef) local_unnamed_addr + +define dso_local void @testd(double* nocapture noundef readonly %a) local_unnamed_addr #0 { +; RV32I-LABEL: testd: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lw a4, 16(a0) +; RV32I-NEXT: lw a5, 20(a0) +; RV32I-NEXT: lw a2, 24(a0) +; RV32I-NEXT: lw a3, 28(a0) +; RV32I-NEXT: lw a6, 0(a0) +; RV32I-NEXT: lw a7, 4(a0) +; RV32I-NEXT: lw t0, 8(a0) +; RV32I-NEXT: lw a1, 12(a0) +; RV32I-NEXT: mv a0, t0 +; RV32I-NEXT: tail sinkd +; +; 
RV32D-LABEL: testd: +; RV32D: # %bb.0: # %entry +; RV32D-NEXT: fld fa3, 0(a0) +; RV32D-NEXT: fld fa0, 8(a0) +; RV32D-NEXT: fld fa2, 16(a0) +; RV32D-NEXT: fld fa1, 24(a0) +; RV32D-NEXT: tail sinkd +; +; RV64I-LABEL: testd: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: ld a3, 0(a0) +; RV64I-NEXT: ld a4, 8(a0) +; RV64I-NEXT: ld a2, 16(a0) +; RV64I-NEXT: ld a1, 24(a0) +; RV64I-NEXT: mv a0, a4 +; RV64I-NEXT: tail sinkd +; +; RV64D-LABEL: testd: +; RV64D: # %bb.0: # %entry +; RV64D-NEXT: fld fa3, 0(a0) +; RV64D-NEXT: fld fa0, 8(a0) +; RV64D-NEXT: fld fa2, 16(a0) +; RV64D-NEXT: fld fa1, 24(a0) +; RV64D-NEXT: tail sinkd +; +; RV32I_PAIR-LABEL: testd: +; RV32I_PAIR: # %bb.0: # %entry +; RV32I_PAIR-NEXT: lwp a4, a5, 16(a0) +; RV32I_PAIR-NEXT: lwp a2, a3, 24(a0) +; RV32I_PAIR-NEXT: lwp a6, a7, 0(a0) +; RV32I_PAIR-NEXT: lwp a0, a1, 8(a0) +; RV32I_PAIR-NEXT: tail sinkd +; +; RV32D_PAIR-LABEL: testd: +; RV32D_PAIR: # %bb.0: # %entry +; RV32D_PAIR-NEXT: fld fa3, 0(a0) +; RV32D_PAIR-NEXT: fld fa0, 8(a0) +; RV32D_PAIR-NEXT: fld fa2, 16(a0) +; RV32D_PAIR-NEXT: fld fa1, 24(a0) +; RV32D_PAIR-NEXT: tail sinkd +; +; RV64I_PAIR-LABEL: testd: +; RV64I_PAIR: # %bb.0: # %entry +; RV64I_PAIR-NEXT: ld a3, 0(a0) +; RV64I_PAIR-NEXT: ld a4, 8(a0) +; RV64I_PAIR-NEXT: ld a2, 16(a0) +; RV64I_PAIR-NEXT: ld a1, 24(a0) +; RV64I_PAIR-NEXT: mv a0, a4 +; RV64I_PAIR-NEXT: tail sinkd +; +; RV64P_8700-LABEL: testd: +; RV64P_8700: # %bb.0: # %entry +; RV64P_8700-NEXT: fld fa3, 0(a0) +; RV64P_8700-NEXT: fld fa0, 8(a0) +; RV64P_8700-NEXT: fld fa2, 16(a0) +; RV64P_8700-NEXT: fld fa1, 24(a0) +; RV64P_8700-NEXT: tail sinkd +; +; RV64D_PAIR-LABEL: testd: +; RV64D_PAIR: # %bb.0: # %entry +; RV64D_PAIR-NEXT: fld fa3, 0(a0) +; RV64D_PAIR-NEXT: fld fa0, 8(a0) +; RV64D_PAIR-NEXT: fld fa2, 16(a0) +; RV64D_PAIR-NEXT: fld fa1, 24(a0) +; RV64D_PAIR-NEXT: tail sinkd +; +; RV64D_8700-LABEL: testd: +; RV64D_8700: # %bb.0: # %entry +; RV64D_8700-NEXT: fld fa3, 0(a0) +; RV64D_8700-NEXT: fld fa0, 8(a0) +; RV64D_8700-NEXT: fld fa2, 
16(a0) +; RV64D_8700-NEXT: fld fa1, 24(a0) +; RV64D_8700-NEXT: tail sinkd +entry: + %arrayidx = getelementptr inbounds double, double* %a, i64 1 + %0 = load double, double* %arrayidx, align 8 + %arrayidx1 = getelementptr inbounds double, double* %a, i64 3 + %1 = load double, double* %arrayidx1, align 8 + %arrayidx2 = getelementptr inbounds double, double* %a, i64 2 + %2 = load double, double* %arrayidx2, align 8 + %3 = load double, double* %a, align 8 + tail call void @sinkd(double noundef %0, double noundef %1, double noundef %2, double noundef %3) + ret void +} + +declare dso_local void @sinkd(double noundef, double noundef, double noundef, double noundef) local_unnamed_addr diff --git a/llvm/test/CodeGen/RISCV/riscv-enable-gep-opt.ll b/llvm/test/CodeGen/RISCV/riscv-enable-gep-opt.ll new file mode 100644 index 0000000000000..3191e7e38e5ce --- /dev/null +++ b/llvm/test/CodeGen/RISCV/riscv-enable-gep-opt.ll @@ -0,0 +1,10 @@ +; RUN: llc -mtriple=riscv32 -O3 -riscv-enable-gep-opt=true -debug-pass=Structure < %s -o /dev/null 2>&1 | \ +; RUN: grep -v "Verify generated machine code" | \ +; RUN: FileCheck %s --check-prefixes=CHECK + + +; REQUIRES: asserts + +; CHECK-LABEL: Pass Arguments: +; CHECK: Split GEPs to a variadic base and a constant offset for better CSE + diff --git a/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll b/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll index dad20b2d19464..3707eb45125a5 100644 --- a/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll +++ b/llvm/test/CodeGen/RISCV/rv64i-exhaustive-w-insts.ll @@ -188,7 +188,7 @@ define zeroext i32 @zext_addw_aext_aext(i32 %a, i32 %b) nounwind { ; RV64ZBA-LABEL: zext_addw_aext_aext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: add a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = add i32 %a, %b ret i32 %1 @@ -205,7 +205,7 @@ define zeroext i32 @zext_addw_aext_sext(i32 %a, i32 signext %b) nounwind { ; RV64ZBA-LABEL: zext_addw_aext_sext: ; RV64ZBA: # 
%bb.0: ; RV64ZBA-NEXT: add a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = add i32 %a, %b ret i32 %1 @@ -222,7 +222,7 @@ define zeroext i32 @zext_addw_aext_zext(i32 %a, i32 zeroext %b) nounwind { ; RV64ZBA-LABEL: zext_addw_aext_zext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: add a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = add i32 %a, %b ret i32 %1 @@ -239,7 +239,7 @@ define zeroext i32 @zext_addw_sext_aext(i32 signext %a, i32 %b) nounwind { ; RV64ZBA-LABEL: zext_addw_sext_aext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: add a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = add i32 %a, %b ret i32 %1 @@ -256,7 +256,7 @@ define zeroext i32 @zext_addw_sext_sext(i32 signext %a, i32 signext %b) nounwind ; RV64ZBA-LABEL: zext_addw_sext_sext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: add a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = add i32 %a, %b ret i32 %1 @@ -273,7 +273,7 @@ define zeroext i32 @zext_addw_sext_zext(i32 signext %a, i32 zeroext %b) nounwind ; RV64ZBA-LABEL: zext_addw_sext_zext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: add a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = add i32 %a, %b ret i32 %1 @@ -290,7 +290,7 @@ define zeroext i32 @zext_addw_zext_aext(i32 zeroext %a, i32 %b) nounwind { ; RV64ZBA-LABEL: zext_addw_zext_aext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: add a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = add i32 %a, %b ret i32 %1 @@ -307,7 +307,7 @@ define zeroext i32 @zext_addw_zext_sext(i32 zeroext %a, i32 signext %b) nounwind ; RV64ZBA-LABEL: zext_addw_zext_sext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: add a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = add i32 %a, %b ret i32 %1 @@ 
-324,7 +324,7 @@ define zeroext i32 @zext_addw_zext_zext(i32 zeroext %a, i32 zeroext %b) nounwind ; RV64ZBA-LABEL: zext_addw_zext_zext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: add a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = add i32 %a, %b ret i32 %1 @@ -509,7 +509,7 @@ define zeroext i32 @zext_subw_aext_aext(i32 %a, i32 %b) nounwind { ; RV64ZBA-LABEL: zext_subw_aext_aext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: subw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = sub i32 %a, %b ret i32 %1 @@ -526,7 +526,7 @@ define zeroext i32 @zext_subw_aext_sext(i32 %a, i32 signext %b) nounwind { ; RV64ZBA-LABEL: zext_subw_aext_sext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: subw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = sub i32 %a, %b ret i32 %1 @@ -543,7 +543,7 @@ define zeroext i32 @zext_subw_aext_zext(i32 %a, i32 zeroext %b) nounwind { ; RV64ZBA-LABEL: zext_subw_aext_zext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: subw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = sub i32 %a, %b ret i32 %1 @@ -560,7 +560,7 @@ define zeroext i32 @zext_subw_sext_aext(i32 signext %a, i32 %b) nounwind { ; RV64ZBA-LABEL: zext_subw_sext_aext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: subw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = sub i32 %a, %b ret i32 %1 @@ -577,7 +577,7 @@ define zeroext i32 @zext_subw_sext_sext(i32 signext %a, i32 signext %b) nounwind ; RV64ZBA-LABEL: zext_subw_sext_sext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: subw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = sub i32 %a, %b ret i32 %1 @@ -594,7 +594,7 @@ define zeroext i32 @zext_subw_sext_zext(i32 signext %a, i32 zeroext %b) nounwind ; RV64ZBA-LABEL: zext_subw_sext_zext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: 
subw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = sub i32 %a, %b ret i32 %1 @@ -611,7 +611,7 @@ define zeroext i32 @zext_subw_zext_aext(i32 zeroext %a, i32 %b) nounwind { ; RV64ZBA-LABEL: zext_subw_zext_aext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: subw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = sub i32 %a, %b ret i32 %1 @@ -628,7 +628,7 @@ define zeroext i32 @zext_subw_zext_sext(i32 zeroext %a, i32 signext %b) nounwind ; RV64ZBA-LABEL: zext_subw_zext_sext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: subw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = sub i32 %a, %b ret i32 %1 @@ -645,7 +645,7 @@ define zeroext i32 @zext_subw_zext_zext(i32 zeroext %a, i32 zeroext %b) nounwind ; RV64ZBA-LABEL: zext_subw_zext_zext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: subw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = sub i32 %a, %b ret i32 %1 @@ -828,7 +828,7 @@ define zeroext i32 @zext_sllw_aext_aext(i32 %a, i32 %b) nounwind { ; RV64ZBA-LABEL: zext_sllw_aext_aext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: sllw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = shl i32 %a, %b ret i32 %1 @@ -845,7 +845,7 @@ define zeroext i32 @zext_sllw_aext_sext(i32 %a, i32 signext %b) nounwind { ; RV64ZBA-LABEL: zext_sllw_aext_sext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: sllw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = shl i32 %a, %b ret i32 %1 @@ -862,7 +862,7 @@ define zeroext i32 @zext_sllw_aext_zext(i32 %a, i32 zeroext %b) nounwind { ; RV64ZBA-LABEL: zext_sllw_aext_zext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: sllw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = shl i32 %a, %b ret i32 %1 @@ -879,7 +879,7 @@ define zeroext 
i32 @zext_sllw_sext_aext(i32 signext %a, i32 %b) nounwind { ; RV64ZBA-LABEL: zext_sllw_sext_aext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: sllw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = shl i32 %a, %b ret i32 %1 @@ -896,7 +896,7 @@ define zeroext i32 @zext_sllw_sext_sext(i32 signext %a, i32 signext %b) nounwind ; RV64ZBA-LABEL: zext_sllw_sext_sext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: sllw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = shl i32 %a, %b ret i32 %1 @@ -913,7 +913,7 @@ define zeroext i32 @zext_sllw_sext_zext(i32 signext %a, i32 zeroext %b) nounwind ; RV64ZBA-LABEL: zext_sllw_sext_zext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: sllw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = shl i32 %a, %b ret i32 %1 @@ -930,7 +930,7 @@ define zeroext i32 @zext_sllw_zext_aext(i32 zeroext %a, i32 %b) nounwind { ; RV64ZBA-LABEL: zext_sllw_zext_aext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: sllw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = shl i32 %a, %b ret i32 %1 @@ -947,7 +947,7 @@ define zeroext i32 @zext_sllw_zext_sext(i32 zeroext %a, i32 signext %b) nounwind ; RV64ZBA-LABEL: zext_sllw_zext_sext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: sllw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = shl i32 %a, %b ret i32 %1 @@ -964,7 +964,7 @@ define zeroext i32 @zext_sllw_zext_zext(i32 zeroext %a, i32 zeroext %b) nounwind ; RV64ZBA-LABEL: zext_sllw_zext_zext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: sllw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = shl i32 %a, %b ret i32 %1 @@ -1143,7 +1143,7 @@ define zeroext i32 @zext_srlw_aext_aext(i32 %a, i32 %b) nounwind { ; RV64ZBA-LABEL: zext_srlw_aext_aext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: srlw a0, a0, a1 -; 
RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = lshr i32 %a, %b ret i32 %1 @@ -1160,7 +1160,7 @@ define zeroext i32 @zext_srlw_aext_sext(i32 %a, i32 signext %b) nounwind { ; RV64ZBA-LABEL: zext_srlw_aext_sext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: srlw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = lshr i32 %a, %b ret i32 %1 @@ -1177,7 +1177,7 @@ define zeroext i32 @zext_srlw_aext_zext(i32 %a, i32 zeroext %b) nounwind { ; RV64ZBA-LABEL: zext_srlw_aext_zext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: srlw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = lshr i32 %a, %b ret i32 %1 @@ -1194,7 +1194,7 @@ define zeroext i32 @zext_srlw_sext_aext(i32 signext %a, i32 %b) nounwind { ; RV64ZBA-LABEL: zext_srlw_sext_aext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: srlw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = lshr i32 %a, %b ret i32 %1 @@ -1211,7 +1211,7 @@ define zeroext i32 @zext_srlw_sext_sext(i32 signext %a, i32 signext %b) nounwind ; RV64ZBA-LABEL: zext_srlw_sext_sext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: srlw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = lshr i32 %a, %b ret i32 %1 @@ -1228,7 +1228,7 @@ define zeroext i32 @zext_srlw_sext_zext(i32 signext %a, i32 zeroext %b) nounwind ; RV64ZBA-LABEL: zext_srlw_sext_zext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: srlw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = lshr i32 %a, %b ret i32 %1 @@ -1245,7 +1245,7 @@ define zeroext i32 @zext_srlw_zext_aext(i32 zeroext %a, i32 %b) nounwind { ; RV64ZBA-LABEL: zext_srlw_zext_aext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: srlw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = lshr i32 %a, %b ret i32 %1 @@ -1262,7 +1262,7 @@ define 
zeroext i32 @zext_srlw_zext_sext(i32 zeroext %a, i32 signext %b) nounwind ; RV64ZBA-LABEL: zext_srlw_zext_sext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: srlw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = lshr i32 %a, %b ret i32 %1 @@ -1279,7 +1279,7 @@ define zeroext i32 @zext_srlw_zext_zext(i32 zeroext %a, i32 zeroext %b) nounwind ; RV64ZBA-LABEL: zext_srlw_zext_zext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: srlw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = lshr i32 %a, %b ret i32 %1 @@ -1458,7 +1458,7 @@ define zeroext i32 @zext_sraw_aext_aext(i32 %a, i32 %b) nounwind { ; RV64ZBA-LABEL: zext_sraw_aext_aext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: sraw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = ashr i32 %a, %b ret i32 %1 @@ -1475,7 +1475,7 @@ define zeroext i32 @zext_sraw_aext_sext(i32 %a, i32 signext %b) nounwind { ; RV64ZBA-LABEL: zext_sraw_aext_sext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: sraw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = ashr i32 %a, %b ret i32 %1 @@ -1492,7 +1492,7 @@ define zeroext i32 @zext_sraw_aext_zext(i32 %a, i32 zeroext %b) nounwind { ; RV64ZBA-LABEL: zext_sraw_aext_zext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: sraw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = ashr i32 %a, %b ret i32 %1 @@ -1509,7 +1509,7 @@ define zeroext i32 @zext_sraw_sext_aext(i32 signext %a, i32 %b) nounwind { ; RV64ZBA-LABEL: zext_sraw_sext_aext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: sraw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = ashr i32 %a, %b ret i32 %1 @@ -1526,7 +1526,7 @@ define zeroext i32 @zext_sraw_sext_sext(i32 signext %a, i32 signext %b) nounwind ; RV64ZBA-LABEL: zext_sraw_sext_sext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: sraw 
a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = ashr i32 %a, %b ret i32 %1 @@ -1543,7 +1543,7 @@ define zeroext i32 @zext_sraw_sext_zext(i32 signext %a, i32 zeroext %b) nounwind ; RV64ZBA-LABEL: zext_sraw_sext_zext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: sraw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = ashr i32 %a, %b ret i32 %1 @@ -1560,7 +1560,7 @@ define zeroext i32 @zext_sraw_zext_aext(i32 zeroext %a, i32 %b) nounwind { ; RV64ZBA-LABEL: zext_sraw_zext_aext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: sraw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = ashr i32 %a, %b ret i32 %1 @@ -1577,7 +1577,7 @@ define zeroext i32 @zext_sraw_zext_sext(i32 zeroext %a, i32 signext %b) nounwind ; RV64ZBA-LABEL: zext_sraw_zext_sext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: sraw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = ashr i32 %a, %b ret i32 %1 @@ -1594,7 +1594,7 @@ define zeroext i32 @zext_sraw_zext_zext(i32 zeroext %a, i32 zeroext %b) nounwind ; RV64ZBA-LABEL: zext_sraw_zext_zext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: sraw a0, a0, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = ashr i32 %a, %b ret i32 %1 @@ -1667,7 +1667,7 @@ define zeroext i32 @zext_addiw_aext(i32 %a) nounwind { ; RV64ZBA-LABEL: zext_addiw_aext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: addi a0, a0, 7 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = add i32 %a, 7 ret i32 %1 @@ -1684,7 +1684,7 @@ define zeroext i32 @zext_addiw_sext(i32 signext %a) nounwind { ; RV64ZBA-LABEL: zext_addiw_sext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: addi a0, a0, 8 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = add i32 %a, 8 ret i32 %1 @@ -1701,7 +1701,7 @@ define zeroext i32 
@zext_addiw_zext(i32 zeroext %a) nounwind { ; RV64ZBA-LABEL: zext_addiw_zext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: addi a0, a0, 9 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = add i32 %a, 9 ret i32 %1 @@ -1944,7 +1944,7 @@ define zeroext i32 @zext_sraiw_aext(i32 %a) nounwind { ; RV64ZBA-LABEL: zext_sraiw_aext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: sraiw a0, a0, 7 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = ashr i32 %a, 7 ret i32 %1 @@ -1971,7 +1971,7 @@ define zeroext i32 @zext_sraiw_zext(i32 zeroext %a) nounwind { ; RV64ZBA-LABEL: zext_sraiw_zext: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: sraiw a0, a0, 9 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %1 = ashr i32 %a, 9 ret i32 %1 diff --git a/llvm/test/CodeGen/RISCV/rv64zba.ll b/llvm/test/CodeGen/RISCV/rv64zba.ll index 9760821832b37..f8134c0adf5b7 100644 --- a/llvm/test/CodeGen/RISCV/rv64zba.ll +++ b/llvm/test/CodeGen/RISCV/rv64zba.ll @@ -93,7 +93,7 @@ define i64 @zextw_i64(i64 %a) nounwind { ; ; RV64ZBA-LABEL: zextw_i64: ; RV64ZBA: # %bb.0: -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %and = and i64 %a, 4294967295 ret i64 %and @@ -112,7 +112,7 @@ define i64 @zextw_demandedbits_i64(i64 %0) { ; RV64ZBA-LABEL: zextw_demandedbits_i64: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: ori a0, a0, 1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret %2 = and i64 %0, 4294967294 %3 = or i64 %2, 1 @@ -1177,7 +1177,7 @@ define i64 @adduw_imm(i32 signext %0) nounwind { ; ; RV64ZBA-LABEL: adduw_imm: ; RV64ZBA: # %bb.0: -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: addi a0, a0, 5 ; RV64ZBA-NEXT: ret %a = zext i32 %0 to i64 @@ -1244,7 +1244,7 @@ define i64 @imm_zextw() nounwind { ; RV64ZBA-LABEL: imm_zextw: ; RV64ZBA: # %bb.0: ; RV64ZBA-NEXT: li a0, -2 -; RV64ZBA-NEXT: zext.w a0, a0 +; 
RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret ret i64 4294967294 ; -2 in 32 bits. } @@ -3230,7 +3230,7 @@ define i64 @add_u32simm32_zextw(i64 %x) nounwind { ; RV64ZBA-LABEL: add_u32simm32_zextw: ; RV64ZBA: # %bb.0: # %entry ; RV64ZBA-NEXT: addi a0, a0, -2 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: ret entry: %add = add i64 %x, 4294967294 diff --git a/llvm/test/CodeGen/RISCV/select-and.ll b/llvm/test/CodeGen/RISCV/select-and.ll index d305993f0e966..e35fcf3d3035e 100644 --- a/llvm/test/CodeGen/RISCV/select-and.ll +++ b/llvm/test/CodeGen/RISCV/select-and.ll @@ -3,6 +3,8 @@ ; RUN: | FileCheck -check-prefix=RV32I %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s +; RUN: llc -mtriple=riscv64 -mattr=+xmipscmov -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I-CCMOV %s ;; There are a few different ways to lower (select (and A, B), X, Y). This test ;; ensures that we do so with as few branches as possible. 
@@ -27,6 +29,12 @@ define signext i32 @select_of_and(i1 zeroext %a, i1 zeroext %b, i32 signext %c, ; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: .LBB0_2: ; RV64I-NEXT: ret +; +; RV64I-CCMOV-LABEL: select_of_and: +; RV64I-CCMOV: # %bb.0: +; RV64I-CCMOV-NEXT: and a0, a0, a1 +; RV64I-CCMOV-NEXT: ccmov a0, a0, a2, a3 +; RV64I-CCMOV-NEXT: ret %1 = and i1 %a, %b %2 = select i1 %1, i32 %c, i32 %d ret i32 %2 @@ -69,6 +77,23 @@ define signext i32 @if_of_and(i1 zeroext %a, i1 zeroext %b) nounwind { ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV64I-CCMOV-LABEL: if_of_and: +; RV64I-CCMOV: # %bb.0: +; RV64I-CCMOV-NEXT: addi sp, sp, -16 +; RV64I-CCMOV-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-CCMOV-NEXT: beqz a0, .LBB1_3 +; RV64I-CCMOV-NEXT: # %bb.1: +; RV64I-CCMOV-NEXT: beqz a1, .LBB1_3 +; RV64I-CCMOV-NEXT: # %bb.2: # %if.then +; RV64I-CCMOV-NEXT: call both +; RV64I-CCMOV-NEXT: j .LBB1_4 +; RV64I-CCMOV-NEXT: .LBB1_3: # %if.else +; RV64I-CCMOV-NEXT: call neither +; RV64I-CCMOV-NEXT: .LBB1_4: # %if.end +; RV64I-CCMOV-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-CCMOV-NEXT: addi sp, sp, 16 +; RV64I-CCMOV-NEXT: ret %1 = and i1 %a, %b br i1 %1, label %if.then, label %if.else diff --git a/llvm/test/CodeGen/RISCV/select-bare.ll b/llvm/test/CodeGen/RISCV/select-bare.ll index cf8fe96742bfb..29acf8d347f10 100644 --- a/llvm/test/CodeGen/RISCV/select-bare.ll +++ b/llvm/test/CodeGen/RISCV/select-bare.ll @@ -1,6 +1,8 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \ ; RUN: | FileCheck %s -check-prefix=RV32I +; RUN: llc -mtriple=riscv64 -mattr=+xmipscmov -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I-CCMOV %s define i32 @bare_select(i1 %a, i32 %b, i32 %c) nounwind { ; RV32I-LABEL: bare_select: @@ -12,6 +14,12 @@ define i32 @bare_select(i1 %a, i32 %b, i32 %c) nounwind { ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: .LBB0_2: ; 
RV32I-NEXT: ret +; +; RV64I-CCMOV-LABEL: bare_select: +; RV64I-CCMOV: # %bb.0: +; RV64I-CCMOV-NEXT: andi a0, a0, 1 +; RV64I-CCMOV-NEXT: ccmov a0, a0, a1, a2 +; RV64I-CCMOV-NEXT: ret %1 = select i1 %a, i32 %b, i32 %c ret i32 %1 } @@ -26,6 +34,12 @@ define float @bare_select_float(i1 %a, float %b, float %c) nounwind { ; RV32I-NEXT: mv a0, a2 ; RV32I-NEXT: .LBB1_2: ; RV32I-NEXT: ret +; +; RV64I-CCMOV-LABEL: bare_select_float: +; RV64I-CCMOV: # %bb.0: +; RV64I-CCMOV-NEXT: andi a0, a0, 1 +; RV64I-CCMOV-NEXT: ccmov a0, a0, a1, a2 +; RV64I-CCMOV-NEXT: ret %1 = select i1 %a, float %b, float %c ret float %1 } diff --git a/llvm/test/CodeGen/RISCV/select-cc.ll b/llvm/test/CodeGen/RISCV/select-cc.ll index 31e25702da8ba..11f17831fd17e 100644 --- a/llvm/test/CodeGen/RISCV/select-cc.ll +++ b/llvm/test/CodeGen/RISCV/select-cc.ll @@ -3,6 +3,8 @@ ; RUN: | FileCheck -check-prefixes=RV32I %s ; RUN: llc -mtriple=riscv64 -disable-block-placement -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefixes=RV64I %s +; RUN: llc -mtriple=riscv64 -mattr=+xmipscmov -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I-CCMOV %s define signext i32 @foo(i32 signext %a, ptr %b) nounwind { ; RV32I-LABEL: foo: @@ -156,6 +158,57 @@ define signext i32 @foo(i32 signext %a, ptr %b) nounwind { ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: .LBB0_28: ; RV64I-NEXT: ret +; +; RV64I-CCMOV-LABEL: foo: +; RV64I-CCMOV: # %bb.0: +; RV64I-CCMOV-NEXT: lw a2, 0(a1) +; RV64I-CCMOV-NEXT: lw a3, 0(a1) +; RV64I-CCMOV-NEXT: lw a4, 0(a1) +; RV64I-CCMOV-NEXT: lw a5, 0(a1) +; RV64I-CCMOV-NEXT: xor a6, a0, a2 +; RV64I-CCMOV-NEXT: ccmov a0, a6, a2, a0 +; RV64I-CCMOV-NEXT: xor a2, a0, a3 +; RV64I-CCMOV-NEXT: ccmov a0, a2, a0, a3 +; RV64I-CCMOV-NEXT: lw a2, 0(a1) +; RV64I-CCMOV-NEXT: sltu a3, a4, a0 +; RV64I-CCMOV-NEXT: ccmov a0, a3, a0, a4 +; RV64I-CCMOV-NEXT: lw a3, 0(a1) +; RV64I-CCMOV-NEXT: sltu a4, a0, a5 +; RV64I-CCMOV-NEXT: ccmov a0, a4, a5, a0 +; RV64I-CCMOV-NEXT: lw a4, 0(a1) +; RV64I-CCMOV-NEXT: sltu a5, 
a0, a2 +; RV64I-CCMOV-NEXT: ccmov a0, a5, a0, a2 +; RV64I-CCMOV-NEXT: lw a2, 0(a1) +; RV64I-CCMOV-NEXT: sltu a5, a3, a0 +; RV64I-CCMOV-NEXT: ccmov a0, a5, a3, a0 +; RV64I-CCMOV-NEXT: lw a3, 0(a1) +; RV64I-CCMOV-NEXT: sext.w a5, a0 +; RV64I-CCMOV-NEXT: slt a5, a4, a5 +; RV64I-CCMOV-NEXT: ccmov a0, a5, a0, a4 +; RV64I-CCMOV-NEXT: lw a4, 0(a1) +; RV64I-CCMOV-NEXT: sext.w a5, a0 +; RV64I-CCMOV-NEXT: slt a5, a5, a2 +; RV64I-CCMOV-NEXT: ccmov a0, a5, a2, a0 +; RV64I-CCMOV-NEXT: lw a2, 0(a1) +; RV64I-CCMOV-NEXT: sext.w a5, a0 +; RV64I-CCMOV-NEXT: slt a5, a5, a3 +; RV64I-CCMOV-NEXT: ccmov a0, a5, a0, a3 +; RV64I-CCMOV-NEXT: lw a3, 0(a1) +; RV64I-CCMOV-NEXT: sext.w a5, a0 +; RV64I-CCMOV-NEXT: slt a5, a4, a5 +; RV64I-CCMOV-NEXT: ccmov a0, a5, a4, a0 +; RV64I-CCMOV-NEXT: lw a4, 0(a1) +; RV64I-CCMOV-NEXT: slti a5, a2, 1 +; RV64I-CCMOV-NEXT: ccmov a0, a5, a0, a2 +; RV64I-CCMOV-NEXT: slti a5, a2, 0 +; RV64I-CCMOV-NEXT: ccmov a0, a5, a3, a0 +; RV64I-CCMOV-NEXT: lw a1, 0(a1) +; RV64I-CCMOV-NEXT: slti a3, a4, 1025 +; RV64I-CCMOV-NEXT: ccmov a0, a3, a4, a0 +; RV64I-CCMOV-NEXT: sltiu a2, a2, 2047 +; RV64I-CCMOV-NEXT: ccmov a0, a2, a1, a0 +; RV64I-CCMOV-NEXT: sext.w a0, a0 +; RV64I-CCMOV-NEXT: ret %val1 = load volatile i32, ptr %b %tst1 = icmp eq i32 %a, %val1 %val2 = select i1 %tst1, i32 %a, i32 %val1 @@ -258,6 +311,23 @@ define signext i16 @numsignbits(i16 signext %0, i16 signext %1, i16 signext %2, ; RV64I-NEXT: ld s0, 0(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV64I-CCMOV-LABEL: numsignbits: +; RV64I-CCMOV: # %bb.0: +; RV64I-CCMOV-NEXT: addi sp, sp, -16 +; RV64I-CCMOV-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-CCMOV-NEXT: sd s0, 0(sp) # 8-byte Folded Spill +; RV64I-CCMOV-NEXT: ccmov s0, a0, a2, a3 +; RV64I-CCMOV-NEXT: beqz a1, .LBB1_2 +; RV64I-CCMOV-NEXT: # %bb.1: +; RV64I-CCMOV-NEXT: mv a0, s0 +; RV64I-CCMOV-NEXT: call bar +; RV64I-CCMOV-NEXT: .LBB1_2: +; RV64I-CCMOV-NEXT: mv a0, s0 +; RV64I-CCMOV-NEXT: ld ra, 8(sp) # 8-byte Folded 
Reload +; RV64I-CCMOV-NEXT: ld s0, 0(sp) # 8-byte Folded Reload +; RV64I-CCMOV-NEXT: addi sp, sp, 16 +; RV64I-CCMOV-NEXT: ret %5 = icmp eq i16 %0, 0 %6 = select i1 %5, i16 %3, i16 %2 %7 = icmp eq i16 %1, 0 @@ -295,6 +365,14 @@ define i32 @select_sge_int16min(i32 signext %x, i32 signext %y, i32 signext %z) ; RV64I-NEXT: .LBB2_2: ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ret +; +; RV64I-CCMOV-LABEL: select_sge_int16min: +; RV64I-CCMOV: # %bb.0: +; RV64I-CCMOV-NEXT: lui a3, 1048560 +; RV64I-CCMOV-NEXT: addiw a3, a3, -1 +; RV64I-CCMOV-NEXT: slt a0, a3, a0 +; RV64I-CCMOV-NEXT: ccmov a0, a0, a1, a2 +; RV64I-CCMOV-NEXT: ret %a = icmp sge i32 %x, -65536 %b = select i1 %a, i32 %y, i32 %z ret i32 %b @@ -331,6 +409,14 @@ define i64 @select_sge_int32min(i64 %x, i64 %y, i64 %z) { ; RV64I-NEXT: .LBB3_2: ; RV64I-NEXT: mv a0, a1 ; RV64I-NEXT: ret +; +; RV64I-CCMOV-LABEL: select_sge_int32min: +; RV64I-CCMOV: # %bb.0: +; RV64I-CCMOV-NEXT: lui a3, 524288 +; RV64I-CCMOV-NEXT: addi a3, a3, -1 +; RV64I-CCMOV-NEXT: slt a0, a3, a0 +; RV64I-CCMOV-NEXT: ccmov a0, a0, a1, a2 +; RV64I-CCMOV-NEXT: ret %a = icmp sge i64 %x, -2147483648 %b = select i1 %a, i64 %y, i64 %z ret i64 %b diff --git a/llvm/test/CodeGen/RISCV/select-or.ll b/llvm/test/CodeGen/RISCV/select-or.ll index 20a5ec15290cd..924bb7ff9f97e 100644 --- a/llvm/test/CodeGen/RISCV/select-or.ll +++ b/llvm/test/CodeGen/RISCV/select-or.ll @@ -3,6 +3,8 @@ ; RUN: | FileCheck -check-prefix=RV32I %s ; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \ ; RUN: | FileCheck -check-prefix=RV64I %s +; RUN: llc -mtriple=riscv64 -mattr=+xmipscmov -verify-machineinstrs < %s \ +; RUN: | FileCheck -check-prefix=RV64I-CCMOV %s ;; There are a few different ways to lower (select (or A, B), X, Y). This test ;; ensures that we do so with as few branches as possible. 
@@ -27,6 +29,12 @@ define signext i32 @select_of_or(i1 zeroext %a, i1 zeroext %b, i32 signext %c, i ; RV64I-NEXT: mv a0, a3 ; RV64I-NEXT: .LBB0_2: ; RV64I-NEXT: ret +; +; RV64I-CCMOV-LABEL: select_of_or: +; RV64I-CCMOV: # %bb.0: +; RV64I-CCMOV-NEXT: or a0, a0, a1 +; RV64I-CCMOV-NEXT: ccmov a0, a0, a2, a3 +; RV64I-CCMOV-NEXT: ret %1 = or i1 %a, %b %2 = select i1 %1, i32 %c, i32 %d ret i32 %2 @@ -69,6 +77,23 @@ define signext i32 @if_of_or(i1 zeroext %a, i1 zeroext %b) nounwind { ; RV64I-NEXT: ld ra, 8(sp) # 8-byte Folded Reload ; RV64I-NEXT: addi sp, sp, 16 ; RV64I-NEXT: ret +; +; RV64I-CCMOV-LABEL: if_of_or: +; RV64I-CCMOV: # %bb.0: +; RV64I-CCMOV-NEXT: addi sp, sp, -16 +; RV64I-CCMOV-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64I-CCMOV-NEXT: bnez a0, .LBB1_3 +; RV64I-CCMOV-NEXT: # %bb.1: +; RV64I-CCMOV-NEXT: bnez a1, .LBB1_3 +; RV64I-CCMOV-NEXT: # %bb.2: # %if.else +; RV64I-CCMOV-NEXT: call neither +; RV64I-CCMOV-NEXT: j .LBB1_4 +; RV64I-CCMOV-NEXT: .LBB1_3: # %if.then +; RV64I-CCMOV-NEXT: call either +; RV64I-CCMOV-NEXT: .LBB1_4: # %if.end +; RV64I-CCMOV-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64I-CCMOV-NEXT: addi sp, sp, 16 +; RV64I-CCMOV-NEXT: ret %1 = or i1 %a, %b br i1 %1, label %if.then, label %if.else diff --git a/llvm/test/CodeGen/RISCV/xaluo.ll b/llvm/test/CodeGen/RISCV/xaluo.ll index c0cbbb3ff9389..cadaf71fbcda6 100644 --- a/llvm/test/CodeGen/RISCV/xaluo.ll +++ b/llvm/test/CodeGen/RISCV/xaluo.ll @@ -1690,8 +1690,8 @@ define zeroext i1 @umulo.i32(i32 signext %v1, i32 signext %v2, ptr %res) { ; ; RV64ZBA-LABEL: umulo.i32: ; RV64ZBA: # %bb.0: # %entry -; RV64ZBA-NEXT: zext.w a1, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a1, a1, zero +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: mul a1, a0, a1 ; RV64ZBA-NEXT: srli a0, a1, 32 ; RV64ZBA-NEXT: snez a0, a0 @@ -1759,7 +1759,7 @@ define zeroext i1 @umulo2.i32(i32 signext %v1, ptr %res) { ; ; RV64ZBA-LABEL: umulo2.i32: ; RV64ZBA: # %bb.0: # %entry -; RV64ZBA-NEXT: zext.w a2, a0 +; 
RV64ZBA-NEXT: add.uw a2, a0, zero ; RV64ZBA-NEXT: sh1add.uw a0, a0, a2 ; RV64ZBA-NEXT: sh2add a2, a0, a2 ; RV64ZBA-NEXT: srli a0, a2, 32 @@ -1828,8 +1828,8 @@ define signext i32 @umulo3.i32(i32 signext %0, i32 signext %1, ptr %2) { ; ; RV64ZBA-LABEL: umulo3.i32: ; RV64ZBA: # %bb.0: -; RV64ZBA-NEXT: zext.w a1, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a1, a1, zero +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: mul a3, a0, a1 ; RV64ZBA-NEXT: srli a3, a3, 32 ; RV64ZBA-NEXT: snez a3, a3 @@ -3748,8 +3748,8 @@ define i32 @umulo.select.i32(i32 signext %v1, i32 signext %v2) { ; ; RV64ZBA-LABEL: umulo.select.i32: ; RV64ZBA: # %bb.0: # %entry -; RV64ZBA-NEXT: zext.w a2, a1 -; RV64ZBA-NEXT: zext.w a3, a0 +; RV64ZBA-NEXT: add.uw a2, a1, zero +; RV64ZBA-NEXT: add.uw a3, a0, zero ; RV64ZBA-NEXT: mul a2, a3, a2 ; RV64ZBA-NEXT: srli a2, a2, 32 ; RV64ZBA-NEXT: bnez a2, .LBB48_2 @@ -3807,8 +3807,8 @@ define i1 @umulo.not.i32(i32 signext %v1, i32 signext %v2) { ; ; RV64ZBA-LABEL: umulo.not.i32: ; RV64ZBA: # %bb.0: # %entry -; RV64ZBA-NEXT: zext.w a1, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a1, a1, zero +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: mul a0, a0, a1 ; RV64ZBA-NEXT: srli a0, a0, 32 ; RV64ZBA-NEXT: seqz a0, a0 @@ -5323,8 +5323,8 @@ define zeroext i1 @umulo.br.i32(i32 signext %v1, i32 signext %v2) { ; ; RV64ZBA-LABEL: umulo.br.i32: ; RV64ZBA: # %bb.0: # %entry -; RV64ZBA-NEXT: zext.w a1, a1 -; RV64ZBA-NEXT: zext.w a0, a0 +; RV64ZBA-NEXT: add.uw a1, a1, zero +; RV64ZBA-NEXT: add.uw a0, a0, zero ; RV64ZBA-NEXT: mul a0, a0, a1 ; RV64ZBA-NEXT: srli a0, a0, 32 ; RV64ZBA-NEXT: beqz a0, .LBB63_2 diff --git a/llvm/test/CodeGen/RISCV/zcb-regalloc-hints.ll b/llvm/test/CodeGen/RISCV/zcb-regalloc-hints.ll index 545d6c6aca041..639252428bb54 100644 --- a/llvm/test/CodeGen/RISCV/zcb-regalloc-hints.ll +++ b/llvm/test/CodeGen/RISCV/zcb-regalloc-hints.ll @@ -76,7 +76,7 @@ define i64 @c_zext_h(i64 %x, i16 %y) { define i64 @c_zext_w(i64 %x, i32 
%y) { ; CHECK-LABEL: c_zext_w: ; CHECK: # %bb.0: -; CHECK-NEXT: zext.w a1, a1 +; CHECK-NEXT: add.uw a1, a1, zero ; CHECK-NEXT: li a0, 1234 ; CHECK-NEXT: mul a0, a0, a1 ; CHECK-NEXT: ret diff --git a/llvm/test/MC/RISCV/rv64zba-aliases-valid.s b/llvm/test/MC/RISCV/rv64zba-aliases-valid.s index bb8eeb41dde75..1397f0aeacb83 100644 --- a/llvm/test/MC/RISCV/rv64zba-aliases-valid.s +++ b/llvm/test/MC/RISCV/rv64zba-aliases-valid.s @@ -16,13 +16,13 @@ # aliases disabled # CHECK-S-OBJ-NOALIAS: add.uw t0, t1, zero -# CHECK-S-OBJ: zext.w t0, t1 +# CHECK-S-OBJ: add.uw t0, t1, zero zext.w x5, x6 # CHECK-S-OBJ-NOALIAS: addi t1, zero, -2 # CHECK-S-OBJ-NOALIAS-NEXT: add.uw t1, t1, zero # CHECK-S-OBJ: li t1, -2 -# CHECK-S-OBJ-NEXT: zext.w t1, t1 +# CHECK-S-OBJ-NEXT: add.uw t1, t1, zero li x6, 0xfffffffe # CHECK-S-OBJ-NOALIAS: lui t0, 768955