From ffbfd68d91740acc0171564f2c6fa3a0cfe10328 Mon Sep 17 00:00:00 2001 From: Alex Maclean Date: Tue, 28 Jan 2025 00:30:16 +0000 Subject: [PATCH 1/2] auto-update cc ptx_kernel --- llvm/include/llvm/IR/AutoUpgrade.h | 4 ++ llvm/lib/AsmParser/LLParser.cpp | 1 + llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 2 + llvm/lib/IR/AutoUpgrade.cpp | 67 +++++++++++++++++++ llvm/lib/Linker/IRMover.cpp | 1 + llvm/lib/Target/NVPTX/NVPTXUtilities.cpp | 27 +++----- llvm/lib/Target/NVPTX/NVPTXUtilities.h | 7 +- .../CodeGen/NVPTX/upgrade-nvvm-annotations.ll | 28 ++++++++ 8 files changed, 118 insertions(+), 19 deletions(-) create mode 100644 llvm/test/CodeGen/NVPTX/upgrade-nvvm-annotations.ll diff --git a/llvm/include/llvm/IR/AutoUpgrade.h b/llvm/include/llvm/IR/AutoUpgrade.h index 97c3e4d7589d7..8c093568a1e03 100644 --- a/llvm/include/llvm/IR/AutoUpgrade.h +++ b/llvm/include/llvm/IR/AutoUpgrade.h @@ -61,6 +61,10 @@ namespace llvm { /// module is modified. bool UpgradeModuleFlags(Module &M); + /// Convert legacy nvvm.annotations metadata to appropriate function + /// attributes. + void UpgradeNVVMAnnotations(Module &M); + /// Convert calls to ARC runtime functions to intrinsic calls and upgrade the /// old retain release marker to new module flag format. 
void UpgradeARCRuntime(Module &M); diff --git a/llvm/lib/AsmParser/LLParser.cpp b/llvm/lib/AsmParser/LLParser.cpp index e842a8b2e3797..a1f79926fcc99 100644 --- a/llvm/lib/AsmParser/LLParser.cpp +++ b/llvm/lib/AsmParser/LLParser.cpp @@ -448,6 +448,7 @@ bool LLParser::validateEndOfModule(bool UpgradeDebugInfo) { llvm::UpgradeDebugInfo(*M); UpgradeModuleFlags(*M); + UpgradeNVVMAnnotations(*M); UpgradeSectionAttributes(*M); if (PreserveInputDbgFormat != cl::boolOrDefault::BOU_TRUE) diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index e16e8a0f4703f..1a09e80c4fbb2 100644 --- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -7157,6 +7157,8 @@ Error BitcodeReader::materializeModule() { UpgradeModuleFlags(*TheModule); + UpgradeNVVMAnnotations(*TheModule); + UpgradeARCRuntime(*TheModule); return Error::success(); diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index 3725f412b8930..e886a6012b219 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/StringSwitch.h" #include "llvm/BinaryFormat/Dwarf.h" #include "llvm/IR/AttributeMask.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfoMetadata.h" @@ -5019,6 +5020,72 @@ bool llvm::UpgradeDebugInfo(Module &M) { return Modified; } +bool static upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, + const Metadata *V) { + if (K == "kernel") { + if (!mdconst::extract(V)->isZero()) + cast(GV)->setCallingConv(CallingConv::PTX_Kernel); + return true; + } + if (K == "align") { + // V is a bitfield specifying two 16-bit values. The alignment value is + // specified in the low 16 bits. The index is specified in the high bits. For the + // index, 0 indicates the return value while higher values correspond to + // each parameter (idx = param + 1). 
+ const uint64_t AlignIdxValuePair = + mdconst::extract(V)->getZExtValue(); + const unsigned Idx = (AlignIdxValuePair >> 16); + const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF); + // TODO: Skip adding the stackalign attribute for returns, for now. + if (!Idx) + return false; + cast(GV)->addAttributeAtIndex( + Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign)); + return true; + } + + return false; +} + +void llvm::UpgradeNVVMAnnotations(Module &M) { + NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations"); + if (!NamedMD) + return; + + SmallVector NewNodes; + SmallSet SeenNodes; + for (MDNode *MD : NamedMD->operands()) { + if (!SeenNodes.insert(MD).second) + continue; + + auto *GV = mdconst::dyn_extract_or_null(MD->getOperand(0)); + if (!GV) + continue; + + assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands"); + + SmallVector NewOperands{MD->getOperand(0)}; + // Each nvvm.annotations metadata entry will be of the following form: + // !{ ptr @gv, !"key1", value1, !"key2", value2, ... } + // start index = 1, to skip the global variable key + // increment = 2, to skip the value for each property-value pairs + for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) { + MDString *K = cast(MD->getOperand(j)); + const MDOperand &V = MD->getOperand(j + 1); + bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V); + if (!Upgraded) + NewOperands.append({K, V}); + } + + if (NewOperands.size() > 1) + NewNodes.push_back(MDNode::get(M.getContext(), NewOperands)); + } + + NamedMD->clearOperands(); + for (MDNode *N : NewNodes) + NamedMD->addOperand(N); +} + /// This checks for objc retain release marker which should be upgraded. It /// returns true if module is modified. 
static bool upgradeRetainReleaseMarker(Module &M) { diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp index 43fcfe75ba46b..62e2af4da57bb 100644 --- a/llvm/lib/Linker/IRMover.cpp +++ b/llvm/lib/Linker/IRMover.cpp @@ -1244,6 +1244,7 @@ Error IRLinker::linkModuleFlagsMetadata() { // Check for module flag for updates before do anything. UpgradeModuleFlags(*SrcM); + UpgradeNVVMAnnotations(*SrcM); // If the destination module doesn't have module flags yet, then just copy // over the source module's flags. diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp index 0f2bec711b249..a41943880807c 100644 --- a/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp +++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.cpp @@ -310,30 +310,21 @@ std::optional getMaxNReg(const Function &F) { return findOneNVVMAnnotation(&F, "maxnreg"); } -bool isKernelFunction(const Function &F) { - if (F.getCallingConv() == CallingConv::PTX_Kernel) - return true; - - if (const auto X = findOneNVVMAnnotation(&F, "kernel")) - return (*X == 1); - - return false; -} - MaybeAlign getAlign(const Function &F, unsigned Index) { // First check the alignstack metadata if (MaybeAlign StackAlign = F.getAttributes().getAttributes(Index).getStackAlignment()) return StackAlign; - // If that is missing, check the legacy nvvm metadata - std::vector Vs; - bool retval = findAllNVVMAnnotation(&F, "align", Vs); - if (!retval) - return std::nullopt; - for (unsigned V : Vs) - if ((V >> 16) == Index) - return Align(V & 0xFFFF); + // check the legacy nvvm metadata only for the return value since llvm does + // not support stackalign attribute for this. 
+ if (Index == 0) { + std::vector Vs; + if (findAllNVVMAnnotation(&F, "align", Vs)) + for (unsigned V : Vs) + if ((V >> 16) == Index) + return Align(V & 0xFFFF); + } return std::nullopt; } diff --git a/llvm/lib/Target/NVPTX/NVPTXUtilities.h b/llvm/lib/Target/NVPTX/NVPTXUtilities.h index 7ce00b9b5688d..cf35eaf4cbae5 100644 --- a/llvm/lib/Target/NVPTX/NVPTXUtilities.h +++ b/llvm/lib/Target/NVPTX/NVPTXUtilities.h @@ -16,6 +16,7 @@ #include "NVPTX.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/CallingConv.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/IntrinsicInst.h" @@ -63,7 +64,11 @@ std::optional getClusterDimz(const Function &); std::optional getMaxClusterRank(const Function &); std::optional getMinCTASm(const Function &); std::optional getMaxNReg(const Function &); -bool isKernelFunction(const Function &); + +inline bool isKernelFunction(const Function &F) { + return F.getCallingConv() == CallingConv::PTX_Kernel; +} + bool isParamGridConstant(const Value &); MaybeAlign getAlign(const Function &, unsigned); diff --git a/llvm/test/CodeGen/NVPTX/upgrade-nvvm-annotations.ll b/llvm/test/CodeGen/NVPTX/upgrade-nvvm-annotations.ll new file mode 100644 index 0000000000000..a9f370a12a945 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/upgrade-nvvm-annotations.ll @@ -0,0 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 5 +; RUN: opt < %s -mtriple=nvptx64-unknown-unknown -O0 -S | FileCheck %s + +define i32 @foo(i32 %a, i32 %b) { +; CHECK-LABEL: define i32 @foo( +; CHECK-SAME: i32 alignstack(8) [[A:%.*]], i32 alignstack(16) [[B:%.*]]) { +; CHECK-NEXT: ret i32 0 +; + ret i32 0 +} + +define i32 @bar(i32 %a, i32 %b) { +; CHECK-LABEL: define ptx_kernel i32 @bar( +; CHECK-SAME: i32 [[A:%.*]], i32 [[B:%.*]]) { +; CHECK-NEXT: ret i32 0 +; + ret i32 0 +} + +!nvvm.annotations = !{!0, !1, !2} + +!0 = !{ptr @foo, 
!"align", i32 u0x00000008, !"align", i32 u0x00010008, !"align", i32 u0x00020010} +!1 = !{null, !"align", i32 u0x00000008, !"align", i32 u0x00010008, !"align", i32 u0x00020008} +!2 = !{ptr @bar, !"kernel", i32 1} + +;. +; CHECK: [[META0:![0-9]+]] = !{ptr @foo, !"align", i32 8} +;. From 7f126e1f3678c03994e3d2b2fd9901802299e570 Mon Sep 17 00:00:00 2001 From: Alex Maclean Date: Tue, 28 Jan 2025 00:52:28 +0000 Subject: [PATCH 2/2] remove dead omp code --- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 28 ++++----------------------- 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 682227916e712..70ae9327d75dd 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -5906,39 +5906,19 @@ bool llvm::omp::isOpenMPKernel(Function &Fn) { } KernelSet llvm::omp::getDeviceKernels(Module &M) { - // TODO: Create a more cross-platform way of determining device kernels. KernelSet Kernels; - DenseSet SeenKernels; - auto ProcessKernel = [&](Function &KF) { - if (SeenKernels.insert(&KF).second) { + for (Function &F : M) + if (F.hasKernelCallingConv()) { // We are only interested in OpenMP target regions. Others, such as // kernels generated by CUDA but linked together, are not interesting to // this pass. - if (isOpenMPKernel(KF)) { + if (isOpenMPKernel(F)) { ++NumOpenMPTargetRegionKernels; - Kernels.insert(&KF); + Kernels.insert(&F); } else ++NumNonOpenMPTargetRegionKernels; } - }; - - if (NamedMDNode *MD = M.getNamedMetadata("nvvm.annotations")) - for (auto *Op : MD->operands()) { - if (Op->getNumOperands() < 2) - continue; - MDString *KindID = dyn_cast(Op->getOperand(1)); - if (!KindID || KindID->getString() != "kernel") - continue; - - if (auto *KernelFn = - mdconst::dyn_extract_or_null(Op->getOperand(0))) - ProcessKernel(*KernelFn); - } - - for (Function &F : M) - if (F.hasKernelCallingConv()) - ProcessKernel(F); return Kernels; }