diff --git a/llvm/include/llvm/SYCLLowerIR/LowerKernelProps.h b/llvm/include/llvm/SYCLLowerIR/LowerKernelProps.h deleted file mode 100644 index 72d405d8e61a3..0000000000000 --- a/llvm/include/llvm/SYCLLowerIR/LowerKernelProps.h +++ /dev/null @@ -1,27 +0,0 @@ -//===---- LowerKernelProps.h - lower kernel properties -----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// Lowers SYCL kernel properties into attributes used by sycl-post-link -//===----------------------------------------------------------------------===// - -#pragma once - -#include "llvm/IR/PassManager.h" - -namespace sycl { -namespace kernel_props { -constexpr char ATTR_LARGE_GRF[] = "large-grf"; -} -} // namespace sycl -namespace llvm { -// Lowers calls to __sycl_set_kernel_properties -class SYCLLowerKernelPropsPass - : public PassInfoMixin { -public: - PreservedAnalyses run(Module &M, ModuleAnalysisManager &); -}; -} // namespace llvm diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 4066df582785f..1f1d088dac2ea 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -84,7 +84,6 @@ #include "llvm/SYCLLowerIR/ESIMD/ESIMDVerifier.h" #include "llvm/SYCLLowerIR/ESIMD/LowerESIMD.h" #include "llvm/SYCLLowerIR/LowerInvokeSimd.h" -#include "llvm/SYCLLowerIR/LowerKernelProps.h" #include "llvm/SYCLLowerIR/LowerWGLocalMemory.h" #include "llvm/SYCLLowerIR/LowerWGScope.h" #include "llvm/SYCLLowerIR/MutatePrintfAddrspace.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 136884275f8f6..b320fe62e8bb9 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -129,7 +129,6 @@ MODULE_PASS("memprof-module", ModuleMemProfilerPass()) MODULE_PASS("poison-checking", PoisonCheckingPass()) MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass()) MODULE_PASS("LowerESIMD", SYCLLowerESIMDPass()) -MODULE_PASS("lower-kernel-props", SYCLLowerKernelPropsPass()) MODULE_PASS("ESIMDLowerVecArg", ESIMDLowerVecArgPass()) MODULE_PASS("esimd-opt-call-conv", ESIMDOptimizeVecArgCallConvPass()) MODULE_PASS("esimd-verifier", ESIMDVerifierPass()) diff --git a/llvm/lib/SYCLLowerIR/CMakeLists.txt b/llvm/lib/SYCLLowerIR/CMakeLists.txt index 4d217a910333e..626e5fade46d2 100644 --- a/llvm/lib/SYCLLowerIR/CMakeLists.txt +++ b/llvm/lib/SYCLLowerIR/CMakeLists.txt @@ -59,7 +59,6 @@ add_llvm_component_library(LLVMSYCLLowerIR ESIMD/LowerESIMDSlmReservation.cpp HostPipes.cpp LowerInvokeSimd.cpp - LowerKernelProps.cpp LowerWGLocalMemory.cpp LowerWGScope.cpp MutatePrintfAddrspace.cpp diff --git a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp index a652f3cfb0608..557a1590f185a 100644 --- a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp +++ b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp @@ -10,6 +10,7 @@ #include "llvm/SYCLLowerIR/CompileTimePropertiesPass.h" #include "llvm/SYCLLowerIR/DeviceGlobals.h" +#include "llvm/SYCLLowerIR/ESIMD/ESIMDUtils.h" #include "llvm/SYCLLowerIR/HostPipes.h" #include "llvm/ADT/APInt.h" @@ -27,6 +28,7 @@ namespace { constexpr StringRef SYCL_HOST_ACCESS_ATTR = "sycl-host-access"; constexpr StringRef SYCL_PIPELINED_ATTR = "sycl-pipelined"; +constexpr StringRef SYCL_REGISTER_ALLOC_MODE_ATTR = "sycl-register-alloc-mode"; constexpr StringRef SPIRV_DECOR_MD_KIND = "spirv.Decorations"; constexpr StringRef SPIRV_PARAM_DECOR_MD_KIND = "spirv.ParameterDecorations"; @@ -175,14 +177,15 @@ MDNode *attributeToDecorateMetadata(LLVMContext &Ctx, const Attribute &Attr) { /// Tries to generate a SPIR-V execution mode metadata node from an attribute. /// If the attribute is unknown \c None will be returned. /// -/// @param M [in] the LLVM module. +/// @param F [in] the LLVM function. /// @param Attr [in] the LLVM attribute to generate metadata for. /// /// @returns a pair with the name of the resulting metadata and a pointer to /// the metadata node with its values if the attribute has a /// corresponding SPIR-V execution mode. Otherwise \c None is returned. std::optional> -attributeToExecModeMetadata(Module &M, const Attribute &Attr) { +attributeToExecModeMetadata(Function &F, const Attribute &Attr) { + Module &M = *F.getParent(); LLVMContext &Ctx = M.getContext(); const DataLayout &DLayout = M.getDataLayout(); @@ -267,6 +270,15 @@ attributeToExecModeMetadata(Module &M, const Attribute &Attr) { return std::pair("ip_interface", getIpInterface("csr", Ctx, Attr)); + if (AttrKindStr == SYCL_REGISTER_ALLOC_MODE_ATTR && + !llvm::esimd::isESIMD(F)) { + uint32_t RegAllocModeVal = getAttributeAsInteger(Attr); + Metadata *AttrMDArgs[] = {ConstantAsMetadata::get(Constant::getIntegerValue( + Type::getInt32Ty(Ctx), APInt(32, RegAllocModeVal)))}; + return std::pair("RegisterAllocMode", + MDNode::get(Ctx, AttrMDArgs)); + } + return std::nullopt; } @@ -420,7 +432,7 @@ PreservedAnalyses CompileTimePropertiesPass::run(Module &M, } else if (MDNode *SPIRVMetadata = attributeToDecorateMetadata(Ctx, Attribute)) MDOps.push_back(SPIRVMetadata); - else if (auto NamedMetadata = attributeToExecModeMetadata(M, Attribute)) + else if (auto NamedMetadata = attributeToExecModeMetadata(F, Attribute)) NamedMDOps.push_back(*NamedMetadata); } diff --git a/llvm/lib/SYCLLowerIR/ESIMD/ESIMDVerifier.cpp b/llvm/lib/SYCLLowerIR/ESIMD/ESIMDVerifier.cpp index 9db4d6a35e7d4..0c29357f7672f 100644 --- a/llvm/lib/SYCLLowerIR/ESIMD/ESIMDVerifier.cpp +++ b/llvm/lib/SYCLLowerIR/ESIMD/ESIMDVerifier.cpp @@ -59,7 +59,6 @@ static const char *LegalSYCLFunctions[] = { "^sycl::_V1::exp<.+>", "^sycl::_V1::bit_cast<.+>", "^sycl::_V1::operator.+<.+>", - "^sycl::_V1::ext::intel::experimental::set_kernel_properties", "^sycl::_V1::ext::oneapi::sub_group::.+", "^sycl::_V1::ext::oneapi::experimental::spec_constant<.+>::.+", "^sycl::_V1::ext::oneapi::experimental::this_sub_group", diff --git a/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp b/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp index f14b51e8a1a37..1efccb951fdbd 100644 --- a/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp +++ b/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp @@ -1969,8 +1969,6 @@ size_t SYCLLowerESIMDPass::runOnFunction(Function &F, ToErase.push_back(CI); continue; } - assert(!Name.startswith("__sycl_set_kernel_properties") && - "__sycl_set_kernel_properties must have been lowered"); if (Name.empty() || (!Name.startswith(ESIMD_INTRIN_PREF1) && !isDevicelibFunction(Name))) diff --git a/llvm/lib/SYCLLowerIR/LowerKernelProps.cpp b/llvm/lib/SYCLLowerIR/LowerKernelProps.cpp deleted file mode 100644 index 5d576b7edfd73..0000000000000 --- a/llvm/lib/SYCLLowerIR/LowerKernelProps.cpp +++ /dev/null @@ -1,90 +0,0 @@ -//===---- LowerKernelProps.cpp - lower __sycl_set_kernel_properties ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// Finds and lowers __sycl_set_kernel_properties calls: converts the call to -// function attributes and adds those attributes to all kernels which can -// potentially call this intrinsic. - -#include "llvm/SYCLLowerIR/LowerKernelProps.h" -#include "llvm/SYCLLowerIR/SYCLUtils.h" - -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" -#include "llvm/Pass.h" - -#define DEBUG_TYPE "LowerKernelProps" - -using namespace llvm; - -namespace { - -constexpr char SET_KERNEL_PROPS_FUNC_NAME[] = - "_Z28__sycl_set_kernel_propertiesi"; - -// Kernel property identifiers. Should match ones in -// sycl/include/sycl/ext/intel/experimental/kernel_properties.hpp -enum property_ids { use_large_grf = 0 }; - -void processSetKernelPropertiesCall(CallInst &CI) { - auto F = CI.getFunction(); - auto *ArgV = CI.getArgOperand(0); - - if (!isa(ArgV)) { - llvm::report_fatal_error( - llvm::Twine(__FILE__ " ") + - "integral constant is expected for set_kernel_properties"); - } - uint64_t PropID = cast(ArgV)->getZExtValue(); - - switch (PropID) { - case property_ids::use_large_grf: - // TODO: Keep track of traversed functions to avoid repeating traversals - // over same function. - llvm::sycl::utils::traverseCallgraphUp(F, [](Function *GraphNode) { - GraphNode->addFnAttr(::sycl::kernel_props::ATTR_LARGE_GRF); - // Add RegisterAllocMode metadata with arg 2 to the kernel to tell - // IGC to compile this kernel in large GRF mode. 2 means large. - if (GraphNode->getCallingConv() == CallingConv::SPIR_KERNEL && - !GraphNode->hasMetadata("sycl_explicit_simd")) { - auto &Ctx = GraphNode->getContext(); - Metadata *AttrMDArgs[] = {ConstantAsMetadata::get( - Constant::getIntegerValue(Type::getInt32Ty(Ctx), APInt(32, 2)))}; - GraphNode->setMetadata("RegisterAllocMode", - MDNode::get(Ctx, AttrMDArgs)); - } - }); - break; - default: - assert(false && "Invalid property id"); - } -} - -} // namespace - -namespace llvm { -PreservedAnalyses SYCLLowerKernelPropsPass::run(Module &M, - ModuleAnalysisManager &MAM) { - Function *F = M.getFunction(SET_KERNEL_PROPS_FUNC_NAME); - - if (!F) { - return PreservedAnalyses::all(); - } - bool Modified = false; - SmallVector Users(F->users()); - - for (User *Usr : Users) { - // a call can be the only use of the __sycl_set_kernel_properties built-in - CallInst *CI = cast(Usr); - processSetKernelPropertiesCall(*CI); - CI->eraseFromParent(); - Modified = true; - } - return Modified ? PreservedAnalyses::none() : PreservedAnalyses::all(); -} -} // namespace llvm diff --git a/llvm/test/SYCLLowerIR/CompileTimePropertiesPass/kernel-attributes/register-alloc-mode.ll b/llvm/test/SYCLLowerIR/CompileTimePropertiesPass/kernel-attributes/register-alloc-mode.ll new file mode 100644 index 0000000000000..d7b27b33643b4 --- /dev/null +++ b/llvm/test/SYCLLowerIR/CompileTimePropertiesPass/kernel-attributes/register-alloc-mode.ll @@ -0,0 +1,35 @@ +; Check we create RegisterAllocMode metadata if there is a non-ESIMD kernel with that property +; RUN: opt -passes=compile-time-properties %s -S | FileCheck %s --check-prefix CHECK-IR + +; Function Attrs: convergent norecurse +define weak_odr dso_local spir_kernel void @sycl_regallocmode() #1 { +; CHECK-IR-NOT: !RegisterAllocMode +; CHECK-IR: sycl_regallocmode() #[[#Attr1:]]{{.*}}!RegisterAllocMode ![[#MDVal:]] { +; CHECK-IR-NOT: !RegisterAllocMode +; CHECK-IR: ![[#MDVal]] = !{i32 2} +entry: + ret void +} + +; Function Attrs: convergent norecurse +define weak_odr dso_local spir_kernel void @sycl_noregallocmode() #0 { +entry: + ret void +} + +; Function Attrs: convergent norecurse +define weak_odr dso_local spir_kernel void @esimd_regallocmode() #1 !sycl_explicit_simd !1 { +entry: + ret void +} + +; Function Attrs: convergent norecurse +define weak_odr dso_local spir_kernel void @esimd_noregallocmode() #0 { +entry: + ret void +} + +attributes #0 = { convergent norecurse } +attributes #1 = { convergent norecurse "sycl-register-alloc-mode"="2" } + +!1 = !{} diff --git a/llvm/test/SYCLLowerIR/lower_kernel_props.ll b/llvm/test/SYCLLowerIR/lower_kernel_props.ll deleted file mode 100644 index f2cef0617739c..0000000000000 --- a/llvm/test/SYCLLowerIR/lower_kernel_props.ll +++ /dev/null @@ -1,44 +0,0 @@ -; This test checks handling of the -; __sycl_set_kernel_properties(...); -; intrinsic by LowerKernelProps pass - it should: -; - determine kernels calling this intrinsic (walk up the call graph) -; - remove the intrinsic call -; - mark the kernel with corresponding attribute (only "large-grf" for now) - -; RUN: opt -passes=lower-kernel-props -S %s -o - | FileCheck %s --implicit-check-not='RegisterAllocMode' - -; ModuleID = 'large_grf.bc' -source_filename = "llvm-link" -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" -target triple = "spir64-unknown-unknown" - -define dso_local spir_func void @_Z17large_grf_markerv() { -; CHECK: define dso_local spir_func void @_Z17large_grf_markerv() -; -- '0' constant argument means "large GRF" property: - call spir_func void @_Z28__sycl_set_kernel_propertiesi(i32 noundef 0) -; -- Check that LowerKernelProps removed the marker call above: -; CHECK-NOT: {{.*}} @_Z28__sycl_set_kernel_propertiesi - ret void -; CHECK-NEXT: ret void -} - -declare dso_local spir_func void @_Z28__sycl_set_kernel_propertiesi(i32 noundef) - -; -- This kernel calls the marker function indirectly -define weak_odr dso_local spir_kernel void @__large_grf_kernel1() !sycl_explicit_simd !0 !intel_reqd_sub_group_size !1 { -; CHECK: {{.*}} spir_kernel void @__large_grf_kernel1() #0 - call spir_func void @_Z17large_grf_markerv() - ret void -} - -; -- This kernel calls the marker function directly -define weak_odr dso_local spir_kernel void @__large_grf_kernel2() #0 !sycl_explicit_simd !0 !intel_reqd_sub_group_size !1 { -; CHECK: {{.*}} spir_kernel void @__large_grf_kernel2() #0 {{.*}} - call spir_func void @_Z28__sycl_set_kernel_propertiesi(i32 noundef 0) - ret void -} - -attributes #0 = { "large-grf" } - -!0 = !{} -!1 = !{i32 1} diff --git a/llvm/test/tools/sycl-post-link/sycl-esimd-large-grf.ll b/llvm/test/tools/sycl-post-link/registerallocmode.ll similarity index 55% rename from llvm/test/tools/sycl-post-link/sycl-esimd-large-grf.ll rename to llvm/test/tools/sycl-post-link/registerallocmode.ll index 6bae075384405..1afdc3023a6df 100644 --- a/llvm/test/tools/sycl-post-link/sycl-esimd-large-grf.ll +++ b/llvm/test/tools/sycl-post-link/registerallocmode.ll @@ -1,34 +1,41 @@ -; This test checks handling of the -; set_kernel_properties(kernel_properties::use_large_grf); -; by the post-link-tool: -; - ESIMD/SYCL splitting happens as usual -; - ESIMD module is further split into callgraphs for entry points requesting -; "large GRF" and callgraphs for entry points which are not -; - Compiler adds 'isLargeGRF' property to the ESIMD device binary -; images requesting "large GRF" +; This test checks handling of RegisterAllocMode in SYCL post link ; RUN: sycl-post-link -split=source -symbols -split-esimd -lower-esimd -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table -; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK-ESIMD-LargeGRF-IR --implicit-check-not='RegisterAllocMode' +; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK-ESIMD-LargeGRF-IR ; RUN: FileCheck %s -input-file=%t_esimd_0.prop --check-prefixes CHECK-ESIMD-LargeGRF-PROP +; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-SYCL-LargeGRF-IR +; RUN: FileCheck %s -input-file=%t_0.prop --check-prefixes CHECK-SYCL-LargeGRF-PROP +; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYCL-LargeGRF-SYM +; RUN: FileCheck %s -input-file=%t_1.prop --check-prefixes CHECK-SYCL-PROP ; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-SYCL-SYM -; RUN: FileCheck %s -input-file=%t_esimd_1.sym --check-prefixes CHECK-ESIMD-SYM +; RUN: FileCheck %s -input-file=%t_esimd_1.prop --check-prefixes CHECK-ESIMD-PROP ; RUN: FileCheck %s -input-file=%t_esimd_0.sym --check-prefixes CHECK-ESIMD-LargeGRF-SYM ; CHECK: [Code|Properties|Symbols] ; CHECK: {{.*}}_0.ll|{{.*}}_0.prop|{{.*}}_0.sym +; CHECK: {{.*}}_esimd_0.ll|{{.*}}_esimd_0.prop|{{.*}}_esimd_0.sym ; CHECK: {{.*}}_1.ll|{{.*}}_1.prop|{{.*}}_1.sym ; CHECK: {{.*}}_esimd_1.ll|{{.*}}_esimd_1.prop|{{.*}}_esimd_1.sym ; CHECK-ESIMD-LargeGRF-PROP: isEsimdImage=1|1 -; CHECK-ESIMD-LargeGRF-PROP: isLargeGRF=1|1 +; CHECK-ESIMD-LargeGRF-PROP: sycl-register-alloc-mode=1|2 + +; CHECK-SYCL-LargeGRF-PROP: sycl-register-alloc-mode=1|2 + +; CHECK-SYCL-PROP-NOT: sycl-register-alloc-mode ; CHECK-SYCL-SYM: __SYCL_kernel ; CHECK-SYCL-SYM-EMPTY: +; CHECK-SYCL-LargeGRF-SYM: __SYCL_kernel_large_grf +; CHECK-SYCL-LargeGRF-SYM-EMPTY: + ; CHECK-ESIMD-SYM: __ESIMD_kernel ; CHECK-ESIMD-SYM-EMPTY: +; CHECK-ESIMD-PROP-NOT: sycl-register-alloc-mode + ; CHECK-ESIMD-LargeGRF-SYM: __ESIMD_large_grf_kernel ; CHECK-ESIMD-LargeGRF-SYM-EMPTY: @@ -42,29 +49,26 @@ entry: ret void } -define weak_odr dso_local spir_kernel void @__ESIMD_kernel() #0 !sycl_explicit_simd !0 !intel_reqd_sub_group_size !1 { +define weak_odr dso_local spir_kernel void @__SYCL_kernel_large_grf() #1 { +; CHECK-SYCL-LargeGRF-IR: define {{.*}} spir_kernel void @__SYCL_kernel_large_grf() #[[#Attr:]] entry: ret void } -define dso_local spir_func void @_Z17large_grf_markerv() { +define weak_odr dso_local spir_kernel void @__ESIMD_kernel() #0 !sycl_explicit_simd !0 !intel_reqd_sub_group_size !1 { + entry: - call spir_func void @_Z28__sycl_set_kernel_propertiesi(i32 noundef 0) -; -- Check that ESIMD lowering removed the marker call above: -; CHECK-ESIMD-LargeGRF-IR-NOT: {{.*}} @_Z28__sycl_set_kernel_propertiesi ret void } -declare dso_local spir_func void @_Z28__sycl_set_kernel_propertiesi(i32 noundef) - -define weak_odr dso_local spir_kernel void @__ESIMD_large_grf_kernel() #0 !sycl_explicit_simd !0 !intel_reqd_sub_group_size !1 { -; CHECK-ESIMD-LargeGRF-IR: @__ESIMD_large_grf_kernel() {{.*}} +define weak_odr dso_local spir_kernel void @__ESIMD_large_grf_kernel() #1 !sycl_explicit_simd !0 !intel_reqd_sub_group_size !1 { +; CHECK-ESIMD-LargeGRF-IR: @__ESIMD_large_grf_kernel() entry: - call spir_func void @_Z17large_grf_markerv() ret void } attributes #0 = { "sycl-module-id"="a.cpp" } +attributes #1 = { "sycl-module-id"="a.cpp" "sycl-register-alloc-mode"="2" } !0 = !{} !1 = !{i32 1} diff --git a/llvm/test/tools/sycl-post-link/sycl-large-grf.ll b/llvm/test/tools/sycl-post-link/sycl-large-grf.ll deleted file mode 100644 index 105926200b0e9..0000000000000 --- a/llvm/test/tools/sycl-post-link/sycl-large-grf.ll +++ /dev/null @@ -1,60 +0,0 @@ -; This test checks handling of the -; set_kernel_properties(kernel_properties::use_large_grf); -; by the post-link-tool: -; - ESIMD/SYCL splitting happens as usual -; - ESIMD module is further split into callgraphs for entry points requesting -; "large GRF" and callgraphs for entry points which are not -; - Compiler adds 'isLargeGRF' property to the device binary -; images requesting "large GRF" - -; RUN: sycl-post-link -split=source -symbols -split-esimd -lower-esimd -S < %s -o %t.table -; RUN: FileCheck %s -input-file=%t.table -; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-LARGE-GRF-IR -; RUN: FileCheck %s -input-file=%t_0.prop --check-prefixes CHECK-LARGE-GRF-PROP -; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-SYCL-SYM -; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-LARGE-GRF-SYM - -; CHECK: [Code|Properties|Symbols] -; CHECK: {{.*}}_0.ll|{{.*}}_0.prop|{{.*}}_0.sym -; CHECK: {{.*}}_1.ll|{{.*}}_1.prop|{{.*}}_1.sym - -; CHECK-LARGE-GRF-PROP: isLargeGRF=1|1 - -; CHECK-SYCL-SYM: __SYCL_kernel -; CHECK-SYCL-SYM-EMPTY: - -; CHECK-LARGE-GRF-SYM: __large_grf_kernel -; CHECK-LARGE-GRF-SYM-EMPTY: - -; ModuleID = 'large_grf.bc' -source_filename = "llvm-link" -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" -target triple = "spir64-unknown-unknown" - -define weak_odr dso_local spir_kernel void @__SYCL_kernel() #0 { -entry: - ret void -} - -define dso_local spir_func void @_Z17large_grf_markerv() { -entry: - call spir_func void @_Z28__sycl_set_kernel_propertiesi(i32 noundef 0) -; -- Check that LowerKernelProps lowering removed the marker call above: -; CHECK-LARGE-GRF-IR-NOT: {{.*}} @_Z28__sycl_set_kernel_propertiesi - ret void -} - -declare dso_local spir_func void @_Z28__sycl_set_kernel_propertiesi(i32 noundef) - -define weak_odr dso_local spir_kernel void @__large_grf_kernel() #0 { -; CHECK-LARGE-GRF-IR: @__large_grf_kernel() {{.*}} !RegisterAllocMode ![[MetadataArg:[0-9]+]] -; CHECK-LARGE-GRF-IR: ![[MetadataArg]] = !{i32 2} -entry: - call spir_func void @_Z17large_grf_markerv() - ret void -} - -attributes #0 = { "sycl-module-id"="a.cpp" } - -!0 = !{} -!1 = !{i32 1} diff --git a/llvm/tools/sycl-post-link/ModuleSplitter.cpp b/llvm/tools/sycl-post-link/ModuleSplitter.cpp index b23bb7e4c0e1d..49aa88b6290db 100644 --- a/llvm/tools/sycl-post-link/ModuleSplitter.cpp +++ b/llvm/tools/sycl-post-link/ModuleSplitter.cpp @@ -20,7 +20,6 @@ #include "llvm/IR/Module.h" #include "llvm/SYCLLowerIR/DeviceGlobals.h" #include "llvm/SYCLLowerIR/LowerInvokeSimd.h" -#include "llvm/SYCLLowerIR/LowerKernelProps.h" #include "llvm/SYCLLowerIR/SYCLUtils.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/GlobalDCE.h" @@ -849,8 +848,7 @@ getDeviceCodeSplitter(ModuleDesc &&MD, IRSplitMode Mode, bool IROutputOnly, // Optional features // Note: Add more rules at the end of the list to avoid chaning orders of // output files in existing tests. - Categorizer.registerSimpleFlagAttributeRule( - ::sycl::kernel_props::ATTR_LARGE_GRF, "large-grf"); + Categorizer.registerSimpleStringAttributeRule("sycl-register-alloc-mode"); Categorizer.registerListOfIntegersInMetadataSortedRule("sycl_used_aspects"); Categorizer.registerListOfIntegersInMetadataRule("reqd_work_group_size"); Categorizer.registerSimpleStringAttributeRule( diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index a676cab38fc59..5866d3e0ff07a 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -35,11 +35,11 @@ #include "llvm/IRReader/IRReader.h" #include "llvm/Linker/Linker.h" #include "llvm/Passes/PassBuilder.h" +#include "llvm/SYCLLowerIR/CompileTimePropertiesPass.h" #include "llvm/SYCLLowerIR/DeviceGlobals.h" #include "llvm/SYCLLowerIR/ESIMD/LowerESIMD.h" #include "llvm/SYCLLowerIR/HostPipes.h" #include "llvm/SYCLLowerIR/LowerInvokeSimd.h" -#include "llvm/SYCLLowerIR/LowerKernelProps.h" #include "llvm/SYCLLowerIR/SYCLUtils.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" @@ -455,14 +455,22 @@ std::string saveModuleProperties(module_split::ModuleDesc &MD, } { - // check for large GRF property - bool HasLargeGRF = llvm::any_of(MD.entries(), [](const Function *F) { - return F->hasFnAttribute(::sycl::kernel_props::ATTR_LARGE_GRF); + StringRef RegAllocModeAttr = "sycl-register-alloc-mode"; + uint32_t RegAllocModeVal; + + bool HasRegAllocMode = llvm::any_of(MD.entries(), [&](const Function *F) { + if (!F->hasFnAttribute(RegAllocModeAttr)) + return false; + const auto &Attr = F->getFnAttribute(RegAllocModeAttr); + RegAllocModeVal = getAttributeAsInteger(Attr); + return true; }); - - if (HasLargeGRF) - PropSet[PropSetRegTy::SYCL_MISC_PROP].insert({"isLargeGRF", true}); + if (HasRegAllocMode) { + PropSet[PropSetRegTy::SYCL_MISC_PROP].insert( + {RegAllocModeAttr, RegAllocModeVal}); + } } + // FIXME: Remove 'if' below when possible // GPU backend has a problem with accepting optimization level options in form // described by Level Zero specification (-ze-opt-level=1) when 'invoke_simd' @@ -789,11 +797,6 @@ processInputModule(std::unique_ptr M) { } Modified |= InvokeSimdMet; - // Lower kernel properties setting APIs before "large GRF" splitting, as: - // - the latter uses the result of the former - // - saves processing time - Modified |= runModulePass(*M); - DUMP_ENTRY_POINTS(*M, EmitOnlyKernelsAsEntryPoints, "Input"); // -ir-output-only assumes single module output thus no code splitting. @@ -855,8 +858,6 @@ processInputModule(std::unique_ptr M) { if (!MDesc2.isSYCL() && LowerEsimd) { assert(MDesc2.isESIMD() && "NYI"); - // ESIMD lowering also detects large-GRF kernels, so it must happen - // before large-GRF split. Modified |= lowerEsimdConstructs(MDesc2); } MMs.emplace_back(std::move(MDesc2)); diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 34f93b9f94453..31b05dee0f234 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -36,7 +36,7 @@ set(SYCL_MAJOR_VERSION 7) set(SYCL_MINOR_VERSION 0) set(SYCL_PATCH_VERSION 0) -set(SYCL_DEV_ABI_VERSION 1) +set(SYCL_DEV_ABI_VERSION 2) if (SYCL_ADD_DEV_VERSION_POSTFIX) set(SYCL_VERSION_POSTFIX "-${SYCL_DEV_ABI_VERSION}") endif() diff --git a/sycl/include/sycl/detail/kernel_properties.hpp b/sycl/include/sycl/detail/kernel_properties.hpp new file mode 100644 index 0000000000000..01569922c39e5 --- /dev/null +++ b/sycl/include/sycl/detail/kernel_properties.hpp @@ -0,0 +1,57 @@ +//==---------------- kernel_properties.hpp - SYCL Kernel Properties -------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// APIs for setting kernel properties interpreted by GPU software stack. +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +namespace sycl { +__SYCL_INLINE_VER_NAMESPACE(_V1) { +namespace detail { +enum class register_alloc_mode_enum : uint32_t { + automatic = 0, + large = 2, +}; + +struct register_alloc_mode_key { + template + using value_t = sycl::ext::oneapi::experimental::property_value< + register_alloc_mode_key, + std::integral_constant>; +}; + +template +inline constexpr register_alloc_mode_key::value_t register_alloc_mode; +} // namespace detail + +namespace ext::oneapi::experimental { +template <> +struct is_property_key : std::true_type { +}; + +namespace detail { +template <> struct PropertyToKind { + static constexpr PropKind Kind = PropKind::RegisterAllocMode; +}; + +template <> +struct IsCompileTimeProperty + : std::true_type {}; + +template +struct PropertyMetaInfo> { + static constexpr const char *name = "sycl-register-alloc-mode"; + static constexpr sycl::detail::register_alloc_mode_enum value = Mode; +}; +} // namespace detail +} // namespace ext::oneapi::experimental +} // __SYCL_INLINE_VER_NAMESPACE(_V1) +} // namespace sycl diff --git a/sycl/include/sycl/ext/intel/experimental/kernel_properties.hpp b/sycl/include/sycl/ext/intel/experimental/kernel_properties.hpp deleted file mode 100644 index 1161ec321d4f4..0000000000000 --- a/sycl/include/sycl/ext/intel/experimental/kernel_properties.hpp +++ /dev/null @@ -1,81 +0,0 @@ -//==---------------- kernel_properties.hpp - SYCL Kernel Properties -------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// APIs for setting kernel properties interpreted by GPU software stack. -//===----------------------------------------------------------------------===// - -#pragma once - -#include - -#include - -namespace sycl { -__SYCL_INLINE_VER_NAMESPACE(_V1) { -namespace ext::intel::experimental { - -namespace kernel_properties { - -/// -/// This namespace contains APIs to set kernel properties. -/// - -// Implementation note: ::value fields should match property IDs -// specified in llvm/lib/SYCLLowerIR/LowerKernelProps.cpp - -namespace detail { -// Proxy to access private property classes' fields from the API code. -template struct proxy { - static constexpr int value = T::value; -}; -} // namespace detail - -/// A boolean property which requests the compiler to use large register -/// allocation mode at the expense of reducing the amount of available hardware -/// threads. -struct use_large_grf_tag { - template friend struct detail::proxy; - -private: - // Property identifier - static constexpr int value = 0; -}; - -__SYCL_DEPRECATED("use_double_grf is deprecated, use use_large_grf instead") -inline constexpr use_large_grf_tag use_double_grf = {}; -inline constexpr use_large_grf_tag use_large_grf = {}; - -} // namespace kernel_properties - -namespace __MP11_NS = sycl::detail::boost::mp11; - -// TODO this should be replaced with the generic SYCL compile-time properites -// mechanism once implementaion is available. -// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/proposed/sycl_ext_oneapi_kernel_properties.asciidoc - -template -void set_kernel_properties(KernelProps... props) { - // TODO check for duplicates - using Props = __MP11_NS::mp_list; - __MP11_NS::mp_for_each([&](auto Prop) { - using PropT = decltype(Prop); - constexpr bool IsLargeGRF = - std::is_same_v; - if constexpr (IsLargeGRF) { - __sycl_set_kernel_properties( - kernel_properties::detail::proxy< - kernel_properties::use_large_grf_tag>::value); - } else { - static_assert(IsLargeGRF && - "set_kernel_properties: invalid kernel property"); - } - }); -} - -} // namespace ext::intel::experimental -} // __SYCL_INLINE_VER_NAMESPACE(_V1) -} // namespace sycl diff --git a/sycl/include/sycl/ext/oneapi/properties/property.hpp b/sycl/include/sycl/ext/oneapi/properties/property.hpp index aeefaee9a994d..2b0d2c1c3dd3c 100644 --- a/sycl/include/sycl/ext/oneapi/properties/property.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/property.hpp @@ -194,8 +194,9 @@ enum PropKind : uint32_t { ReadyLatency = 28, UsesValid = 29, UseRootSync = 30, + RegisterAllocMode = 31, // PropKindSize must always be the last value. - PropKindSize = 31, + PropKindSize = 32, }; // This trait must be specialized for all properties and must have a unique diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index fe6c48e319826..010f57494fd68 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -374,6 +375,29 @@ static std::string getUint32PropAsOptStr(const RTDeviceBinaryImage &Img, return temp; } +static void appendCompileOptionsForRegAllocMode(std::string &CompileOpts, + const RTDeviceBinaryImage &Img, + bool IsEsimdImage) { + pi_device_binary_property Prop = Img.getProperty("sycl-register-alloc-mode"); + if (!Prop) + return; + uint32_t PropVal = DeviceBinaryProperty(Prop).asUint32(); + if (PropVal == static_cast(register_alloc_mode_enum::large)) { + if (!CompileOpts.empty()) + CompileOpts += " "; + // This option works for both LO AND OCL backends. + CompileOpts += IsEsimdImage ? "-doubleGRF" : "-ze-opt-large-register-file"; + } + // TODO: Support Auto GRF for ESIMD once vc supports it. + if (PropVal == static_cast(register_alloc_mode_enum::automatic) && + !IsEsimdImage) { + if (!CompileOpts.empty()) + CompileOpts += " "; + // This option works for both LO AND OCL backends. + CompileOpts += "-ze-intel-enable-auto-large-GRF-mode"; + } +} + static void appendCompileOptionsFromImage(std::string &CompileOpts, const RTDeviceBinaryImage &Img, const std::vector &Devs, @@ -393,9 +417,6 @@ static void appendCompileOptionsFromImage(std::string &CompileOpts, CompileOpts += std::string(TemporaryStr); } bool isEsimdImage = getUint32PropAsBool(Img, "isEsimdImage"); - // TODO: Remove isDoubleGRF check in next ABI break - bool isLargeGRF = getUint32PropAsBool(Img, "isLargeGRF") || - getUint32PropAsBool(Img, "isDoubleGRF"); // The -vc-codegen option is always preserved for ESIMD kernels, regardless // of the contents SYCL_PROGRAM_COMPILE_OPTIONS environment variable. if (isEsimdImage) { @@ -407,14 +428,8 @@ static void appendCompileOptionsFromImage(std::string &CompileOpts, if (detail::SYCLConfig::get() == 0) CompileOpts += " -disable-finalizer-msg"; } - if (isLargeGRF) { - if (!CompileOpts.empty()) - CompileOpts += " "; - // TODO: Don't check the property or pass these flags after the next ABI - // break. The behavior is now controlled through the RegisterAllocMode - // metadata. - CompileOpts += isEsimdImage ? "-doubleGRF" : "-ze-opt-large-register-file"; - } + + appendCompileOptionsForRegAllocMode(CompileOpts, Img, isEsimdImage); const auto &PlatformImpl = detail::getSyclObjImpl(Devs[0].get_platform()); diff --git a/sycl/test-e2e/DeviceCodeSplit/large-grf.cpp b/sycl/test-e2e/DeviceCodeSplit/grf.cpp similarity index 74% rename from sycl/test-e2e/DeviceCodeSplit/large-grf.cpp rename to sycl/test-e2e/DeviceCodeSplit/grf.cpp index 893ed0aa8c35c..26ce23b18f8b1 100644 --- a/sycl/test-e2e/DeviceCodeSplit/large-grf.cpp +++ b/sycl/test-e2e/DeviceCodeSplit/grf.cpp @@ -5,14 +5,13 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// This test verifies effect of -// set_kernel_properties(kernel_properties::use_double_grf); +// This test verifies effect of the register_alloc_mode kernel property // API call in device code: // - ESIMD/SYCL splitting happens as usual -// - SYCL module is further split into callgraphs for entry points requesting -// "double GRF" and callgraphs for entry points which are not -// - SYCL device binary images requesting "double GRF" must be compiled with -// -ze-opt-large-register-file option +// - SYCL module is further split into callgraphs for entry points for +// each value +// - SYCL device binary images are compiled with the corresponding +// compiler option // REQUIRES: gpu-intel-pvc // UNSUPPORTED: cuda || hip @@ -22,17 +21,18 @@ // RUN: %{build} -o %t.out // RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-NO-VAR // RUN: env SYCL_PROGRAM_COMPILE_OPTIONS="-g" SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-WITH-VAR -// RUN: %{build} -DUSE_LARGE_GRF=1 -o %t.out -// RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-NO-VAR -// RUN: env SYCL_PROGRAM_COMPILE_OPTIONS="-g" SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-WITH-VAR - +// RUN: %{build} -DUSE_AUTO_GRF=1 -o %t.out +// RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-AUTO-NO-VAR +// RUN: env SYCL_PROGRAM_COMPILE_OPTIONS="-g" SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-AUTO-WITH-VAR #include "../helpers.hpp" #include -#include +#include #include using namespace sycl; +using namespace sycl::detail; using namespace sycl::ext::intel::experimental; +using namespace sycl::ext::oneapi::experimental; bool checkResult(const std::vector &A, int Inc) { int err_cnt = 0; @@ -54,16 +54,6 @@ bool checkResult(const std::vector &A, int Inc) { return true; } -// Make the double GRF request from non-inlineable function - compiler should -// mark the caller kernel as "double GRF" anyway. -__attribute__((noinline)) void double_grf_marker() { -#ifdef USE_LARGE_GRF - set_kernel_properties(kernel_properties::use_large_grf); -#else - set_kernel_properties(kernel_properties::use_double_grf); -#endif -} - int main(void) { constexpr unsigned Size = 32; constexpr unsigned VL = 16; @@ -79,7 +69,8 @@ int main(void) { queue q(sycl::gpu_selector_v, exceptionHandlerHelper); auto dev = q.get_device(); - std::cout << "Running on " << dev.get_info() << "\n"; + std::cout << "Running on " << dev.get_info() + << "\n"; auto e = q.submit([&](handler &cgh) { auto PA = bufa.get_access(cgh); @@ -101,17 +92,21 @@ int main(void) { try { buffer bufa(A.data(), range<1>(Size)); +#ifdef USE_AUTO_GRF + properties prop{register_alloc_mode}; +#else + properties prop{register_alloc_mode}; +#endif queue q(sycl::gpu_selector_v, exceptionHandlerHelper); auto dev = q.get_device(); - std::cout << "Running on " << dev.get_info() << "\n"; + std::cout << "Running on " << dev.get_info() + << "\n"; auto e = q.submit([&](handler &cgh) { auto PA = bufa.get_access(cgh); - cgh.parallel_for(Size, [=](id<1> i) { - double_grf_marker(); - PA[i] += 2; - }); + cgh.parallel_for( + Size, prop, [=](id<1> i) { PA[i] += 2; }); }); e.wait(); } catch (sycl::exception const &e) { @@ -120,9 +115,9 @@ int main(void) { } if (checkResult(A, 4)) { - std::cout << "DoubleGRF kernel passed\n"; + std::cout << "SpecifiedGRF kernel passed\n"; } else { - std::cout << "DoubleGRF kernel failed\n"; + std::cout << "SpecifiedGRF kernel failed\n"; return 1; } @@ -140,7 +135,9 @@ int main(void) { // CHECK-LABEL: ---> piProgramBuild( // CHECK-NO-VAR: -ze-opt-large-register-file // CHECK-WITH-VAR: -g -ze-opt-large-register-file +// CHECK-AUTO-NO-VAR: -ze-intel-enable-auto-large-GRF-mode +// CHECK-AUTO-WITH-VAR: -g -ze-intel-enable-auto-large-GRF-mode // CHECK: ) ---> pi_result : PI_SUCCESS // CHECK-LABEL: ---> piKernelCreate( -// CHECK: : {{.*}}DoubleGRF +// CHECK: : {{.*}}SpecifiedGRF // CHECK: ) ---> pi_result : PI_SUCCESS diff --git a/sycl/test-e2e/ESIMD/large-grf.cpp b/sycl/test-e2e/ESIMD/large-grf.cpp index 0e1af71c628f8..079834fc07c07 100644 --- a/sycl/test-e2e/ESIMD/large-grf.cpp +++ b/sycl/test-e2e/ESIMD/large-grf.cpp @@ -5,14 +5,13 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// This test verifies effect of -// set_kernel_properties(kernel_properties::use_large_grf); +// This test verifies effect of the register_alloc_mode kernel property // API call in device code: // - ESIMD/SYCL splitting happens as usual -// - ESIMD module is further split into callgraphs for entry points requesting -// "large GRF" and callgraphs for entry points which are not -// - ESIMD device binary images requesting "large GRF" must be compiled with -// -doubleGRF option +// - ESIMD module is further split into callgraphs for entry points for +// each value +// - ESIMD device binary images are compiled with the corresponding +// compiler option // REQUIRES: gpu-intel-pvc // TODO/FIXME: esimd_emulator does not support online compilation that @@ -25,11 +24,12 @@ #include "esimd_test_utils.hpp" #include +#include #include -#include #include using namespace sycl; +using namespace sycl::detail; using namespace sycl::ext::intel::esimd; using namespace sycl::ext::intel::experimental; using namespace sycl::ext::intel::experimental::esimd; @@ -54,12 +54,6 @@ bool checkResult(const std::vector &A, int Inc) { return true; } -// Make the large GRF request from non-inlineable function - compiler should -// mark the caller kernel as "large GRF" anyway. -__attribute__((noinline)) void large_grf_marker() { - set_kernel_properties(kernel_properties::use_large_grf); -} - int main(void) { constexpr unsigned Size = 32; constexpr unsigned VL = 16; @@ -128,15 +122,15 @@ int main(void) { try { buffer bufa(A.data(), range<1>(Size)); queue q(esimd_test::ESIMDSelector, esimd_test::createExceptionHandler()); - + sycl::ext::oneapi::experimental::properties prop{ + register_alloc_mode}; auto dev = q.get_device(); std::cout << "Running on " << dev.get_info() << "\n"; auto e = q.submit([&](handler &cgh) { auto PA = bufa.get_access(cgh); cgh.parallel_for( - Size, [=](id<1> i) SYCL_ESIMD_KERNEL { - large_grf_marker(); + Size, prop, [=](id<1> i) SYCL_ESIMD_KERNEL { unsigned int offset = i * VL * sizeof(float); simd va; va.copy_from(PA, offset);