From 1b7250c75369a6f982fa755e01d858e3dee14203 Mon Sep 17 00:00:00 2001 From: "Sarnie, Nick" Date: Fri, 28 Apr 2023 07:17:09 -0700 Subject: [PATCH 1/5] [SYCL] Add support for per-kernel auto GRF mode specification, and reimplement feature using kernel properties This works extends the existing support we have for large GRF mode specification. I introduce a new argument to set_kernel_properties, kernel_properties::use_auto_grf. Then, we update LowerKernelProps to lower this new attribute and do sanity checks. Next, we update sycl-post-link to split and add an image property based on this property. Finally, we update program manager to check the image property and pass the correct flag for JIT. For AOT, this works through the RegisterAllocMode metadata that we add during LowerKernelProps and is kept through llvm-spirv. Note that ESIMD does not work at all yet, not for JIT nor AOT. This is because the VC backend does not support auto GRF. I have made a feature request for the VC team to add this. Signed-off-by: Sarnie, Nick --- .../llvm/SYCLLowerIR/LowerKernelProps.h | 27 ------ llvm/lib/Passes/PassBuilder.cpp | 1 - llvm/lib/Passes/PassRegistry.def | 1 - llvm/lib/SYCLLowerIR/CMakeLists.txt | 1 - .../SYCLLowerIR/CompileTimePropertiesPass.cpp | 19 +++- llvm/lib/SYCLLowerIR/ESIMD/ESIMDVerifier.cpp | 1 - llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp | 2 - llvm/lib/SYCLLowerIR/LowerKernelProps.cpp | 90 ------------------ .../kernel-attributes/register-alloc-mode.ll | 35 +++++++ llvm/test/SYCLLowerIR/lower_kernel_props.ll | 44 --------- ...simd-large-grf.ll => registerallocmode.ll} | 46 ++++----- .../tools/sycl-post-link/sycl-large-grf.ll | 60 ------------ llvm/tools/sycl-post-link/ModuleSplitter.cpp | 4 +- llvm/tools/sycl-post-link/sycl-post-link.cpp | 29 +++--- .../intel/experimental/kernel_properties.hpp | 94 ++++++++----------- .../sycl/ext/oneapi/properties/property.hpp | 3 +- sycl/include/sycl/sycl.hpp | 1 + .../program_manager/program_manager.cpp | 36 ++++--- .../{large-grf.cpp => grf.cpp} | 55 +++++------ sycl/test-e2e/ESIMD/large-grf.cpp | 25 ++--- 20 files changed, 194 insertions(+), 380 deletions(-) delete mode 100644 llvm/include/llvm/SYCLLowerIR/LowerKernelProps.h delete mode 100644 llvm/lib/SYCLLowerIR/LowerKernelProps.cpp create mode 100644 llvm/test/SYCLLowerIR/CompileTimePropertiesPass/kernel-attributes/register-alloc-mode.ll delete mode 100644 llvm/test/SYCLLowerIR/lower_kernel_props.ll rename llvm/test/tools/sycl-post-link/{sycl-esimd-large-grf.ll => registerallocmode.ll} (56%) delete mode 100644 llvm/test/tools/sycl-post-link/sycl-large-grf.ll rename sycl/test-e2e/DeviceCodeSplit/{large-grf.cpp => grf.cpp} (74%) diff --git a/llvm/include/llvm/SYCLLowerIR/LowerKernelProps.h b/llvm/include/llvm/SYCLLowerIR/LowerKernelProps.h deleted file mode 100644 index 72d405d8e61a3..0000000000000 --- a/llvm/include/llvm/SYCLLowerIR/LowerKernelProps.h +++ /dev/null @@ -1,27 +0,0 @@ -//===---- LowerKernelProps.h - lower kernel properties -----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// Lowers SYCL kernel properties into attributes used by sycl-post-link -//===----------------------------------------------------------------------===// - -#pragma once - -#include "llvm/IR/PassManager.h" - -namespace sycl { -namespace kernel_props { -constexpr char ATTR_LARGE_GRF[] = "large-grf"; -} -} // namespace sycl -namespace llvm { -// Lowers calls to __sycl_set_kernel_properties -class SYCLLowerKernelPropsPass - : public PassInfoMixin { -public: - PreservedAnalyses run(Module &M, ModuleAnalysisManager &); -}; -} // namespace llvm diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 4066df582785f..1f1d088dac2ea 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -84,7 +84,6 @@ #include "llvm/SYCLLowerIR/ESIMD/ESIMDVerifier.h" #include "llvm/SYCLLowerIR/ESIMD/LowerESIMD.h" #include "llvm/SYCLLowerIR/LowerInvokeSimd.h" -#include "llvm/SYCLLowerIR/LowerKernelProps.h" #include "llvm/SYCLLowerIR/LowerWGLocalMemory.h" #include "llvm/SYCLLowerIR/LowerWGScope.h" #include "llvm/SYCLLowerIR/MutatePrintfAddrspace.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index 136884275f8f6..b320fe62e8bb9 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -129,7 +129,6 @@ MODULE_PASS("memprof-module", ModuleMemProfilerPass()) MODULE_PASS("poison-checking", PoisonCheckingPass()) MODULE_PASS("pseudo-probe-update", PseudoProbeUpdatePass()) MODULE_PASS("LowerESIMD", SYCLLowerESIMDPass()) -MODULE_PASS("lower-kernel-props", SYCLLowerKernelPropsPass()) MODULE_PASS("ESIMDLowerVecArg", ESIMDLowerVecArgPass()) MODULE_PASS("esimd-opt-call-conv", ESIMDOptimizeVecArgCallConvPass()) MODULE_PASS("esimd-verifier", ESIMDVerifierPass()) diff --git a/llvm/lib/SYCLLowerIR/CMakeLists.txt b/llvm/lib/SYCLLowerIR/CMakeLists.txt index 4d217a910333e..626e5fade46d2 100644 --- a/llvm/lib/SYCLLowerIR/CMakeLists.txt +++ b/llvm/lib/SYCLLowerIR/CMakeLists.txt @@ -59,7 +59,6 @@ add_llvm_component_library(LLVMSYCLLowerIR ESIMD/LowerESIMDSlmReservation.cpp HostPipes.cpp LowerInvokeSimd.cpp - LowerKernelProps.cpp LowerWGLocalMemory.cpp LowerWGScope.cpp MutatePrintfAddrspace.cpp diff --git a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp index a652f3cfb0608..3ce1bd4a532e3 100644 --- a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp +++ b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp @@ -10,6 +10,7 @@ #include "llvm/SYCLLowerIR/CompileTimePropertiesPass.h" #include "llvm/SYCLLowerIR/DeviceGlobals.h" +#include "llvm/SYCLLowerIR/ESIMD/ESIMDUtils.h" #include "llvm/SYCLLowerIR/HostPipes.h" #include "llvm/ADT/APInt.h" @@ -27,6 +28,7 @@ namespace { constexpr StringRef SYCL_HOST_ACCESS_ATTR = "sycl-host-access"; constexpr StringRef SYCL_PIPELINED_ATTR = "sycl-pipelined"; +constexpr StringRef SYCL_REGISTER_ALLOC_MODE_ATTR = "RegisterAllocMode"; constexpr StringRef SPIRV_DECOR_MD_KIND = "spirv.Decorations"; constexpr StringRef SPIRV_PARAM_DECOR_MD_KIND = "spirv.ParameterDecorations"; @@ -175,14 +177,15 @@ MDNode *attributeToDecorateMetadata(LLVMContext &Ctx, const Attribute &Attr) { /// Tries to generate a SPIR-V execution mode metadata node from an attribute. /// If the attribute is unknown \c None will be returned. /// -/// @param M [in] the LLVM module. +/// @param F [in] the LLVM function. /// @param Attr [in] the LLVM attribute to generate metadata for. /// /// @returns a pair with the name of the resulting metadata and a pointer to /// the metadata node with its values if the attribute has a /// corresponding SPIR-V execution mode. Otherwise \c None is returned. std::optional> -attributeToExecModeMetadata(Module &M, const Attribute &Attr) { +attributeToExecModeMetadata(Function &F, const Attribute &Attr) { + Module &M = *F.getParent(); LLVMContext &Ctx = M.getContext(); const DataLayout &DLayout = M.getDataLayout(); @@ -190,6 +193,16 @@ attributeToExecModeMetadata(Module &M, const Attribute &Attr) { if (!Attr.isStringAttribute()) return std::nullopt; StringRef AttrKindStr = Attr.getKindAsString(); + + if (AttrKindStr == SYCL_REGISTER_ALLOC_MODE_ATTR && + !llvm::esimd::isESIMD(F)) { + uint32_t RegAllocModeVal = getAttributeAsInteger(Attr); + Metadata *AttrMDArgs[] = {ConstantAsMetadata::get(Constant::getIntegerValue( + Type::getInt32Ty(Ctx), APInt(32, RegAllocModeVal)))}; + return std::pair(AttrKindStr.str(), + MDNode::get(Ctx, AttrMDArgs)); + } + // Early exit if it is not a sycl-* attribute. if (!AttrKindStr.startswith("sycl-")) return std::nullopt; @@ -420,7 +433,7 @@ PreservedAnalyses CompileTimePropertiesPass::run(Module &M, } else if (MDNode *SPIRVMetadata = attributeToDecorateMetadata(Ctx, Attribute)) MDOps.push_back(SPIRVMetadata); - else if (auto NamedMetadata = attributeToExecModeMetadata(M, Attribute)) + else if (auto NamedMetadata = attributeToExecModeMetadata(F, Attribute)) NamedMDOps.push_back(*NamedMetadata); } diff --git a/llvm/lib/SYCLLowerIR/ESIMD/ESIMDVerifier.cpp b/llvm/lib/SYCLLowerIR/ESIMD/ESIMDVerifier.cpp index 9db4d6a35e7d4..0c29357f7672f 100644 --- a/llvm/lib/SYCLLowerIR/ESIMD/ESIMDVerifier.cpp +++ b/llvm/lib/SYCLLowerIR/ESIMD/ESIMDVerifier.cpp @@ -59,7 +59,6 @@ static const char *LegalSYCLFunctions[] = { "^sycl::_V1::exp<.+>", "^sycl::_V1::bit_cast<.+>", "^sycl::_V1::operator.+<.+>", - "^sycl::_V1::ext::intel::experimental::set_kernel_properties", "^sycl::_V1::ext::oneapi::sub_group::.+", "^sycl::_V1::ext::oneapi::experimental::spec_constant<.+>::.+", "^sycl::_V1::ext::oneapi::experimental::this_sub_group", diff --git a/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp b/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp index f14b51e8a1a37..1efccb951fdbd 100644 --- a/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp +++ b/llvm/lib/SYCLLowerIR/ESIMD/LowerESIMD.cpp @@ -1969,8 +1969,6 @@ size_t SYCLLowerESIMDPass::runOnFunction(Function &F, ToErase.push_back(CI); continue; } - assert(!Name.startswith("__sycl_set_kernel_properties") && - "__sycl_set_kernel_properties must have been lowered"); if (Name.empty() || (!Name.startswith(ESIMD_INTRIN_PREF1) && !isDevicelibFunction(Name))) diff --git a/llvm/lib/SYCLLowerIR/LowerKernelProps.cpp b/llvm/lib/SYCLLowerIR/LowerKernelProps.cpp deleted file mode 100644 index 5d576b7edfd73..0000000000000 --- a/llvm/lib/SYCLLowerIR/LowerKernelProps.cpp +++ /dev/null @@ -1,90 +0,0 @@ -//===---- LowerKernelProps.cpp - lower __sycl_set_kernel_properties ---===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// Finds and lowers __sycl_set_kernel_properties calls: converts the call to -// function attributes and adds those attributes to all kernels which can -// potentially call this intrinsic. - -#include "llvm/SYCLLowerIR/LowerKernelProps.h" -#include "llvm/SYCLLowerIR/SYCLUtils.h" - -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Operator.h" -#include "llvm/Pass.h" - -#define DEBUG_TYPE "LowerKernelProps" - -using namespace llvm; - -namespace { - -constexpr char SET_KERNEL_PROPS_FUNC_NAME[] = - "_Z28__sycl_set_kernel_propertiesi"; - -// Kernel property identifiers. Should match ones in -// sycl/include/sycl/ext/intel/experimental/kernel_properties.hpp -enum property_ids { use_large_grf = 0 }; - -void processSetKernelPropertiesCall(CallInst &CI) { - auto F = CI.getFunction(); - auto *ArgV = CI.getArgOperand(0); - - if (!isa(ArgV)) { - llvm::report_fatal_error( - llvm::Twine(__FILE__ " ") + - "integral constant is expected for set_kernel_properties"); - } - uint64_t PropID = cast(ArgV)->getZExtValue(); - - switch (PropID) { - case property_ids::use_large_grf: - // TODO: Keep track of traversed functions to avoid repeating traversals - // over same function. - llvm::sycl::utils::traverseCallgraphUp(F, [](Function *GraphNode) { - GraphNode->addFnAttr(::sycl::kernel_props::ATTR_LARGE_GRF); - // Add RegisterAllocMode metadata with arg 2 to the kernel to tell - // IGC to compile this kernel in large GRF mode. 2 means large. - if (GraphNode->getCallingConv() == CallingConv::SPIR_KERNEL && - !GraphNode->hasMetadata("sycl_explicit_simd")) { - auto &Ctx = GraphNode->getContext(); - Metadata *AttrMDArgs[] = {ConstantAsMetadata::get( - Constant::getIntegerValue(Type::getInt32Ty(Ctx), APInt(32, 2)))}; - GraphNode->setMetadata("RegisterAllocMode", - MDNode::get(Ctx, AttrMDArgs)); - } - }); - break; - default: - assert(false && "Invalid property id"); - } -} - -} // namespace - -namespace llvm { -PreservedAnalyses SYCLLowerKernelPropsPass::run(Module &M, - ModuleAnalysisManager &MAM) { - Function *F = M.getFunction(SET_KERNEL_PROPS_FUNC_NAME); - - if (!F) { - return PreservedAnalyses::all(); - } - bool Modified = false; - SmallVector Users(F->users()); - - for (User *Usr : Users) { - // a call can be the only use of the __sycl_set_kernel_properties built-in - CallInst *CI = cast(Usr); - processSetKernelPropertiesCall(*CI); - CI->eraseFromParent(); - Modified = true; - } - return Modified ? PreservedAnalyses::none() : PreservedAnalyses::all(); -} -} // namespace llvm diff --git a/llvm/test/SYCLLowerIR/CompileTimePropertiesPass/kernel-attributes/register-alloc-mode.ll b/llvm/test/SYCLLowerIR/CompileTimePropertiesPass/kernel-attributes/register-alloc-mode.ll new file mode 100644 index 0000000000000..e0d1b6dd7aa7f --- /dev/null +++ b/llvm/test/SYCLLowerIR/CompileTimePropertiesPass/kernel-attributes/register-alloc-mode.ll @@ -0,0 +1,35 @@ +; Check we create RegisterAllocMode metadata if there is a non-ESIMD kernel with that property +; RUN: opt -passes=compile-time-properties %s -S | FileCheck %s --check-prefix CHECK-IR + +; Function Attrs: convergent norecurse +define weak_odr dso_local spir_kernel void @sycl_regallocmode() #1 { +; CHECK-IR-NOT: !RegisterAllocMode +; CHECK-IR: sycl_regallocmode() #[[#Attr1:]]{{.*}}!RegisterAllocMode ![[#MDVal:]] { +; CHECK-IR-NOT: !RegisterAllocMode +; CHECK-IR: ![[#MDVal]] = !{i32 2} +entry: + ret void +} + +; Function Attrs: convergent norecurse +define weak_odr dso_local spir_kernel void @sycl_noregallocmode() #0 { +entry: + ret void +} + +; Function Attrs: convergent norecurse +define weak_odr dso_local spir_kernel void @esimd_regallocmode() #1 !sycl_explicit_simd !1 { +entry: + ret void +} + +; Function Attrs: convergent norecurse +define weak_odr dso_local spir_kernel void @esimd_noregallocmode() #0 { +entry: + ret void +} + +attributes #0 = { convergent norecurse } +attributes #1 = { convergent norecurse "RegisterAllocMode"="2" } + +!1 = !{} diff --git a/llvm/test/SYCLLowerIR/lower_kernel_props.ll b/llvm/test/SYCLLowerIR/lower_kernel_props.ll deleted file mode 100644 index f2cef0617739c..0000000000000 --- a/llvm/test/SYCLLowerIR/lower_kernel_props.ll +++ /dev/null @@ -1,44 +0,0 @@ -; This test checks handling of the -; __sycl_set_kernel_properties(...); -; intrinsic by LowerKernelProps pass - it should: -; - determine kernels calling this intrinsic (walk up the call graph) -; - remove the intrinsic call -; - mark the kernel with corresponding attribute (only "large-grf" for now) - -; RUN: opt -passes=lower-kernel-props -S %s -o - | FileCheck %s --implicit-check-not='RegisterAllocMode' - -; ModuleID = 'large_grf.bc' -source_filename = "llvm-link" -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" -target triple = "spir64-unknown-unknown" - -define dso_local spir_func void @_Z17large_grf_markerv() { -; CHECK: define dso_local spir_func void @_Z17large_grf_markerv() -; -- '0' constant argument means "large GRF" property: - call spir_func void @_Z28__sycl_set_kernel_propertiesi(i32 noundef 0) -; -- Check that LowerKernelProps removed the marker call above: -; CHECK-NOT: {{.*}} @_Z28__sycl_set_kernel_propertiesi - ret void -; CHECK-NEXT: ret void -} - -declare dso_local spir_func void @_Z28__sycl_set_kernel_propertiesi(i32 noundef) - -; -- This kernel calls the marker function indirectly -define weak_odr dso_local spir_kernel void @__large_grf_kernel1() !sycl_explicit_simd !0 !intel_reqd_sub_group_size !1 { -; CHECK: {{.*}} spir_kernel void @__large_grf_kernel1() #0 - call spir_func void @_Z17large_grf_markerv() - ret void -} - -; -- This kernel calls the marker function directly -define weak_odr dso_local spir_kernel void @__large_grf_kernel2() #0 !sycl_explicit_simd !0 !intel_reqd_sub_group_size !1 { -; CHECK: {{.*}} spir_kernel void @__large_grf_kernel2() #0 {{.*}} - call spir_func void @_Z28__sycl_set_kernel_propertiesi(i32 noundef 0) - ret void -} - -attributes #0 = { "large-grf" } - -!0 = !{} -!1 = !{i32 1} diff --git a/llvm/test/tools/sycl-post-link/sycl-esimd-large-grf.ll b/llvm/test/tools/sycl-post-link/registerallocmode.ll similarity index 56% rename from llvm/test/tools/sycl-post-link/sycl-esimd-large-grf.ll rename to llvm/test/tools/sycl-post-link/registerallocmode.ll index 6bae075384405..2eb92ff00b372 100644 --- a/llvm/test/tools/sycl-post-link/sycl-esimd-large-grf.ll +++ b/llvm/test/tools/sycl-post-link/registerallocmode.ll @@ -1,34 +1,41 @@ -; This test checks handling of the -; set_kernel_properties(kernel_properties::use_large_grf); -; by the post-link-tool: -; - ESIMD/SYCL splitting happens as usual -; - ESIMD module is further split into callgraphs for entry points requesting -; "large GRF" and callgraphs for entry points which are not -; - Compiler adds 'isLargeGRF' property to the ESIMD device binary -; images requesting "large GRF" +; This test checks handling of RegisterAllocMode in SYCL post link ; RUN: sycl-post-link -split=source -symbols -split-esimd -lower-esimd -S < %s -o %t.table ; RUN: FileCheck %s -input-file=%t.table -; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK-ESIMD-LargeGRF-IR --implicit-check-not='RegisterAllocMode' +; RUN: FileCheck %s -input-file=%t_esimd_0.ll --check-prefixes CHECK-ESIMD-LargeGRF-IR ; RUN: FileCheck %s -input-file=%t_esimd_0.prop --check-prefixes CHECK-ESIMD-LargeGRF-PROP +; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-SYCL-LargeGRF-IR +; RUN: FileCheck %s -input-file=%t_0.prop --check-prefixes CHECK-SYCL-LargeGRF-PROP +; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-SYCL-LargeGRF-SYM +; RUN: FileCheck %s -input-file=%t_1.prop --check-prefixes CHECK-SYCL-PROP ; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-SYCL-SYM -; RUN: FileCheck %s -input-file=%t_esimd_1.sym --check-prefixes CHECK-ESIMD-SYM +; RUN: FileCheck %s -input-file=%t_esimd_1.prop --check-prefixes CHECK-ESIMD-PROP ; RUN: FileCheck %s -input-file=%t_esimd_0.sym --check-prefixes CHECK-ESIMD-LargeGRF-SYM ; CHECK: [Code|Properties|Symbols] ; CHECK: {{.*}}_0.ll|{{.*}}_0.prop|{{.*}}_0.sym +; CHECK: {{.*}}_esimd_0.ll|{{.*}}_esimd_0.prop|{{.*}}_esimd_0.sym ; CHECK: {{.*}}_1.ll|{{.*}}_1.prop|{{.*}}_1.sym ; CHECK: {{.*}}_esimd_1.ll|{{.*}}_esimd_1.prop|{{.*}}_esimd_1.sym ; CHECK-ESIMD-LargeGRF-PROP: isEsimdImage=1|1 -; CHECK-ESIMD-LargeGRF-PROP: isLargeGRF=1|1 +; CHECK-ESIMD-LargeGRF-PROP: RegisterAllocMode=1|2 + +; CHECK-SYCL-LargeGRF-PROP: RegisterAllocMode=1|2 + +; CHECK-SYCL-PROP-NOT: RegisterAllocMode ; CHECK-SYCL-SYM: __SYCL_kernel ; CHECK-SYCL-SYM-EMPTY: +; CHECK-SYCL-LargeGRF-SYM: __SYCL_kernel_large_grf +; CHECK-SYCL-LargeGRF-SYM-EMPTY: + ; CHECK-ESIMD-SYM: __ESIMD_kernel ; CHECK-ESIMD-SYM-EMPTY: +; CHECK-ESIMD-PROP-NOT: RegisterAllocMode + ; CHECK-ESIMD-LargeGRF-SYM: __ESIMD_large_grf_kernel ; CHECK-ESIMD-LargeGRF-SYM-EMPTY: @@ -42,29 +49,26 @@ entry: ret void } -define weak_odr dso_local spir_kernel void @__ESIMD_kernel() #0 !sycl_explicit_simd !0 !intel_reqd_sub_group_size !1 { +define weak_odr dso_local spir_kernel void @__SYCL_kernel_large_grf() #1 { +; CHECK-SYCL-LargeGRF-IR: define {{.*}} spir_kernel void @__SYCL_kernel_large_grf() #[[#Attr:]] entry: ret void } -define dso_local spir_func void @_Z17large_grf_markerv() { +define weak_odr dso_local spir_kernel void @__ESIMD_kernel() #0 !sycl_explicit_simd !0 !intel_reqd_sub_group_size !1 { + entry: - call spir_func void @_Z28__sycl_set_kernel_propertiesi(i32 noundef 0) -; -- Check that ESIMD lowering removed the marker call above: -; CHECK-ESIMD-LargeGRF-IR-NOT: {{.*}} @_Z28__sycl_set_kernel_propertiesi ret void } -declare dso_local spir_func void @_Z28__sycl_set_kernel_propertiesi(i32 noundef) - -define weak_odr dso_local spir_kernel void @__ESIMD_large_grf_kernel() #0 !sycl_explicit_simd !0 !intel_reqd_sub_group_size !1 { -; CHECK-ESIMD-LargeGRF-IR: @__ESIMD_large_grf_kernel() {{.*}} +define weak_odr dso_local spir_kernel void @__ESIMD_large_grf_kernel() #1 !sycl_explicit_simd !0 !intel_reqd_sub_group_size !1 { +; CHECK-ESIMD-LargeGRF-IR: @__ESIMD_large_grf_kernel() entry: - call spir_func void @_Z17large_grf_markerv() ret void } attributes #0 = { "sycl-module-id"="a.cpp" } +attributes #1 = { "sycl-module-id"="a.cpp" "RegisterAllocMode"="2" } !0 = !{} !1 = !{i32 1} diff --git a/llvm/test/tools/sycl-post-link/sycl-large-grf.ll b/llvm/test/tools/sycl-post-link/sycl-large-grf.ll deleted file mode 100644 index 105926200b0e9..0000000000000 --- a/llvm/test/tools/sycl-post-link/sycl-large-grf.ll +++ /dev/null @@ -1,60 +0,0 @@ -; This test checks handling of the -; set_kernel_properties(kernel_properties::use_large_grf); -; by the post-link-tool: -; - ESIMD/SYCL splitting happens as usual -; - ESIMD module is further split into callgraphs for entry points requesting -; "large GRF" and callgraphs for entry points which are not -; - Compiler adds 'isLargeGRF' property to the device binary -; images requesting "large GRF" - -; RUN: sycl-post-link -split=source -symbols -split-esimd -lower-esimd -S < %s -o %t.table -; RUN: FileCheck %s -input-file=%t.table -; RUN: FileCheck %s -input-file=%t_0.ll --check-prefixes CHECK-LARGE-GRF-IR -; RUN: FileCheck %s -input-file=%t_0.prop --check-prefixes CHECK-LARGE-GRF-PROP -; RUN: FileCheck %s -input-file=%t_1.sym --check-prefixes CHECK-SYCL-SYM -; RUN: FileCheck %s -input-file=%t_0.sym --check-prefixes CHECK-LARGE-GRF-SYM - -; CHECK: [Code|Properties|Symbols] -; CHECK: {{.*}}_0.ll|{{.*}}_0.prop|{{.*}}_0.sym -; CHECK: {{.*}}_1.ll|{{.*}}_1.prop|{{.*}}_1.sym - -; CHECK-LARGE-GRF-PROP: isLargeGRF=1|1 - -; CHECK-SYCL-SYM: __SYCL_kernel -; CHECK-SYCL-SYM-EMPTY: - -; CHECK-LARGE-GRF-SYM: __large_grf_kernel -; CHECK-LARGE-GRF-SYM-EMPTY: - -; ModuleID = 'large_grf.bc' -source_filename = "llvm-link" -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64" -target triple = "spir64-unknown-unknown" - -define weak_odr dso_local spir_kernel void @__SYCL_kernel() #0 { -entry: - ret void -} - -define dso_local spir_func void @_Z17large_grf_markerv() { -entry: - call spir_func void @_Z28__sycl_set_kernel_propertiesi(i32 noundef 0) -; -- Check that LowerKernelProps lowering removed the marker call above: -; CHECK-LARGE-GRF-IR-NOT: {{.*}} @_Z28__sycl_set_kernel_propertiesi - ret void -} - -declare dso_local spir_func void @_Z28__sycl_set_kernel_propertiesi(i32 noundef) - -define weak_odr dso_local spir_kernel void @__large_grf_kernel() #0 { -; CHECK-LARGE-GRF-IR: @__large_grf_kernel() {{.*}} !RegisterAllocMode ![[MetadataArg:[0-9]+]] -; CHECK-LARGE-GRF-IR: ![[MetadataArg]] = !{i32 2} -entry: - call spir_func void @_Z17large_grf_markerv() - ret void -} - -attributes #0 = { "sycl-module-id"="a.cpp" } - -!0 = !{} -!1 = !{i32 1} diff --git a/llvm/tools/sycl-post-link/ModuleSplitter.cpp b/llvm/tools/sycl-post-link/ModuleSplitter.cpp index b23bb7e4c0e1d..8c12526e3c630 100644 --- a/llvm/tools/sycl-post-link/ModuleSplitter.cpp +++ b/llvm/tools/sycl-post-link/ModuleSplitter.cpp @@ -20,7 +20,6 @@ #include "llvm/IR/Module.h" #include "llvm/SYCLLowerIR/DeviceGlobals.h" #include "llvm/SYCLLowerIR/LowerInvokeSimd.h" -#include "llvm/SYCLLowerIR/LowerKernelProps.h" #include "llvm/SYCLLowerIR/SYCLUtils.h" #include "llvm/Transforms/IPO.h" #include "llvm/Transforms/IPO/GlobalDCE.h" @@ -849,8 +848,7 @@ getDeviceCodeSplitter(ModuleDesc &&MD, IRSplitMode Mode, bool IROutputOnly, // Optional features // Note: Add more rules at the end of the list to avoid chaning orders of // output files in existing tests. - Categorizer.registerSimpleFlagAttributeRule( - ::sycl::kernel_props::ATTR_LARGE_GRF, "large-grf"); + Categorizer.registerSimpleStringAttributeRule("RegisterAllocMode"); Categorizer.registerListOfIntegersInMetadataSortedRule("sycl_used_aspects"); Categorizer.registerListOfIntegersInMetadataRule("reqd_work_group_size"); Categorizer.registerSimpleStringAttributeRule( diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index a676cab38fc59..cb0465a66c929 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -35,11 +35,11 @@ #include "llvm/IRReader/IRReader.h" #include "llvm/Linker/Linker.h" #include "llvm/Passes/PassBuilder.h" +#include "llvm/SYCLLowerIR/CompileTimePropertiesPass.h" #include "llvm/SYCLLowerIR/DeviceGlobals.h" #include "llvm/SYCLLowerIR/ESIMD/LowerESIMD.h" #include "llvm/SYCLLowerIR/HostPipes.h" #include "llvm/SYCLLowerIR/LowerInvokeSimd.h" -#include "llvm/SYCLLowerIR/LowerKernelProps.h" #include "llvm/SYCLLowerIR/SYCLUtils.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/FileSystem.h" @@ -455,14 +455,22 @@ std::string saveModuleProperties(module_split::ModuleDesc &MD, } { - // check for large GRF property - bool HasLargeGRF = llvm::any_of(MD.entries(), [](const Function *F) { - return F->hasFnAttribute(::sycl::kernel_props::ATTR_LARGE_GRF); + StringRef RegAllocModeAttr = "RegisterAllocMode"; + uint32_t RegAllocModeVal; + + bool HasRegAllocMode = llvm::any_of(MD.entries(), [&](const Function *F) { + if (!F->hasFnAttribute(RegAllocModeAttr)) + return false; + const auto &Attr = F->getFnAttribute(RegAllocModeAttr); + RegAllocModeVal = getAttributeAsInteger(Attr); + return true; }); - - if (HasLargeGRF) - PropSet[PropSetRegTy::SYCL_MISC_PROP].insert({"isLargeGRF", true}); + if (HasRegAllocMode) { + PropSet[PropSetRegTy::SYCL_MISC_PROP].insert( + {RegAllocModeAttr, RegAllocModeVal}); + } } + // FIXME: Remove 'if' below when possible // GPU backend has a problem with accepting optimization level options in form // described by Level Zero specification (-ze-opt-level=1) when 'invoke_simd' @@ -789,11 +797,6 @@ processInputModule(std::unique_ptr M) { } Modified |= InvokeSimdMet; - // Lower kernel properties setting APIs before "large GRF" splitting, as: - // - the latter uses the result of the former - // - saves processing time - Modified |= runModulePass(*M); - DUMP_ENTRY_POINTS(*M, EmitOnlyKernelsAsEntryPoints, "Input"); // -ir-output-only assumes single module output thus no code splitting. @@ -855,8 +858,6 @@ processInputModule(std::unique_ptr M) { if (!MDesc2.isSYCL() && LowerEsimd) { assert(MDesc2.isESIMD() && "NYI"); - // ESIMD lowering also detects large-GRF kernels, so it must happen - // before large-GRF split. Modified |= lowerEsimdConstructs(MDesc2); } MMs.emplace_back(std::move(MDesc2)); diff --git a/sycl/include/sycl/ext/intel/experimental/kernel_properties.hpp b/sycl/include/sycl/ext/intel/experimental/kernel_properties.hpp index 1161ec321d4f4..1f14b91293ec8 100644 --- a/sycl/include/sycl/ext/intel/experimental/kernel_properties.hpp +++ b/sycl/include/sycl/ext/intel/experimental/kernel_properties.hpp @@ -10,72 +10,58 @@ #pragma once -#include - -#include +#include +#include namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { -namespace ext::intel::experimental { - -namespace kernel_properties { - -/// -/// This namespace contains APIs to set kernel properties. -/// +namespace ext { +namespace intel { +namespace experimental { -// Implementation note: ::value fields should match property IDs -// specified in llvm/lib/SYCLLowerIR/LowerKernelProps.cpp - -namespace detail { -// Proxy to access private property classes' fields from the API code. -template struct proxy { - static constexpr int value = T::value; +enum class RegisterAllocMode : uint32_t { + AUTO = 0, + LARGE = 2, }; -} // namespace detail -/// A boolean property which requests the compiler to use large register -/// allocation mode at the expense of reducing the amount of available hardware -/// threads. -struct use_large_grf_tag { - template friend struct detail::proxy; - -private: - // Property identifier - static constexpr int value = 0; +struct register_alloc_mode_key { + template + using value_t = oneapi::experimental::property_value< + register_alloc_mode_key, std::integral_constant>; }; -__SYCL_DEPRECATED("use_double_grf is deprecated, use use_large_grf instead") -inline constexpr use_large_grf_tag use_double_grf = {}; -inline constexpr use_large_grf_tag use_large_grf = {}; +template +inline constexpr register_alloc_mode_key::value_t register_alloc_mode; +} // namespace experimental +} // namespace intel + +namespace oneapi { +namespace experimental { -} // namespace kernel_properties +template <> +struct is_property_key + : std::true_type {}; -namespace __MP11_NS = sycl::detail::boost::mp11; +namespace detail { +template <> +struct PropertyToKind { + static constexpr PropKind Kind = PropKind::RegisterAllocMode; +}; -// TODO this should be replaced with the generic SYCL compile-time properites -// mechanism once implementaion is available. -// https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/proposed/sycl_ext_oneapi_kernel_properties.asciidoc +template <> +struct IsCompileTimeProperty + : std::true_type {}; -template -void set_kernel_properties(KernelProps... props) { - // TODO check for duplicates - using Props = __MP11_NS::mp_list; - __MP11_NS::mp_for_each([&](auto Prop) { - using PropT = decltype(Prop); - constexpr bool IsLargeGRF = - std::is_same_v; - if constexpr (IsLargeGRF) { - __sycl_set_kernel_properties( - kernel_properties::detail::proxy< - kernel_properties::use_large_grf_tag>::value); - } else { - static_assert(IsLargeGRF && - "set_kernel_properties: invalid kernel property"); - } - }); -} +template +struct PropertyMetaInfo< + intel::experimental::register_alloc_mode_key::value_t> { + static constexpr const char *name = "RegisterAllocMode"; + static constexpr intel::experimental::RegisterAllocMode value = Mode; +}; -} // namespace ext::intel::experimental +} // namespace detail +} // namespace experimental +} // namespace oneapi +} // namespace ext } // __SYCL_INLINE_VER_NAMESPACE(_V1) } // namespace sycl diff --git a/sycl/include/sycl/ext/oneapi/properties/property.hpp b/sycl/include/sycl/ext/oneapi/properties/property.hpp index aeefaee9a994d..2b0d2c1c3dd3c 100644 --- a/sycl/include/sycl/ext/oneapi/properties/property.hpp +++ b/sycl/include/sycl/ext/oneapi/properties/property.hpp @@ -194,8 +194,9 @@ enum PropKind : uint32_t { ReadyLatency = 28, UsesValid = 29, UseRootSync = 30, + RegisterAllocMode = 31, // PropKindSize must always be the last value. - PropKindSize = 31, + PropKindSize = 32, }; // This trait must be specialized for all properties and must have a unique diff --git a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index 4e243e56ff52f..ab70cb6db13ea 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -63,6 +63,7 @@ #include #endif #include +#include #include #include #include diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index fe6c48e319826..331dd30b7adb3 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -374,6 +374,29 @@ static std::string getUint32PropAsOptStr(const RTDeviceBinaryImage &Img, return temp; } +static void appendCompileOptionsForRegAllocMode(std::string &CompileOpts, + const RTDeviceBinaryImage &Img, + bool IsEsimdImage) { + pi_device_binary_property Prop = Img.getProperty("RegisterAllocMode"); + if (!Prop) + return; + uint32_t PropVal = DeviceBinaryProperty(Prop).asUint32(); + // 2 means Large GRF. + if (PropVal == 2) { + if (!CompileOpts.empty()) + CompileOpts += " "; + CompileOpts += IsEsimdImage ? "-doubleGRF" : "-ze-opt-large-register-file"; + } + // 0 means Auto GRF. + // TODO: Support Auto GRF for ESIMD once vc supports it. + if (PropVal == 0 && !IsEsimdImage) { + if (!CompileOpts.empty()) + CompileOpts += " "; + // This option works for both LO AND OCL backends. + CompileOpts += "-ze-intel-enable-auto-large-GRF-mode"; + } +} + static void appendCompileOptionsFromImage(std::string &CompileOpts, const RTDeviceBinaryImage &Img, const std::vector &Devs, @@ -393,9 +416,6 @@ static void appendCompileOptionsFromImage(std::string &CompileOpts, CompileOpts += std::string(TemporaryStr); } bool isEsimdImage = getUint32PropAsBool(Img, "isEsimdImage"); - // TODO: Remove isDoubleGRF check in next ABI break - bool isLargeGRF = getUint32PropAsBool(Img, "isLargeGRF") || - getUint32PropAsBool(Img, "isDoubleGRF"); // The -vc-codegen option is always preserved for ESIMD kernels, regardless // of the contents SYCL_PROGRAM_COMPILE_OPTIONS environment variable. if (isEsimdImage) { @@ -407,14 +427,8 @@ static void appendCompileOptionsFromImage(std::string &CompileOpts, if (detail::SYCLConfig::get() == 0) CompileOpts += " -disable-finalizer-msg"; } - if (isLargeGRF) { - if (!CompileOpts.empty()) - CompileOpts += " "; - // TODO: Don't check the property or pass these flags after the next ABI - // break. The behavior is now controlled through the RegisterAllocMode - // metadata. - CompileOpts += isEsimdImage ? "-doubleGRF" : "-ze-opt-large-register-file"; - } + + appendCompileOptionsForRegAllocMode(CompileOpts, Img, isEsimdImage); const auto &PlatformImpl = detail::getSyclObjImpl(Devs[0].get_platform()); diff --git a/sycl/test-e2e/DeviceCodeSplit/large-grf.cpp b/sycl/test-e2e/DeviceCodeSplit/grf.cpp similarity index 74% rename from sycl/test-e2e/DeviceCodeSplit/large-grf.cpp rename to sycl/test-e2e/DeviceCodeSplit/grf.cpp index 893ed0aa8c35c..018ac271f2409 100644 --- a/sycl/test-e2e/DeviceCodeSplit/large-grf.cpp +++ b/sycl/test-e2e/DeviceCodeSplit/grf.cpp @@ -5,14 +5,13 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// This test verifies effect of -// set_kernel_properties(kernel_properties::use_double_grf); +// This test verifies effect of the register_alloc_mode kernel property // API call in device code: // - ESIMD/SYCL splitting happens as usual -// - SYCL module is further split into callgraphs for entry points requesting -// "double GRF" and callgraphs for entry points which are not -// - SYCL device binary images requesting "double GRF" must be compiled with -// -ze-opt-large-register-file option +// - SYCL module is further split into callgraphs for entry points for +// each value +// - SYCL device binary images are compiled with the corresponding +// compiler option // REQUIRES: gpu-intel-pvc // UNSUPPORTED: cuda || hip @@ -22,17 +21,16 @@ // RUN: %{build} -o %t.out // RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-NO-VAR // RUN: env SYCL_PROGRAM_COMPILE_OPTIONS="-g" SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-WITH-VAR -// RUN: %{build} -DUSE_LARGE_GRF=1 -o %t.out -// RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-NO-VAR -// RUN: env SYCL_PROGRAM_COMPILE_OPTIONS="-g" SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-WITH-VAR - +// RUN: %{build} -DUSE_AUTO_GRF=1 -o %t.out +// RUN: env SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-AUTO-NO-VAR +// RUN: env SYCL_PROGRAM_COMPILE_OPTIONS="-g" SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-AUTO-WITH-VAR #include "../helpers.hpp" #include -#include #include using namespace sycl; using namespace sycl::ext::intel::experimental; +using namespace sycl::ext::oneapi::experimental; bool checkResult(const std::vector &A, int Inc) { int err_cnt = 0; @@ -54,16 +52,6 @@ bool checkResult(const std::vector &A, int Inc) { return true; } -// Make the double GRF request from non-inlineable function - compiler should -// mark the caller kernel as "double GRF" anyway. -__attribute__((noinline)) void double_grf_marker() { -#ifdef USE_LARGE_GRF - set_kernel_properties(kernel_properties::use_large_grf); -#else - set_kernel_properties(kernel_properties::use_double_grf); -#endif -} - int main(void) { constexpr unsigned Size = 32; constexpr unsigned VL = 16; @@ -79,7 +67,8 @@ int main(void) { queue q(sycl::gpu_selector_v, exceptionHandlerHelper); auto dev = q.get_device(); - std::cout << "Running on " << dev.get_info() << "\n"; + std::cout << "Running on " << dev.get_info() + << "\n"; auto e = q.submit([&](handler &cgh) { auto PA = bufa.get_access(cgh); @@ -101,17 +90,21 @@ int main(void) { try { buffer bufa(A.data(), range<1>(Size)); +#ifdef USE_AUTO_GRF + properties prop{register_alloc_mode}; +#else + properties prop{register_alloc_mode}; +#endif queue q(sycl::gpu_selector_v, exceptionHandlerHelper); auto dev = q.get_device(); - std::cout << "Running on " << dev.get_info() << "\n"; + std::cout << "Running on " << dev.get_info() + << "\n"; auto e = q.submit([&](handler &cgh) { auto PA = bufa.get_access(cgh); - cgh.parallel_for(Size, [=](id<1> i) { - double_grf_marker(); - PA[i] += 2; - }); + cgh.parallel_for( + Size, prop, [=](id<1> i) { PA[i] += 2; }); }); e.wait(); } catch (sycl::exception const &e) { @@ -120,9 +113,9 @@ int main(void) { } if (checkResult(A, 4)) { - std::cout << "DoubleGRF kernel passed\n"; + std::cout << "SpecifiedGRF kernel passed\n"; } else { - std::cout << "DoubleGRF kernel failed\n"; + std::cout << "SpecifiedGRF kernel failed\n"; return 1; } @@ -140,7 +133,9 @@ int main(void) { // CHECK-LABEL: ---> piProgramBuild( // CHECK-NO-VAR: -ze-opt-large-register-file // CHECK-WITH-VAR: -g -ze-opt-large-register-file +// CHECK-AUTO-NO-VAR: -ze-intel-enable-auto-large-GRF-mode +// CHECK-AUTO-WITH-VAR: -g -ze-intel-enable-auto-large-GRF-mode // CHECK: ) ---> pi_result : PI_SUCCESS // CHECK-LABEL: ---> piKernelCreate( -// CHECK: : {{.*}}DoubleGRF +// CHECK: : {{.*}}SpecifiedGRF // CHECK: ) ---> pi_result : PI_SUCCESS diff --git a/sycl/test-e2e/ESIMD/large-grf.cpp b/sycl/test-e2e/ESIMD/large-grf.cpp index 0e1af71c628f8..dc812774efb95 100644 --- a/sycl/test-e2e/ESIMD/large-grf.cpp +++ b/sycl/test-e2e/ESIMD/large-grf.cpp @@ -5,14 +5,13 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// This test verifies effect of -// set_kernel_properties(kernel_properties::use_large_grf); +// This test verifies effect of the register_alloc_mode kernel property // API call in device code: // - ESIMD/SYCL splitting happens as usual -// - ESIMD module is further split into callgraphs for entry points requesting -// "large GRF" and callgraphs for entry points which are not -// - ESIMD device binary images requesting "large GRF" must be compiled with -// -doubleGRF option +// - ESIMD module is further split into callgraphs for entry points for +// each value +// - ESIMD device binary images are compiled with the corresponding +// compiler option // REQUIRES: gpu-intel-pvc // TODO/FIXME: esimd_emulator does not support online compilation that @@ -26,7 +25,6 @@ #include #include -#include #include using namespace sycl; @@ -54,12 +52,6 @@ bool checkResult(const std::vector &A, int Inc) { return true; } -// Make the large GRF request from non-inlineable function - compiler should -// mark the caller kernel as "large GRF" anyway. -__attribute__((noinline)) void large_grf_marker() { - set_kernel_properties(kernel_properties::use_large_grf); -} - int main(void) { constexpr unsigned Size = 32; constexpr unsigned VL = 16; @@ -128,15 +120,16 @@ int main(void) { try { buffer bufa(A.data(), range<1>(Size)); queue q(esimd_test::ESIMDSelector, esimd_test::createExceptionHandler()); - + sycl::ext::oneapi::experimental::properties prop{ + sycl::ext::intel::experimental::register_alloc_mode< + sycl::ext::intel::experimental::RegisterAllocMode::LARGE>}; auto dev = q.get_device(); std::cout << "Running on " << dev.get_info() << "\n"; auto e = q.submit([&](handler &cgh) { auto PA = bufa.get_access(cgh); cgh.parallel_for( - Size, [=](id<1> i) SYCL_ESIMD_KERNEL { - large_grf_marker(); + Size, prop, [=](id<1> i) SYCL_ESIMD_KERNEL { unsigned int offset = i * VL * sizeof(float); simd va; va.copy_from(PA, offset); From acc7dd258e60a99a42db25ec3d1f521df2a7f255 Mon Sep 17 00:00:00 2001 From: "Sarnie, Nick" Date: Tue, 16 May 2023 08:02:52 -0700 Subject: [PATCH 2/5] address runtime fedback Signed-off-by: Sarnie, Nick --- .../SYCLLowerIR/CompileTimePropertiesPass.cpp | 20 ++++++------ .../kernel-attributes/register-alloc-mode.ll | 2 +- llvm/tools/sycl-post-link/ModuleSplitter.cpp | 2 +- llvm/tools/sycl-post-link/sycl-post-link.cpp | 2 +- sycl/CMakeLists.txt | 2 +- .../intel/experimental/kernel_properties.hpp | 32 ++++++++----------- .../program_manager/program_manager.cpp | 15 ++++++--- sycl/test-e2e/DeviceCodeSplit/grf.cpp | 4 +-- sycl/test-e2e/ESIMD/large-grf.cpp | 2 +- 9 files changed, 41 insertions(+), 40 deletions(-) diff --git a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp index 3ce1bd4a532e3..b378a5c0df077 100644 --- a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp +++ b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp @@ -28,7 +28,7 @@ namespace { constexpr StringRef SYCL_HOST_ACCESS_ATTR = "sycl-host-access"; constexpr StringRef SYCL_PIPELINED_ATTR = "sycl-pipelined"; -constexpr StringRef SYCL_REGISTER_ALLOC_MODE_ATTR = "RegisterAllocMode"; +constexpr StringRef SYCL_REGISTER_ALLOC_MODE_ATTR = "sycl-register-alloc-mode"; constexpr StringRef SPIRV_DECOR_MD_KIND = "spirv.Decorations"; constexpr StringRef SPIRV_PARAM_DECOR_MD_KIND = "spirv.ParameterDecorations"; @@ -194,15 +194,6 @@ attributeToExecModeMetadata(Function &F, const Attribute &Attr) { return std::nullopt; StringRef AttrKindStr = Attr.getKindAsString(); - if (AttrKindStr == SYCL_REGISTER_ALLOC_MODE_ATTR && - !llvm::esimd::isESIMD(F)) { - uint32_t RegAllocModeVal = getAttributeAsInteger(Attr); - Metadata *AttrMDArgs[] = {ConstantAsMetadata::get(Constant::getIntegerValue( - Type::getInt32Ty(Ctx), APInt(32, RegAllocModeVal)))}; - return std::pair(AttrKindStr.str(), - MDNode::get(Ctx, AttrMDArgs)); - } - // Early exit if it is not a sycl-* attribute. if (!AttrKindStr.startswith("sycl-")) return std::nullopt; @@ -280,6 +271,15 @@ attributeToExecModeMetadata(Function &F, const Attribute &Attr) { return std::pair("ip_interface", getIpInterface("csr", Ctx, Attr)); + if (AttrKindStr == SYCL_REGISTER_ALLOC_MODE_ATTR && + !llvm::esimd::isESIMD(F)) { + uint32_t RegAllocModeVal = getAttributeAsInteger(Attr); + Metadata *AttrMDArgs[] = {ConstantAsMetadata::get(Constant::getIntegerValue( + Type::getInt32Ty(Ctx), APInt(32, RegAllocModeVal)))}; + return std::pair("RegisterAllocMode", + MDNode::get(Ctx, AttrMDArgs)); + } + return std::nullopt; } diff --git a/llvm/test/SYCLLowerIR/CompileTimePropertiesPass/kernel-attributes/register-alloc-mode.ll b/llvm/test/SYCLLowerIR/CompileTimePropertiesPass/kernel-attributes/register-alloc-mode.ll index e0d1b6dd7aa7f..d7b27b33643b4 100644 --- a/llvm/test/SYCLLowerIR/CompileTimePropertiesPass/kernel-attributes/register-alloc-mode.ll +++ b/llvm/test/SYCLLowerIR/CompileTimePropertiesPass/kernel-attributes/register-alloc-mode.ll @@ -30,6 +30,6 @@ entry: } attributes #0 = { convergent norecurse } -attributes #1 = { convergent norecurse "RegisterAllocMode"="2" } +attributes #1 = { convergent norecurse "sycl-register-alloc-mode"="2" } !1 = !{} diff --git a/llvm/tools/sycl-post-link/ModuleSplitter.cpp b/llvm/tools/sycl-post-link/ModuleSplitter.cpp index 8c12526e3c630..49aa88b6290db 100644 --- a/llvm/tools/sycl-post-link/ModuleSplitter.cpp +++ b/llvm/tools/sycl-post-link/ModuleSplitter.cpp @@ -848,7 +848,7 @@ getDeviceCodeSplitter(ModuleDesc &&MD, IRSplitMode Mode, bool IROutputOnly, // Optional features // Note: Add more rules at the end of the list to avoid chaning orders of // output files in existing tests. - Categorizer.registerSimpleStringAttributeRule("RegisterAllocMode"); + Categorizer.registerSimpleStringAttributeRule("sycl-register-alloc-mode"); Categorizer.registerListOfIntegersInMetadataSortedRule("sycl_used_aspects"); Categorizer.registerListOfIntegersInMetadataRule("reqd_work_group_size"); Categorizer.registerSimpleStringAttributeRule( diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index cb0465a66c929..5866d3e0ff07a 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -455,7 +455,7 @@ std::string saveModuleProperties(module_split::ModuleDesc &MD, } { - StringRef RegAllocModeAttr = "RegisterAllocMode"; + StringRef RegAllocModeAttr = "sycl-register-alloc-mode"; uint32_t RegAllocModeVal; bool HasRegAllocMode = llvm::any_of(MD.entries(), [&](const Function *F) { diff --git a/sycl/CMakeLists.txt b/sycl/CMakeLists.txt index 34f93b9f94453..31b05dee0f234 100644 --- a/sycl/CMakeLists.txt +++ b/sycl/CMakeLists.txt @@ -36,7 +36,7 @@ set(SYCL_MAJOR_VERSION 7) set(SYCL_MINOR_VERSION 0) set(SYCL_PATCH_VERSION 0) -set(SYCL_DEV_ABI_VERSION 1) +set(SYCL_DEV_ABI_VERSION 2) if (SYCL_ADD_DEV_VERSION_POSTFIX) set(SYCL_VERSION_POSTFIX "-${SYCL_DEV_ABI_VERSION}") endif() diff --git a/sycl/include/sycl/ext/intel/experimental/kernel_properties.hpp b/sycl/include/sycl/ext/intel/experimental/kernel_properties.hpp index 1f14b91293ec8..bb04520cee096 100644 --- a/sycl/include/sycl/ext/intel/experimental/kernel_properties.hpp +++ b/sycl/include/sycl/ext/intel/experimental/kernel_properties.hpp @@ -16,28 +16,25 @@ namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { namespace ext { -namespace intel { -namespace experimental { +namespace intel::experimental { -enum class RegisterAllocMode : uint32_t { - AUTO = 0, - LARGE = 2, +enum class register_alloc_mode_enum : uint32_t { + automatic = 0, + large = 2, }; struct register_alloc_mode_key { - template + template using value_t = oneapi::experimental::property_value< - register_alloc_mode_key, std::integral_constant>; + register_alloc_mode_key, + std::integral_constant>; }; -template +template inline constexpr register_alloc_mode_key::value_t register_alloc_mode; -} // namespace experimental -} // namespace intel - -namespace oneapi { -namespace experimental { +} // namespace intel::experimental +namespace oneapi::experimental { template <> struct is_property_key : std::true_type {}; @@ -52,16 +49,15 @@ template <> struct IsCompileTimeProperty : std::true_type {}; -template +template struct PropertyMetaInfo< intel::experimental::register_alloc_mode_key::value_t> { - static constexpr const char *name = "RegisterAllocMode"; - static constexpr intel::experimental::RegisterAllocMode value = Mode; + static constexpr const char *name = "sycl-register-alloc-mode"; + static constexpr intel::experimental::register_alloc_mode_enum value = Mode; }; } // namespace detail -} // namespace experimental -} // namespace oneapi +} // namespace oneapi::experimental } // namespace ext } // __SYCL_INLINE_VER_NAMESPACE(_V1) } // namespace sycl diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 331dd30b7adb3..e65c08fd31d0a 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -377,19 +378,23 @@ static std::string getUint32PropAsOptStr(const RTDeviceBinaryImage &Img, static void appendCompileOptionsForRegAllocMode(std::string &CompileOpts, const RTDeviceBinaryImage &Img, bool IsEsimdImage) { - pi_device_binary_property Prop = Img.getProperty("RegisterAllocMode"); + pi_device_binary_property Prop = Img.getProperty("sycl-register-alloc-mode"); if (!Prop) return; uint32_t PropVal = DeviceBinaryProperty(Prop).asUint32(); - // 2 means Large GRF. - if (PropVal == 2) { + if (PropVal == + static_cast( + ext::intel::experimental::register_alloc_mode_enum::large)) { if (!CompileOpts.empty()) CompileOpts += " "; + // This option works for both LO AND OCL backends. CompileOpts += IsEsimdImage ? "-doubleGRF" : "-ze-opt-large-register-file"; } - // 0 means Auto GRF. // TODO: Support Auto GRF for ESIMD once vc supports it. - if (PropVal == 0 && !IsEsimdImage) { + if (PropVal == + static_cast( + ext::intel::experimental::register_alloc_mode_enum::automatic) && + !IsEsimdImage) { if (!CompileOpts.empty()) CompileOpts += " "; // This option works for both LO AND OCL backends. diff --git a/sycl/test-e2e/DeviceCodeSplit/grf.cpp b/sycl/test-e2e/DeviceCodeSplit/grf.cpp index 018ac271f2409..56d2d65956367 100644 --- a/sycl/test-e2e/DeviceCodeSplit/grf.cpp +++ b/sycl/test-e2e/DeviceCodeSplit/grf.cpp @@ -91,9 +91,9 @@ int main(void) { try { buffer bufa(A.data(), range<1>(Size)); #ifdef USE_AUTO_GRF - properties prop{register_alloc_mode}; + properties prop{register_alloc_mode}; #else - properties prop{register_alloc_mode}; + properties prop{register_alloc_mode}; #endif queue q(sycl::gpu_selector_v, exceptionHandlerHelper); diff --git a/sycl/test-e2e/ESIMD/large-grf.cpp b/sycl/test-e2e/ESIMD/large-grf.cpp index dc812774efb95..14f2c45b5b793 100644 --- a/sycl/test-e2e/ESIMD/large-grf.cpp +++ b/sycl/test-e2e/ESIMD/large-grf.cpp @@ -122,7 +122,7 @@ int main(void) { queue q(esimd_test::ESIMDSelector, esimd_test::createExceptionHandler()); sycl::ext::oneapi::experimental::properties prop{ sycl::ext::intel::experimental::register_alloc_mode< - sycl::ext::intel::experimental::RegisterAllocMode::LARGE>}; + sycl::ext::intel::experimental::register_alloc_mode_enum::large>}; auto dev = q.get_device(); std::cout << "Running on " << dev.get_info() << "\n"; From 942ff8c8824a1f472cebe86156767ca7e6a26bee Mon Sep 17 00:00:00 2001 From: "Sarnie, Nick" Date: Tue, 16 May 2023 09:27:22 -0700 Subject: [PATCH 3/5] whitespace Signed-off-by: Sarnie, Nick --- llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp index b378a5c0df077..557a1590f185a 100644 --- a/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp +++ b/llvm/lib/SYCLLowerIR/CompileTimePropertiesPass.cpp @@ -193,7 +193,6 @@ attributeToExecModeMetadata(Function &F, const Attribute &Attr) { if (!Attr.isStringAttribute()) return std::nullopt; StringRef AttrKindStr = Attr.getKindAsString(); - // Early exit if it is not a sycl-* attribute. if (!AttrKindStr.startswith("sycl-")) return std::nullopt; From 96b292d3a69a0639a512e8d79449df43f05ba6f9 Mon Sep 17 00:00:00 2001 From: "Sarnie, Nick" Date: Tue, 16 May 2023 10:23:23 -0700 Subject: [PATCH 4/5] use detail namespace Signed-off-by: Sarnie, Nick --- .../kernel_properties.hpp | 30 ++++++++----------- sycl/include/sycl/sycl.hpp | 1 - .../program_manager/program_manager.cpp | 10 ++----- sycl/test-e2e/DeviceCodeSplit/grf.cpp | 2 ++ sycl/test-e2e/ESIMD/large-grf.cpp | 5 ++-- 5 files changed, 20 insertions(+), 28 deletions(-) rename sycl/include/sycl/{ext/intel/experimental => detail}/kernel_properties.hpp (65%) diff --git a/sycl/include/sycl/ext/intel/experimental/kernel_properties.hpp b/sycl/include/sycl/detail/kernel_properties.hpp similarity index 65% rename from sycl/include/sycl/ext/intel/experimental/kernel_properties.hpp rename to sycl/include/sycl/detail/kernel_properties.hpp index bb04520cee096..01569922c39e5 100644 --- a/sycl/include/sycl/ext/intel/experimental/kernel_properties.hpp +++ b/sycl/include/sycl/detail/kernel_properties.hpp @@ -15,9 +15,7 @@ namespace sycl { __SYCL_INLINE_VER_NAMESPACE(_V1) { -namespace ext { -namespace intel::experimental { - +namespace detail { enum class register_alloc_mode_enum : uint32_t { automatic = 0, large = 2, @@ -25,39 +23,35 @@ enum class register_alloc_mode_enum : uint32_t { struct register_alloc_mode_key { template - using value_t = oneapi::experimental::property_value< + using value_t = sycl::ext::oneapi::experimental::property_value< register_alloc_mode_key, std::integral_constant>; }; template inline constexpr register_alloc_mode_key::value_t register_alloc_mode; -} // namespace intel::experimental +} // namespace detail -namespace oneapi::experimental { +namespace ext::oneapi::experimental { template <> -struct is_property_key - : std::true_type {}; +struct is_property_key : std::true_type { +}; namespace detail { -template <> -struct PropertyToKind { +template <> struct PropertyToKind { static constexpr PropKind Kind = PropKind::RegisterAllocMode; }; template <> -struct IsCompileTimeProperty +struct IsCompileTimeProperty : std::true_type {}; -template -struct PropertyMetaInfo< - intel::experimental::register_alloc_mode_key::value_t> { +template +struct PropertyMetaInfo> { static constexpr const char *name = "sycl-register-alloc-mode"; - static constexpr intel::experimental::register_alloc_mode_enum value = Mode; + static constexpr sycl::detail::register_alloc_mode_enum value = Mode; }; - } // namespace detail -} // namespace oneapi::experimental -} // namespace ext +} // namespace ext::oneapi::experimental } // __SYCL_INLINE_VER_NAMESPACE(_V1) } // namespace sycl diff --git a/sycl/include/sycl/sycl.hpp b/sycl/include/sycl/sycl.hpp index ab70cb6db13ea..4e243e56ff52f 100644 --- a/sycl/include/sycl/sycl.hpp +++ b/sycl/include/sycl/sycl.hpp @@ -63,7 +63,6 @@ #include #endif #include -#include #include #include #include diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index e65c08fd31d0a..010f57494fd68 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -22,12 +22,12 @@ #include #include #include +#include #include #include #include #include #include -#include #include #include @@ -382,18 +382,14 @@ static void appendCompileOptionsForRegAllocMode(std::string &CompileOpts, if (!Prop) return; uint32_t PropVal = DeviceBinaryProperty(Prop).asUint32(); - if (PropVal == - static_cast( - ext::intel::experimental::register_alloc_mode_enum::large)) { + if (PropVal == static_cast(register_alloc_mode_enum::large)) { if (!CompileOpts.empty()) CompileOpts += " "; // This option works for both LO AND OCL backends. CompileOpts += IsEsimdImage ? "-doubleGRF" : "-ze-opt-large-register-file"; } // TODO: Support Auto GRF for ESIMD once vc supports it. - if (PropVal == - static_cast( - ext::intel::experimental::register_alloc_mode_enum::automatic) && + if (PropVal == static_cast(register_alloc_mode_enum::automatic) && !IsEsimdImage) { if (!CompileOpts.empty()) CompileOpts += " "; diff --git a/sycl/test-e2e/DeviceCodeSplit/grf.cpp b/sycl/test-e2e/DeviceCodeSplit/grf.cpp index 56d2d65956367..26ce23b18f8b1 100644 --- a/sycl/test-e2e/DeviceCodeSplit/grf.cpp +++ b/sycl/test-e2e/DeviceCodeSplit/grf.cpp @@ -26,9 +26,11 @@ // RUN: env SYCL_PROGRAM_COMPILE_OPTIONS="-g" SYCL_PI_TRACE=-1 %{run} %t.out 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-AUTO-WITH-VAR #include "../helpers.hpp" #include +#include #include using namespace sycl; +using namespace sycl::detail; using namespace sycl::ext::intel::experimental; using namespace sycl::ext::oneapi::experimental; diff --git a/sycl/test-e2e/ESIMD/large-grf.cpp b/sycl/test-e2e/ESIMD/large-grf.cpp index 14f2c45b5b793..079834fc07c07 100644 --- a/sycl/test-e2e/ESIMD/large-grf.cpp +++ b/sycl/test-e2e/ESIMD/large-grf.cpp @@ -24,10 +24,12 @@ #include "esimd_test_utils.hpp" #include +#include #include #include using namespace sycl; +using namespace sycl::detail; using namespace sycl::ext::intel::esimd; using namespace sycl::ext::intel::experimental; using namespace sycl::ext::intel::experimental::esimd; @@ -121,8 +123,7 @@ int main(void) { buffer bufa(A.data(), range<1>(Size)); queue q(esimd_test::ESIMDSelector, esimd_test::createExceptionHandler()); sycl::ext::oneapi::experimental::properties prop{ - sycl::ext::intel::experimental::register_alloc_mode< - sycl::ext::intel::experimental::register_alloc_mode_enum::large>}; + register_alloc_mode}; auto dev = q.get_device(); std::cout << "Running on " << dev.get_info() << "\n"; From 0865098ed7fb813ba34b425e71976959d3abd525 Mon Sep 17 00:00:00 2001 From: "Sarnie, Nick" Date: Tue, 16 May 2023 11:14:44 -0700 Subject: [PATCH 5/5] fix lit test Signed-off-by: Sarnie, Nick --- llvm/test/tools/sycl-post-link/registerallocmode.ll | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/test/tools/sycl-post-link/registerallocmode.ll b/llvm/test/tools/sycl-post-link/registerallocmode.ll index 2eb92ff00b372..1afdc3023a6df 100644 --- a/llvm/test/tools/sycl-post-link/registerallocmode.ll +++ b/llvm/test/tools/sycl-post-link/registerallocmode.ll @@ -19,11 +19,11 @@ ; CHECK: {{.*}}_esimd_1.ll|{{.*}}_esimd_1.prop|{{.*}}_esimd_1.sym ; CHECK-ESIMD-LargeGRF-PROP: isEsimdImage=1|1 -; CHECK-ESIMD-LargeGRF-PROP: RegisterAllocMode=1|2 +; CHECK-ESIMD-LargeGRF-PROP: sycl-register-alloc-mode=1|2 -; CHECK-SYCL-LargeGRF-PROP: RegisterAllocMode=1|2 +; CHECK-SYCL-LargeGRF-PROP: sycl-register-alloc-mode=1|2 -; CHECK-SYCL-PROP-NOT: RegisterAllocMode +; CHECK-SYCL-PROP-NOT: sycl-register-alloc-mode ; CHECK-SYCL-SYM: __SYCL_kernel ; CHECK-SYCL-SYM-EMPTY: @@ -34,7 +34,7 @@ ; CHECK-ESIMD-SYM: __ESIMD_kernel ; CHECK-ESIMD-SYM-EMPTY: -; CHECK-ESIMD-PROP-NOT: RegisterAllocMode +; CHECK-ESIMD-PROP-NOT: sycl-register-alloc-mode ; CHECK-ESIMD-LargeGRF-SYM: __ESIMD_large_grf_kernel ; CHECK-ESIMD-LargeGRF-SYM-EMPTY: @@ -68,7 +68,7 @@ entry: } attributes #0 = { "sycl-module-id"="a.cpp" } -attributes #1 = { "sycl-module-id"="a.cpp" "RegisterAllocMode"="2" } +attributes #1 = { "sycl-module-id"="a.cpp" "sycl-register-alloc-mode"="2" } !0 = !{} !1 = !{i32 1}