From 9ed078bbd44fb3020cfa45fd4794557e4bc3ca25 Mon Sep 17 00:00:00 2001 From: Elizabeth Andrews Date: Tue, 8 Dec 2020 08:17:14 -0800 Subject: [PATCH] [SYCL] Add Clang support for FPGA loop fusion function attributes This patch adds support for FPGA function attributes loop_fuse and loop_fuse_independent. [[intel::loop_fuse(N)]] is a strong request, to the extent possible, to fuse loops within the function, that are contained in at most N-1 other loops within the function. If the optional parameter N is omitted, it is a strong request, to the extent possible, to fuse loops within the function that are not contained in any other loop within the function. [[intel::loop_fuse_independent(N)]] is used to guarantee that fusion safety analysis can ignore negative-distance dependences between these loops. FrontEnd Specifications: The attributes take one optional parameter, a constant integral expression between 0 and 1024*1024 inclusive. The parameter may be a template parameter. The same function definition can have at most one of these two attributes. The attributes can be applied explicitly to a kernel. However, the attributes should not be propagated to callers, i.e., they should not be propagated from device functions to the kernel. The LLVM IR representation is function metadata, as follows: define i32 @foo() !loop_fuse !0 !0 = !{i32 N, i32 D} where N is the value specified by the optional attribute argument. If the optional argument is omitted, N is set to 1. D is equal to 0 for [[intel::loop_fuse]] and 1 for [[intel::loop_fuse_independent]]. 
Signed-off-by: Elizabeth Andrews --- clang/include/clang/Basic/Attr.td | 34 ++++++ clang/include/clang/Basic/AttrDocs.td | 51 +++++++++ .../include/clang/Basic/AttributeCommonInfo.h | 4 +- clang/include/clang/Sema/Sema.h | 6 ++ clang/lib/CodeGen/CodeGenFunction.cpp | 39 +++++++ clang/lib/Sema/SemaDecl.cpp | 5 + clang/lib/Sema/SemaDeclAttr.cpp | 86 ++++++++++++--- clang/lib/Sema/SemaSYCL.cpp | 19 ++++ .../lib/Sema/SemaTemplateInstantiateDecl.cpp | 17 ++- clang/test/CodeGenSYCL/loop_fuse_device.cpp | 52 +++++++++ clang/test/CodeGenSYCL/loop_fuse_host.cpp | 44 ++++++++ .../test/CodeGenSYCL/loop_fuse_ind_device.cpp | 52 +++++++++ clang/test/CodeGenSYCL/loop_fuse_ind_host.cpp | 44 ++++++++ clang/test/SemaSYCL/loop_fusion.cpp | 102 ++++++++++++++++++ 14 files changed, 540 insertions(+), 15 deletions(-) create mode 100644 clang/test/CodeGenSYCL/loop_fuse_device.cpp create mode 100644 clang/test/CodeGenSYCL/loop_fuse_host.cpp create mode 100644 clang/test/CodeGenSYCL/loop_fuse_ind_device.cpp create mode 100644 clang/test/CodeGenSYCL/loop_fuse_ind_host.cpp create mode 100644 clang/test/SemaSYCL/loop_fusion.cpp diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 77aa73dca918..2a8324157a21 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1300,6 +1300,40 @@ def SYCLIntelNoGlobalWorkOffset : InheritableAttr { let PragmaAttributeSupport = 0; } +def SYCLIntelLoopFuse : InheritableAttr { + let Spellings = [CXX11<"intel","loop_fuse">]; + let Args = [ExprArgument<"Value", /*optional=*/ 1>]; + let LangOpts = [SYCLIsDevice, SYCLIsHost]; + let Subjects = SubjectList<[Function], ErrorDiag>; + let Documentation = [SYCLIntelLoopFuseDocs]; + let PragmaAttributeSupport = 0; + let AdditionalMembers = [{ + static unsigned getMinValue() { + return 0; + } + static unsigned getMaxValue() { + return 1024*1024; + } + }]; +} + +def SYCLIntelLoopFuseIndependent : InheritableAttr { + let Spellings = 
[CXX11<"intel","loop_fuse_independent">]; + let Args = [ExprArgument<"Value", /*optional=*/ 1>]; + let LangOpts = [SYCLIsDevice, SYCLIsHost]; + let Subjects = SubjectList<[Function], ErrorDiag>; + let Documentation = [SYCLIntelLoopFuseIndependentDocs]; + let PragmaAttributeSupport = 0; + let AdditionalMembers = [{ + static unsigned getMinValue() { + return 0; + } + static unsigned getMaxValue() { + return 1024*1024; + } + }]; +} + def C11NoReturn : InheritableAttr { let Spellings = [Keyword<"_Noreturn">]; let Subjects = SubjectList<[Function], ErrorDiag>; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 807bfc51d8ca..14678bb925fc 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -2608,6 +2608,57 @@ loop should not be fused with any adjacent loop. }]; } +def SYCLIntelLoopFuseDocs : Documentation { + let Category = DocCatFunction; + let Heading = "intel::loop_fuse"; + let Content = [{ +``[[intel::loop_fuse(N)]]`` attribute applies to a function/lambda function. It +is a strong request, to the extent possible, to fuse the loops within the +function, that are contained in at most N-1 other loops within the function. If +the optional parameter N is omitted, it is a strong request, to the extent possible, +to fuse loops within the function that are not contained in any other loop within +the function. This attribute should be passed through to the FPGA backend and +ignored by the emulator. ``[[intel::loop_fuse(N)]]`` should not be propagated to +the caller. + +.. code-block:: c++ + + [[intel::loop_fuse(N)]] + int foo() {} + +``[[intel::loop_fuse(N)]]`` takes one optional parameter, a constant integral +expression N with value greater than or equal to 0. The parameter N may be a +template parameter. 
+ + }]; +} + +def SYCLIntelLoopFuseIndependentDocs : Documentation { + let Category = DocCatFunction; + let Heading = "intel::loop_fuse_independent"; + let Content = [{ +``[[intel::loop_fuse_independent(N)]]`` attribute applies to a function/lambda function. +It is a strong request, to the extent possible, to fuse the loops within the +function, that are contained in at most N-1 other loops within the function. It also +guarantees that fusion safety analysis can ignore negative-distance dependences between +these loops. If the optional parameter N is omitted, it is a strong request, to the extent +possible, to fuse loops within the function that are not contained in any other loop within +the function. This attribute should be passed through to the FPGA backend and +ignored by the emulator. ``[[intel::loop_fuse_independent(N)]]`` should not be propagated +to the caller. + +.. code-block:: c++ + + [[intel::loop_fuse_independent(N)]] + int foo() {} + +``[[intel::loop_fuse_independent(N)]]`` takes one optional parameter, a constant integral +expression N with value greater than or equal to 0. The parameter N may be a template +parameter. 
+ + }]; +} + def SYCLDeviceIndirectlyCallableDocs : Documentation { let Category = DocCatFunction; let Heading = "intel::device_indirectly_callable"; diff --git a/clang/include/clang/Basic/AttributeCommonInfo.h b/clang/include/clang/Basic/AttributeCommonInfo.h index e58613d61af1..374119613628 100644 --- a/clang/include/clang/Basic/AttributeCommonInfo.h +++ b/clang/include/clang/Basic/AttributeCommonInfo.h @@ -166,7 +166,9 @@ class AttributeCommonInfo { ParsedAttr == AT_SYCLIntelMaxWorkGroupSize || ParsedAttr == AT_SYCLIntelMaxGlobalWorkDim || ParsedAttr == AT_SYCLIntelNoGlobalWorkOffset || - ParsedAttr == AT_SYCLIntelUseStallEnableClusters) + ParsedAttr == AT_SYCLIntelUseStallEnableClusters || + ParsedAttr == AT_SYCLIntelLoopFuse || + ParsedAttr == AT_SYCLIntelLoopFuseIndependent) return true; return false; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index dcf0252bfb1f..a86c8980c887 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -3356,6 +3356,12 @@ class Sema final { WebAssemblyImportModuleAttr *mergeImportModuleAttr( Decl *D, const WebAssemblyImportModuleAttr &AL); + SYCLIntelLoopFuseAttr * + mergeSYCLIntelLoopFuseAttr(Decl *D, const SYCLIntelLoopFuseAttr &Attr, + Expr *E); + SYCLIntelLoopFuseIndependentAttr *mergeSYCLIntelLoopFuseIndependentAttr( + Decl *D, const SYCLIntelLoopFuseIndependentAttr &Attr, Expr *E); + void mergeDeclAttributes(NamedDecl *New, Decl *Old, AvailabilityMergeKind AMK = AMK_Redeclaration); void MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New, diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index a17887d4b5cf..3d35e927729a 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -929,6 +929,45 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, if (getLangOpts().SYCLIsHost && D && D->hasAttr()) Fn->addFnAttr("sycl_kernel"); + if (getLangOpts().SYCL && D && 
(D->hasAttr())) { + auto *A = D->getAttr(); + Expr *E = A->getValue(); + + // Emit '1' if optional argument is omitted. + llvm::ConstantInt *Value = Builder.getInt32(1); + + // Emit argument if specified. + if (E) + Value = Builder.getInt32( + E->getIntegerConstantExpr(D->getASTContext())->getSExtValue()); + + llvm::Metadata *AttrMDArgs[] = { + llvm::ConstantAsMetadata::get(Value), + llvm::ConstantAsMetadata::get(Builder.getInt32(0))}; + Fn->setMetadata("loop_fuse", + llvm::MDNode::get(getLLVMContext(), AttrMDArgs)); + } + + if (getLangOpts().SYCL && D && + (D->hasAttr())) { + auto *A = D->getAttr(); + Expr *E = A->getValue(); + + // Emit '1' if optional argument is omitted. + llvm::ConstantInt *Value = Builder.getInt32(1); + + // Emit argument if specified. + if (E) + Value = Builder.getInt32( + E->getIntegerConstantExpr(D->getASTContext())->getSExtValue()); + + llvm::Metadata *AttrMDArgs[] = { + llvm::ConstantAsMetadata::get(Value), + llvm::ConstantAsMetadata::get(Builder.getInt32(1))}; + Fn->setMetadata("loop_fuse", + llvm::MDNode::get(getLLVMContext(), AttrMDArgs)); + } + if (getLangOpts().OpenCL || getLangOpts().SYCLIsDevice) { // Add metadata for a kernel function. 
if (const FunctionDecl *FD = dyn_cast_or_null(D)) { diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index ccac46196769..e6f250d1e923 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -2614,6 +2614,11 @@ static bool mergeDeclAttribute(Sema &S, NamedDecl *D, NewAttr = S.mergeImportModuleAttr(D, *IMA); else if (const auto *INA = dyn_cast(Attr)) NewAttr = S.mergeImportNameAttr(D, *INA); + else if (const auto *LFA = dyn_cast(Attr)) + NewAttr = S.mergeSYCLIntelLoopFuseAttr(D, *LFA, LFA->getValue()); + else if (const auto *LFIA = dyn_cast(Attr)) + NewAttr = + S.mergeSYCLIntelLoopFuseIndependentAttr(D, *LFIA, LFIA->getValue()); else if (Attr->shouldInheritEvenIfAlreadyPresent() || !DeclHasAttr(D, Attr)) NewAttr = cast(Attr->clone(S.Context)); diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index f2143bb8ad77..ab1eb589a9cf 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -318,6 +318,19 @@ static bool checkAttrMutualExclusion(Sema &S, Decl *D, const Attr &AL) { return false; } +/// Give a warning for duplicate attributes, return true if duplicate. +template +static bool checkForDuplicateAttribute(Sema &S, Decl *D, + const ParsedAttr &Attr) { + // Give a warning for duplicates but not if it's one we've implicitly added. 
+ auto *A = D->getAttr(); + if (A && !A->isImplicit()) { + S.Diag(Attr.getLoc(), diag::warn_duplicate_attribute_exact) << A; + return true; + } + return false; +} + static bool checkDeprecatedSYCLAttributeSpelling(Sema &S, const ParsedAttr &Attr) { if (Attr.getScopeName()->isStr("intelfpga")) @@ -3116,6 +3129,58 @@ static void handleMaxGlobalWorkDimAttr(Sema &S, Decl *D, S.Context, Attr, MaxGlobalWorkDim)); } +SYCLIntelLoopFuseAttr * +Sema::mergeSYCLIntelLoopFuseAttr(Decl *D, const SYCLIntelLoopFuseAttr &Attr, + Expr *E) { + if (checkAttrMutualExclusion(*this, D, + Attr)) + return nullptr; + + if (D->hasAttr()) + return nullptr; + + return ::new (Context) SYCLIntelLoopFuseAttr(Context, Attr, E); +} + +SYCLIntelLoopFuseIndependentAttr *Sema::mergeSYCLIntelLoopFuseIndependentAttr( + Decl *D, const SYCLIntelLoopFuseIndependentAttr &Attr, Expr *E) { + if (checkAttrMutualExclusion(*this, D, Attr)) + return nullptr; + + if (D->hasAttr()) + return nullptr; + + return ::new (Context) SYCLIntelLoopFuseIndependentAttr(Context, Attr, E); +} + +// Handles loop_fuse and loop_fuse_independent. +// These attributes are incompatible with eachother. +template +static void handleLoopFusionAttr(Sema &S, Decl *D, const ParsedAttr &Attr) { + if (D->isInvalidDecl()) + return; + + if (checkForDuplicateAttribute(S, D, Attr)) + return; + + if (checkAttrMutualExclusion(S, D, Attr)) + return; + + unsigned NumArgs = Attr.getNumArgs(); + if (NumArgs > 1) { + S.Diag(Attr.getLoc(), diag::warn_attribute_too_many_arguments) << Attr << 0; + return; + } + + // Handle optional attribute argument. + if (Attr.isArgExpr(0)) + // Attribute argument specified. + S.AddOneConstantValueAttr(D, Attr, Attr.getArgAsExpr(0)); + else + // Attribute argument not specified. 
+ D->addAttr(::new (S.Context) AttrType(S.Context, Attr)); +} + static void handleVecTypeHint(Sema &S, Decl *D, const ParsedAttr &AL) { if (!AL.hasParsedType()) { S.Diag(AL.getLoc(), diag::err_attribute_wrong_number_arguments) << AL << 1; @@ -5281,19 +5346,6 @@ static void handleTypeTagForDatatypeAttr(Sema &S, Decl *D, AL.getMustBeNull())); } -/// Give a warning for duplicate attributes, return true if duplicate. -template -static bool checkForDuplicateAttribute(Sema &S, Decl *D, - const ParsedAttr &Attr) { - // Give a warning for duplicates but not if it's one we've implicitly added. - auto *A = D->getAttr(); - if (A && !A->isImplicit()) { - S.Diag(Attr.getLoc(), diag::warn_duplicate_attribute_exact) << A; - return true; - } - return false; -} - static void handleNoGlobalWorkOffsetAttr(Sema &S, Decl *D, const ParsedAttr &Attr) { if (S.LangOpts.SYCLIsHost) @@ -8419,6 +8471,14 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, case ParsedAttr::AT_SYCLIntelUseStallEnableClusters: handleUseStallEnableClustersAttr(S, D, AL); break; + case ParsedAttr::AT_SYCLIntelLoopFuse: + handleLoopFusionAttr(S, D, AL); + break; + case ParsedAttr::AT_SYCLIntelLoopFuseIndependent: + handleLoopFusionAttr(S, D, AL); + break; case ParsedAttr::AT_VecTypeHint: handleVecTypeHint(S, D, AL); break; diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 5ab884860dcd..be9babed7590 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -566,6 +566,23 @@ class MarkDeviceFunction : public RecursiveASTVisitor { (KernelBody != FD) && !FD->hasAttr()) FD->addAttr(SYCLSimdAttr::CreateImplicit(SemaRef.getASTContext())); + // Attribute "loop_fuse" can be applied explicitly on kernel function. + // Attribute should not be propagated from device functions to kernel + if (auto *A = FD->getAttr()) { + if (ParentFD == SYCLKernel) { + Attrs.insert(A); + } + } + + // Attribute "loop_fuse_independent" can be applied explicitly on kernel + // function. 
Attribute should not be propagated from device functions to + // kernel + if (auto *A = FD->getAttr()) { + if (ParentFD == SYCLKernel) { + Attrs.insert(A); + } + } + // TODO: vec_len_hint should be handled here CallGraphNode *N = SYCLCG.getNode(FD); @@ -3282,6 +3299,8 @@ void Sema::MarkDevice(void) { case attr::Kind::SYCLIntelMaxGlobalWorkDim: case attr::Kind::SYCLIntelNoGlobalWorkOffset: case attr::Kind::SYCLIntelUseStallEnableClusters: + case attr::Kind::SYCLIntelLoopFuse: + case attr::Kind::SYCLIntelLoopFuseIndependent: case attr::Kind::SYCLSimd: { if ((A->getKind() == attr::Kind::SYCLSimd) && KernelBody && !KernelBody->getAttr()) { diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 7ca9fb2588bd..608f8fb7b553 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -769,6 +769,18 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs, *this, TemplateArgs, SYCLIntelSchedulerTargetFmaxMhz, New); continue; } + if (const auto *SYCLIntelLoopFuse = + dyn_cast(TmplAttr)) { + instantiateIntelSYCLFunctionAttr( + *this, TemplateArgs, SYCLIntelLoopFuse, New); + continue; + } + if (const auto *SYCLIntelLoopFuseIndependent = + dyn_cast(TmplAttr)) { + instantiateIntelSYCLFunctionAttr( + *this, TemplateArgs, SYCLIntelLoopFuseIndependent, New); + continue; + } // Existing DLL attribute on the instantiation takes precedence. if (TmplAttr->getKind() == attr::DLLExport || TmplAttr->getKind() == attr::DLLImport) { @@ -6193,7 +6205,10 @@ static void processSYCLKernel(Sema &S, FunctionDecl *FD, MangleContext &MC) { if (S.LangOpts.SYCLIsDevice) { S.ConstructOpenCLKernel(FD, MC); } else if (S.LangOpts.SYCLIsHost) { - CXXRecordDecl *CRD = (*FD->param_begin())->getType()->getAsCXXRecordDecl(); + QualType KernelParamTy = (*FD->param_begin())->getType(); + const CXXRecordDecl *CRD = (KernelParamTy->isReferenceType() + ? 
KernelParamTy->getPointeeCXXRecordDecl() + : KernelParamTy->getAsCXXRecordDecl()); for (auto *Method : CRD->methods()) if (Method->getOverloadedOperator() == OO_Call && !Method->hasAttr()) diff --git a/clang/test/CodeGenSYCL/loop_fuse_device.cpp b/clang/test/CodeGenSYCL/loop_fuse_device.cpp new file mode 100644 index 000000000000..f9029ee861aa --- /dev/null +++ b/clang/test/CodeGenSYCL/loop_fuse_device.cpp @@ -0,0 +1,52 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown-sycldevice -emit-llvm -o - %s | FileCheck %s + +#include "sycl.hpp" + +using namespace cl::sycl; +queue q; + +[[intel::loop_fuse(5)]] void foo() {} + +template +class KernelFunctor5 { +public: + [[intel::loop_fuse(SIZE)]] void operator()() const {} +}; + +void bar() { + + q.submit([&](handler &h) { + // Test template argument + KernelFunctor5<5> f5; + h.single_task(f5); + + // Test different argument sizes. + // Emit 1 if there is no argument. + h.single_task( + []() [[intel::loop_fuse]]{}); + h.single_task( + []() [[intel::loop_fuse(0)]]{}); + h.single_task( + []() [[intel::loop_fuse(1)]]{}); + h.single_task( + []() [[intel::loop_fuse(10)]]{}); + + // Test attribute is not propagated. 
+ h.single_task( + []() { foo(); }); + }); +} + +// CHECK: define spir_kernel void @"{{.*}}kernel_name_1"() {{.*}} !loop_fuse ![[LF5:[0-9]+]] +// CHECK: define spir_kernel void @"{{.*}}kernel_name_2"() {{.*}} !loop_fuse ![[LF1:[0-9]+]] +// CHECK: define spir_kernel void @"{{.*}}kernel_name_3"() {{.*}} !loop_fuse ![[LF0:[0-9]+]] +// CHECK: define spir_kernel void @"{{.*}}kernel_name_4"() {{.*}} !loop_fuse ![[LF1]] +// CHECK: define spir_kernel void @"{{.*}}kernel_name_5"() {{.*}} !loop_fuse ![[LF10:[0-9]+]] +// CHECK: define spir_kernel void @"{{.*}}kernel_name_6"() +// CHECK-NOT: !loop_fuse +// CHECK-SAME: { +// CHECK: define spir_func void @{{.*}}foo{{.*}} !loop_fuse ![[LF5]] +// CHECK: ![[LF5]] = !{i32 5, i32 0} +// CHECK: ![[LF1]] = !{i32 1, i32 0} +// CHECK: ![[LF0]] = !{i32 0, i32 0} +// CHECK: ![[LF10]] = !{i32 10, i32 0} diff --git a/clang/test/CodeGenSYCL/loop_fuse_host.cpp b/clang/test/CodeGenSYCL/loop_fuse_host.cpp new file mode 100644 index 000000000000..8d9b110bd9e6 --- /dev/null +++ b/clang/test/CodeGenSYCL/loop_fuse_host.cpp @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-host -triple -x86_64-unknown-linux-gnu -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s + +template +__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { + kernelFunc(); +} + +template +class KernelFunctor5 { +public: + [[intel::loop_fuse(SIZE)]] void operator()() const {} +}; + +[[intel::loop_fuse]] void func1() {} +[[intel::loop_fuse(0)]] void func2() {} +[[intel::loop_fuse(1)]] void func3() {} +[[intel::loop_fuse(10)]] void func4() {} + +void foo() { + + KernelFunctor5<5> f5; + kernel(f5); + + kernel( + []() [[intel::loop_fuse(10)]]{}); + + kernel( + []() { func4(); }); +} + +// CHECK: define void @{{.*}}func1{{.*}} !loop_fuse ![[LF1:[0-9]+]] +// CHECK: define void @{{.*}}func2{{.*}} !loop_fuse ![[LF0:[0-9]+]] +// CHECK: define void @{{.*}}func3{{.*}} !loop_fuse ![[LF1]] +// CHECK: define void @{{.*}}func4{{.*}} !loop_fuse ![[LF10:[0-9]+]] +// CHECK: 
define linkonce_odr void @{{.*}}KernelFunctor5{{.*}} !loop_fuse ![[LF5:[0-9]+]] +// CHECK: define internal void @"{{.*}}foo{{.*}}"(%class.anon* %this){{.*}}!loop_fuse ![[LF10]] +// CHECK: define internal void @"{{.*}}foo{{.*}}"(%class.anon.0* %this) +// CHECK-NOT: !loop_fuse +// CHECK-SAME: { + +// CHECK: ![[LF1]] = !{i32 1, i32 0} +// CHECK: ![[LF0]] = !{i32 0, i32 0} +// CHECK: ![[LF10]] = !{i32 10, i32 0} +// CHECK: ![[LF5]] = !{i32 5, i32 0} diff --git a/clang/test/CodeGenSYCL/loop_fuse_ind_device.cpp b/clang/test/CodeGenSYCL/loop_fuse_ind_device.cpp new file mode 100644 index 000000000000..57b75d4f9c98 --- /dev/null +++ b/clang/test/CodeGenSYCL/loop_fuse_ind_device.cpp @@ -0,0 +1,52 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown-sycldevice -emit-llvm -o - %s | FileCheck %s + +#include "sycl.hpp" + +using namespace cl::sycl; +queue q; + +[[intel::loop_fuse_independent(5)]] void foo() {} + +template +class KernelFunctor5 { +public: + [[intel::loop_fuse_independent(SIZE)]] void operator()() const {} +}; + +void bar() { + + q.submit([&](handler &h) { + // Test template argument + KernelFunctor5<5> f5; + h.single_task(f5); + + // Test different argument sizes. + // Emit 1 if there is no argument. + h.single_task( + []() [[intel::loop_fuse_independent]]{}); + h.single_task( + []() [[intel::loop_fuse_independent(0)]]{}); + h.single_task( + []() [[intel::loop_fuse_independent(1)]]{}); + h.single_task( + []() [[intel::loop_fuse_independent(10)]]{}); + + // Test attribute is not propagated. 
+ h.single_task( + []() { foo(); }); + }); +} + +// CHECK: define spir_kernel void @"{{.*}}kernel_name_1"() {{.*}} !loop_fuse ![[LFI5:[0-9]+]] +// CHECK: define spir_kernel void @"{{.*}}kernel_name_2"() {{.*}} !loop_fuse ![[LFI1:[0-9]+]] +// CHECK: define spir_kernel void @"{{.*}}kernel_name_3"() {{.*}} !loop_fuse ![[LFI0:[0-9]+]] +// CHECK: define spir_kernel void @"{{.*}}kernel_name_4"() {{.*}} !loop_fuse ![[LFI1]] +// CHECK: define spir_kernel void @"{{.*}}kernel_name_5"() {{.*}} !loop_fuse ![[LFI10:[0-9]+]] +// CHECK: define spir_kernel void @"{{.*}}kernel_name_6"() +// CHECK-NOT: !loop_fuse +// CHECK-SAME: { +// CHECK: define spir_func void @{{.*}}foo{{.*}} !loop_fuse ![[LFI5]] +// CHECK: ![[LFI5]] = !{i32 5, i32 1} +// CHECK: ![[LFI1]] = !{i32 1, i32 1} +// CHECK: ![[LFI0]] = !{i32 0, i32 1} +// CHECK: ![[LFI10]] = !{i32 10, i32 1} diff --git a/clang/test/CodeGenSYCL/loop_fuse_ind_host.cpp b/clang/test/CodeGenSYCL/loop_fuse_ind_host.cpp new file mode 100644 index 000000000000..59f8b237aee0 --- /dev/null +++ b/clang/test/CodeGenSYCL/loop_fuse_ind_host.cpp @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-host -triple -x86_64-unknown-linux-gnu -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s + +template +__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { + kernelFunc(); +} + +template +class KernelFunctor5 { +public: + [[intel::loop_fuse_independent(SIZE)]] void operator()() const {} +}; + +[[intel::loop_fuse_independent]] void func1() {} +[[intel::loop_fuse_independent(0)]] void func2() {} +[[intel::loop_fuse_independent(1)]] void func3() {} +[[intel::loop_fuse_independent(10)]] void func4() {} + +void foo() { + + KernelFunctor5<5> f5; + kernel(f5); + + kernel( + []() [[intel::loop_fuse_independent(10)]]{}); + + kernel( + []() { func4(); }); +} + +// CHECK: define void @{{.*}}func1{{.*}} !loop_fuse ![[LFI1:[0-9]+]] +// CHECK: define void @{{.*}}func2{{.*}} !loop_fuse ![[LFI0:[0-9]+]] +// CHECK: define void @{{.*}}func3{{.*}} 
!loop_fuse ![[LFI1]] +// CHECK: define void @{{.*}}func4{{.*}} !loop_fuse ![[LFI10:[0-9]+]] +// CHECK: define linkonce_odr void @{{.*}}KernelFunctor5{{.*}} !loop_fuse ![[LFI5:[0-9]+]] +// CHECK: define internal void @"{{.*}}foo{{.*}}"(%class.anon* %this){{.*}}!loop_fuse ![[LFI10]] +// CHECK: define internal void @"{{.*}}foo{{.*}}"(%class.anon.0* %this) +// CHECK-NOT: !loop_fuse +// CHECK-SAME: { + +// CHECK: ![[LFI1]] = !{i32 1, i32 1} +// CHECK: ![[LFI0]] = !{i32 0, i32 1} +// CHECK: ![[LFI10]] = !{i32 10, i32 1} +// CHECK: ![[LFI5]] = !{i32 5, i32 1} diff --git a/clang/test/SemaSYCL/loop_fusion.cpp b/clang/test/SemaSYCL/loop_fusion.cpp new file mode 100644 index 000000000000..873cef88b961 --- /dev/null +++ b/clang/test/SemaSYCL/loop_fusion.cpp @@ -0,0 +1,102 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-device -internal-isystem %S/Inputs -Wno-sycl-2017-compat -ast-dump -verify %s | FileCheck %s + +#include "sycl.hpp" + +using namespace cl::sycl; +queue q; + +// CHECK: FunctionDecl {{.*}} func1 'void ()' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: SYCLIntelLoopFuseAttr +// CHECK-NEXT: NULL +[[intel::loop_fuse]] void func1() {} + +// CHECK: FunctionDecl {{.*}} func2 'void ()' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: SYCLIntelLoopFuseAttr +// CHECK-NEXT: ConstantExpr +// CHECK-NEXT: value: Int 0 +[[intel::loop_fuse(0)]] void func2() {} + +// CHECK: FunctionDecl {{.*}} func3 'void ()' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: SYCLIntelLoopFuseIndependentAttr +// CHECK-NEXT: NULL +[[intel::loop_fuse_independent]] void func3() {} + +// CHECK: FunctionDecl {{.*}} func4 'void ()' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: SYCLIntelLoopFuseIndependentAttr +// CHECK-NEXT: ConstantExpr +// CHECK-NEXT: value: Int 3 +[[intel::loop_fuse_independent(3)]] void func4() {} + +class KernelFunctor { +public: + void operator()() const { + func1(); + func3(); + } +}; + +template +class KernelFunctor2 { +public: + [[intel::loop_fuse(N)]] void operator()() const { + } +}; + +void 
foo() { + q.submit([&](handler &h) { + // CHECK: FunctionDecl {{.*}}kernel_name_1 'void ()' + // CHECK-NOT: SYCLIntelLoopFuseAttr + // CHECK-NOT: SYCLIntelLoopFuseIndependentAttr + KernelFunctor f1; + h.single_task(f1); + + // CHECK: FunctionDecl {{.*}}kernel_name_2 'void ()' + // CHECK: SYCLIntelLoopFuseAttr + // CHECK-NEXT: SubstNonTypeTemplateParmExpr + // CHECK-NEXT: NonTypeTemplateParmDecl + // CHECK-NEXT: IntegerLiteral {{.*}} 'int' 3 + KernelFunctor2<3> f2; + h.single_task(f2); + + // CHECK: FunctionDecl {{.*}}kernel_name_3 'void ()' + // CHECK: SYCLIntelLoopFuseIndependentAttr + h.single_task( + []() [[intel::loop_fuse_independent]]{}); + }); + + [[intel::loop_fuse]] int testVar = 0; // expected-error{{'loop_fuse' attribute only applies to functions}} +} + +[[intel::loop_fuse(1048577)]] void func5() {} // expected-error{{'loop_fuse' attribute requires integer constant between 0 and 1048576 inclusive}} +[[intel::loop_fuse_independent(-1)]] void func6() {} // expected-error{{'loop_fuse_independent' attribute requires integer constant between 0 and 1048576 inclusive}} + +[[intel::loop_fuse]] [[intel::loop_fuse(10)]] void func7() {} // expected-warning {{attribute 'loop_fuse' is already applied}} +[[intel::loop_fuse_independent]] [[intel::loop_fuse_independent]] void func8() {} // // expected-warning {{attribute 'loop_fuse_independent' is already applied}} + +// expected-error@+2 {{'loop_fuse_independent' and 'loop_fuse' attributes are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +[[intel::loop_fuse]] [[intel::loop_fuse_independent]] void func9(); + +// expected-error@+2 {{'loop_fuse' and 'loop_fuse_independent' attributes are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +[[intel::loop_fuse_independent]] [[intel::loop_fuse]] void func10(); + +// expected-error@+2 {{'loop_fuse' and 'loop_fuse_independent' attributes are not compatible}} +// expected-note@+2 {{conflicting attribute is here}} 
+[[intel::loop_fuse]] void func11(); +[[intel::loop_fuse_independent]] void func11() {} + +// expected-error@+2 {{'loop_fuse_independent' and 'loop_fuse' attributes are not compatible}} +// expected-note@+2 {{conflicting attribute is here}} +[[intel::loop_fuse_independent]] void func12(); +[[intel::loop_fuse]] void func12() {} + +[[intel::loop_fuse]] void func13(); +[[intel::loop_fuse]] void func13() {} + +[[intel::loop_fuse_independent]] void func14(); +[[intel::loop_fuse_independent]] void func14() {}