diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 51820842e7e62..4d93d7252aa74 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1313,6 +1313,40 @@ def SYCLIntelNoGlobalWorkOffset : InheritableAttr { let PragmaAttributeSupport = 0; } +def SYCLIntelLoopFuse : InheritableAttr { + let Spellings = [CXX11<"intel","loop_fuse">]; + let Args = [ExprArgument<"Value", /*optional=*/ 1>]; + let LangOpts = [SYCLIsDevice, SYCLIsHost]; + let Subjects = SubjectList<[Function], ErrorDiag>; + let Documentation = [SYCLIntelLoopFuseDocs]; + let PragmaAttributeSupport = 0; + let AdditionalMembers = [{ + static unsigned getMinValue() { + return 0; + } + static unsigned getMaxValue() { + return 1024*1024; + } + }]; +} + +def SYCLIntelLoopFuseIndependent : InheritableAttr { + let Spellings = [CXX11<"intel","loop_fuse_independent">]; + let Args = [ExprArgument<"Value", /*optional=*/ 1>]; + let LangOpts = [SYCLIsDevice, SYCLIsHost]; + let Subjects = SubjectList<[Function], ErrorDiag>; + let Documentation = [SYCLIntelLoopFuseIndependentDocs]; + let PragmaAttributeSupport = 0; + let AdditionalMembers = [{ + static unsigned getMinValue() { + return 0; + } + static unsigned getMaxValue() { + return 1024*1024; + } + }]; +} + def C11NoReturn : InheritableAttr { let Spellings = [Keyword<"_Noreturn">]; let Subjects = SubjectList<[Function], ErrorDiag>; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 7a180ee646b46..20dba47ad85fc 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -2608,6 +2608,57 @@ loop should not be fused with any adjacent loop. }]; } +def SYCLIntelLoopFuseDocs : Documentation { + let Category = DocCatFunction; + let Heading = "intel::loop_fuse"; + let Content = [{ +``[[intel::loop_fuse(N)]]`` attribute applies to a function/lambda function. 
It +is a strong request, to the extent possible, to fuse the loops within the +function that are contained in at most N-1 other loops within the function. If +the optional parameter N is omitted, it is a strong request, to the extent possible, +to fuse loops within the function that are not contained in any other loop within +the function (equivalent to N = 1). This attribute should be passed through to the FPGA backend and +ignored by the emulator. ``[[intel::loop_fuse(N)]]`` should not be propagated to +the caller. + +.. code-block:: c++ + + [[intel::loop_fuse(N)]] + int foo() {} + +``[[intel::loop_fuse(N)]]`` takes one optional parameter, a constant integral +expression N with a value between 0 and 1048576, inclusive. The parameter N may be a +template parameter. + + }]; +} + +def SYCLIntelLoopFuseIndependentDocs : Documentation { + let Category = DocCatFunction; + let Heading = "intel::loop_fuse_independent"; + let Content = [{ +``[[intel::loop_fuse_independent(N)]]`` attribute applies to a function/lambda function. +It is a strong request, to the extent possible, to fuse the loops within the +function that are contained in at most N-1 other loops within the function. It also +guarantees that fusion safety analysis can ignore negative-distance dependences between +these loops. If the optional parameter N is omitted, it is a strong request, to the extent +possible, to fuse loops within the function that are not contained in any other loop within +the function (equivalent to N = 1). This attribute should be passed through to the FPGA backend and +ignored by the emulator. ``[[intel::loop_fuse_independent(N)]]`` should not be propagated +to the caller. + +.. code-block:: c++ + + [[intel::loop_fuse_independent(N)]] + int foo() {} + +``[[intel::loop_fuse_independent(N)]]`` takes one optional parameter, a constant integral +expression N with a value between 0 and 1048576, inclusive. The parameter N may be a template +parameter. 
+ + }]; +} + def SYCLDeviceIndirectlyCallableDocs : Documentation { let Category = DocCatFunction; let Heading = "intel::device_indirectly_callable"; diff --git a/clang/include/clang/Basic/AttributeCommonInfo.h b/clang/include/clang/Basic/AttributeCommonInfo.h index e58613d61af18..3741196136280 100644 --- a/clang/include/clang/Basic/AttributeCommonInfo.h +++ b/clang/include/clang/Basic/AttributeCommonInfo.h @@ -166,7 +166,9 @@ class AttributeCommonInfo { ParsedAttr == AT_SYCLIntelMaxWorkGroupSize || ParsedAttr == AT_SYCLIntelMaxGlobalWorkDim || ParsedAttr == AT_SYCLIntelNoGlobalWorkOffset || - ParsedAttr == AT_SYCLIntelUseStallEnableClusters) + ParsedAttr == AT_SYCLIntelUseStallEnableClusters || + ParsedAttr == AT_SYCLIntelLoopFuse || + ParsedAttr == AT_SYCLIntelLoopFuseIndependent) return true; return false; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 8555b685ab82c..df335bf488a65 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -3356,6 +3356,12 @@ class Sema final { WebAssemblyImportModuleAttr *mergeImportModuleAttr( Decl *D, const WebAssemblyImportModuleAttr &AL); + SYCLIntelLoopFuseAttr * + mergeSYCLIntelLoopFuseAttr(Decl *D, const SYCLIntelLoopFuseAttr &Attr, + Expr *E); + SYCLIntelLoopFuseIndependentAttr *mergeSYCLIntelLoopFuseIndependentAttr( + Decl *D, const SYCLIntelLoopFuseIndependentAttr &Attr, Expr *E); + void mergeDeclAttributes(NamedDecl *New, Decl *Old, AvailabilityMergeKind AMK = AMK_Redeclaration); void MergeTypedefNameDecl(Scope *S, TypedefNameDecl *New, diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index f94a6a6973b6a..8bab8cf60d50b 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -950,6 +950,45 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, if (getLangOpts().SYCLIsHost && D && D->hasAttr()) Fn->addFnAttr("sycl_kernel"); + if (getLangOpts().SYCL && D && 
(D->hasAttr())) { + auto *A = D->getAttr(); + Expr *E = A->getValue(); + + // Emit '1' if optional argument is omitted. + llvm::ConstantInt *Value = Builder.getInt32(1); + + // Emit argument if specified. + if (E) + Value = Builder.getInt32( + E->getIntegerConstantExpr(D->getASTContext())->getSExtValue()); + + llvm::Metadata *AttrMDArgs[] = { + llvm::ConstantAsMetadata::get(Value), + llvm::ConstantAsMetadata::get(Builder.getInt32(0))}; + Fn->setMetadata("loop_fuse", + llvm::MDNode::get(getLLVMContext(), AttrMDArgs)); + } + + if (getLangOpts().SYCL && D && + (D->hasAttr())) { + auto *A = D->getAttr(); + Expr *E = A->getValue(); + + // Emit '1' if optional argument is omitted. + llvm::ConstantInt *Value = Builder.getInt32(1); + + // Emit argument if specified. + if (E) + Value = Builder.getInt32( + E->getIntegerConstantExpr(D->getASTContext())->getSExtValue()); + + llvm::Metadata *AttrMDArgs[] = { + llvm::ConstantAsMetadata::get(Value), + llvm::ConstantAsMetadata::get(Builder.getInt32(1))}; + Fn->setMetadata("loop_fuse", + llvm::MDNode::get(getLLVMContext(), AttrMDArgs)); + } + if (getLangOpts().OpenCL || getLangOpts().SYCLIsDevice) { // Add metadata for a kernel function. 
if (const FunctionDecl *FD = dyn_cast_or_null(D)) { diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 82468a6cc4303..84dfd9e71c599 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -2614,6 +2614,11 @@ static bool mergeDeclAttribute(Sema &S, NamedDecl *D, NewAttr = S.mergeImportModuleAttr(D, *IMA); else if (const auto *INA = dyn_cast(Attr)) NewAttr = S.mergeImportNameAttr(D, *INA); + else if (const auto *LFA = dyn_cast(Attr)) + NewAttr = S.mergeSYCLIntelLoopFuseAttr(D, *LFA, LFA->getValue()); + else if (const auto *LFIA = dyn_cast(Attr)) + NewAttr = + S.mergeSYCLIntelLoopFuseIndependentAttr(D, *LFIA, LFIA->getValue()); else if (Attr->shouldInheritEvenIfAlreadyPresent() || !DeclHasAttr(D, Attr)) NewAttr = cast(Attr->clone(S.Context)); diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index a40c7458cbb2e..0ea35864b35a0 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -318,6 +318,19 @@ static bool checkAttrMutualExclusion(Sema &S, Decl *D, const Attr &AL) { return false; } +/// Give a warning for duplicate attributes, return true if duplicate. +template +static bool checkForDuplicateAttribute(Sema &S, Decl *D, + const ParsedAttr &Attr) { + // Give a warning for duplicates but not if it's one we've implicitly added. 
+ auto *A = D->getAttr(); + if (A && !A->isImplicit()) { + S.Diag(Attr.getLoc(), diag::warn_duplicate_attribute_exact) << A; + return true; + } + return false; +} + static bool checkDeprecatedSYCLAttributeSpelling(Sema &S, const ParsedAttr &Attr) { if (Attr.getScopeName()->isStr("intelfpga")) @@ -3091,6 +3104,58 @@ static void handleMaxGlobalWorkDimAttr(Sema &S, Decl *D, E); } +SYCLIntelLoopFuseAttr * +Sema::mergeSYCLIntelLoopFuseAttr(Decl *D, const SYCLIntelLoopFuseAttr &Attr, + Expr *E) { + if (checkAttrMutualExclusion(*this, D, + Attr)) + return nullptr; + + if (D->hasAttr()) + return nullptr; + + return ::new (Context) SYCLIntelLoopFuseAttr(Context, Attr, E); +} + +SYCLIntelLoopFuseIndependentAttr *Sema::mergeSYCLIntelLoopFuseIndependentAttr( + Decl *D, const SYCLIntelLoopFuseIndependentAttr &Attr, Expr *E) { + if (checkAttrMutualExclusion(*this, D, Attr)) + return nullptr; + + if (D->hasAttr()) + return nullptr; + + return ::new (Context) SYCLIntelLoopFuseIndependentAttr(Context, Attr, E); +} + +// Handles loop_fuse and loop_fuse_independent. +// These attributes are incompatible with each other. +template +static void handleLoopFusionAttr(Sema &S, Decl *D, const ParsedAttr &Attr) { + if (D->isInvalidDecl()) + return; + + if (checkForDuplicateAttribute(S, D, Attr)) + return; + + if (checkAttrMutualExclusion(S, D, Attr)) + return; + + unsigned NumArgs = Attr.getNumArgs(); + if (NumArgs > 1) { + S.Diag(Attr.getLoc(), diag::warn_attribute_too_many_arguments) << Attr << 0; + return; + } + + // Handle optional attribute argument. + if (Attr.isArgExpr(0)) + // Attribute argument specified. + S.AddOneConstantValueAttr(D, Attr, Attr.getArgAsExpr(0)); + else + // Attribute argument not specified. 
+ D->addAttr(::new (S.Context) AttrType(S.Context, Attr)); +} + static void handleVecTypeHint(Sema &S, Decl *D, const ParsedAttr &AL) { if (!AL.hasParsedType()) { S.Diag(AL.getLoc(), diag::err_attribute_wrong_number_arguments) << AL << 1; @@ -5256,19 +5321,6 @@ static void handleTypeTagForDatatypeAttr(Sema &S, Decl *D, AL.getMustBeNull())); } -/// Give a warning for duplicate attributes, return true if duplicate. -template -static bool checkForDuplicateAttribute(Sema &S, Decl *D, - const ParsedAttr &Attr) { - // Give a warning for duplicates but not if it's one we've implicitly added. - auto *A = D->getAttr(); - if (A && !A->isImplicit()) { - S.Diag(Attr.getLoc(), diag::warn_duplicate_attribute_exact) << A; - return true; - } - return false; -} - static void handleNoGlobalWorkOffsetAttr(Sema &S, Decl *D, const ParsedAttr &Attr) { if (S.LangOpts.SYCLIsHost) @@ -8388,6 +8440,14 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, case ParsedAttr::AT_SYCLIntelUseStallEnableClusters: handleUseStallEnableClustersAttr(S, D, AL); break; + case ParsedAttr::AT_SYCLIntelLoopFuse: + handleLoopFusionAttr(S, D, AL); + break; + case ParsedAttr::AT_SYCLIntelLoopFuseIndependent: + handleLoopFusionAttr(S, D, AL); + break; case ParsedAttr::AT_VecTypeHint: handleVecTypeHint(S, D, AL); break; diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index c15e58e9d28b1..2d04c8a9fb597 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -566,6 +566,23 @@ class MarkDeviceFunction : public RecursiveASTVisitor { (KernelBody != FD) && !FD->hasAttr()) FD->addAttr(SYCLSimdAttr::CreateImplicit(SemaRef.getASTContext())); + // Attribute "loop_fuse" can be applied explicitly on kernel function. 
+ // Attribute should not be propagated from device functions to kernel + if (auto *A = FD->getAttr()) { + if (ParentFD == SYCLKernel) { + Attrs.insert(A); + } + } + + // Attribute "loop_fuse_independent" can be applied explicitly on kernel + // function. Attribute should not be propagated from device functions to + // kernel + if (auto *A = FD->getAttr()) { + if (ParentFD == SYCLKernel) { + Attrs.insert(A); + } + } + // TODO: vec_len_hint should be handled here CallGraphNode *N = SYCLCG.getNode(FD); @@ -3283,6 +3300,8 @@ void Sema::MarkDevice(void) { case attr::Kind::SYCLIntelMaxGlobalWorkDim: case attr::Kind::SYCLIntelNoGlobalWorkOffset: case attr::Kind::SYCLIntelUseStallEnableClusters: + case attr::Kind::SYCLIntelLoopFuse: + case attr::Kind::SYCLIntelLoopFuseIndependent: case attr::Kind::SYCLSimd: { if ((A->getKind() == attr::Kind::SYCLSimd) && KernelBody && !KernelBody->getAttr()) { diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 5130bad1f7b5c..5ef117cd6716d 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -769,6 +769,16 @@ void Sema::InstantiateAttrs(const MultiLevelTemplateArgumentList &TemplateArgs, *this, TemplateArgs, SYCLIntelSchedulerTargetFmaxMhz, New); continue; } + if (const auto *SYCLIntelLoopFuse = + dyn_cast(TmplAttr)) { + instantiateIntelSYCLFunctionAttr( + *this, TemplateArgs, SYCLIntelLoopFuse, New); + continue; + } + if (const auto *SYCLIntelLoopFuseIndependent = + dyn_cast(TmplAttr)) { + instantiateIntelSYCLFunctionAttr( + *this, TemplateArgs, SYCLIntelLoopFuseIndependent, New); if (const auto *SYCLIntelMaxGlobalWorkDim = dyn_cast(TmplAttr)) { instantiateIntelSYCLFunctionAttr( @@ -6198,7 +6208,10 @@ static void processSYCLKernel(Sema &S, FunctionDecl *FD, MangleContext &MC) { if (S.LangOpts.SYCLIsDevice) { S.ConstructOpenCLKernel(FD, MC); } else if (S.LangOpts.SYCLIsHost) { - CXXRecordDecl *CRD = 
(*FD->param_begin())->getType()->getAsCXXRecordDecl(); + QualType KernelParamTy = (*FD->param_begin())->getType(); + const CXXRecordDecl *CRD = (KernelParamTy->isReferenceType() + ? KernelParamTy->getPointeeCXXRecordDecl() + : KernelParamTy->getAsCXXRecordDecl()); for (auto *Method : CRD->methods()) if (Method->getOverloadedOperator() == OO_Call && !Method->hasAttr()) diff --git a/clang/test/CodeGenSYCL/loop_fuse_device.cpp b/clang/test/CodeGenSYCL/loop_fuse_device.cpp new file mode 100644 index 0000000000000..f9029ee861aa0 --- /dev/null +++ b/clang/test/CodeGenSYCL/loop_fuse_device.cpp @@ -0,0 +1,52 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown-sycldevice -emit-llvm -o - %s | FileCheck %s + +#include "sycl.hpp" + +using namespace cl::sycl; +queue q; + +[[intel::loop_fuse(5)]] void foo() {} + +template +class KernelFunctor5 { +public: + [[intel::loop_fuse(SIZE)]] void operator()() const {} +}; + +void bar() { + + q.submit([&](handler &h) { + // Test template argument + KernelFunctor5<5> f5; + h.single_task(f5); + + // Test different argument sizes. + // Emit 1 if there is no argument. + h.single_task( + []() [[intel::loop_fuse]]{}); + h.single_task( + []() [[intel::loop_fuse(0)]]{}); + h.single_task( + []() [[intel::loop_fuse(1)]]{}); + h.single_task( + []() [[intel::loop_fuse(10)]]{}); + + // Test attribute is not propagated. 
+ h.single_task( + []() { foo(); }); + }); +} + +// CHECK: define spir_kernel void @"{{.*}}kernel_name_1"() {{.*}} !loop_fuse ![[LF5:[0-9]+]] +// CHECK: define spir_kernel void @"{{.*}}kernel_name_2"() {{.*}} !loop_fuse ![[LF1:[0-9]+]] +// CHECK: define spir_kernel void @"{{.*}}kernel_name_3"() {{.*}} !loop_fuse ![[LF0:[0-9]+]] +// CHECK: define spir_kernel void @"{{.*}}kernel_name_4"() {{.*}} !loop_fuse ![[LF1]] +// CHECK: define spir_kernel void @"{{.*}}kernel_name_5"() {{.*}} !loop_fuse ![[LF10:[0-9]+]] +// CHECK: define spir_kernel void @"{{.*}}kernel_name_6"() +// CHECK-NOT: !loop_fuse +// CHECK-SAME: { +// CHECK: define spir_func void @{{.*}}foo{{.*}} !loop_fuse ![[LF5]] +// CHECK: ![[LF5]] = !{i32 5, i32 0} +// CHECK: ![[LF1]] = !{i32 1, i32 0} +// CHECK: ![[LF0]] = !{i32 0, i32 0} +// CHECK: ![[LF10]] = !{i32 10, i32 0} diff --git a/clang/test/CodeGenSYCL/loop_fuse_host.cpp b/clang/test/CodeGenSYCL/loop_fuse_host.cpp new file mode 100644 index 0000000000000..8d9b110bd9e60 --- /dev/null +++ b/clang/test/CodeGenSYCL/loop_fuse_host.cpp @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-host -triple -x86_64-unknown-linux-gnu -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s + +template +__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { + kernelFunc(); +} + +template +class KernelFunctor5 { +public: + [[intel::loop_fuse(SIZE)]] void operator()() const {} +}; + +[[intel::loop_fuse]] void func1() {} +[[intel::loop_fuse(0)]] void func2() {} +[[intel::loop_fuse(1)]] void func3() {} +[[intel::loop_fuse(10)]] void func4() {} + +void foo() { + + KernelFunctor5<5> f5; + kernel(f5); + + kernel( + []() [[intel::loop_fuse(10)]]{}); + + kernel( + []() { func4(); }); +} + +// CHECK: define void @{{.*}}func1{{.*}} !loop_fuse ![[LF1:[0-9]+]] +// CHECK: define void @{{.*}}func2{{.*}} !loop_fuse ![[LF0:[0-9]+]] +// CHECK: define void @{{.*}}func3{{.*}} !loop_fuse ![[LF1]] +// CHECK: define void @{{.*}}func4{{.*}} !loop_fuse ![[LF10:[0-9]+]] +// CHECK: 
define linkonce_odr void @{{.*}}KernelFunctor5{{.*}} !loop_fuse ![[LF5:[0-9]+]] +// CHECK: define internal void @"{{.*}}foo{{.*}}"(%class.anon* %this){{.*}}!loop_fuse ![[LF10]] +// CHECK: define internal void @"{{.*}}foo{{.*}}"(%class.anon.0* %this) +// CHECK-NOT: !loop_fuse +// CHECK-SAME: { + +// CHECK: ![[LF1]] = !{i32 1, i32 0} +// CHECK: ![[LF0]] = !{i32 0, i32 0} +// CHECK: ![[LF10]] = !{i32 10, i32 0} +// CHECK: ![[LF5]] = !{i32 5, i32 0} diff --git a/clang/test/CodeGenSYCL/loop_fuse_ind_device.cpp b/clang/test/CodeGenSYCL/loop_fuse_ind_device.cpp new file mode 100644 index 0000000000000..57b75d4f9c98a --- /dev/null +++ b/clang/test/CodeGenSYCL/loop_fuse_ind_device.cpp @@ -0,0 +1,52 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-device -internal-isystem %S/Inputs -triple spir64-unknown-unknown-sycldevice -emit-llvm -o - %s | FileCheck %s + +#include "sycl.hpp" + +using namespace cl::sycl; +queue q; + +[[intel::loop_fuse_independent(5)]] void foo() {} + +template +class KernelFunctor5 { +public: + [[intel::loop_fuse_independent(SIZE)]] void operator()() const {} +}; + +void bar() { + + q.submit([&](handler &h) { + // Test template argument + KernelFunctor5<5> f5; + h.single_task(f5); + + // Test different argument sizes. + // Emit 1 if there is no argument. + h.single_task( + []() [[intel::loop_fuse_independent]]{}); + h.single_task( + []() [[intel::loop_fuse_independent(0)]]{}); + h.single_task( + []() [[intel::loop_fuse_independent(1)]]{}); + h.single_task( + []() [[intel::loop_fuse_independent(10)]]{}); + + // Test attribute is not propagated. 
+ h.single_task( + []() { foo(); }); + }); +} + +// CHECK: define spir_kernel void @"{{.*}}kernel_name_1"() {{.*}} !loop_fuse ![[LFI5:[0-9]+]] +// CHECK: define spir_kernel void @"{{.*}}kernel_name_2"() {{.*}} !loop_fuse ![[LFI1:[0-9]+]] +// CHECK: define spir_kernel void @"{{.*}}kernel_name_3"() {{.*}} !loop_fuse ![[LFI0:[0-9]+]] +// CHECK: define spir_kernel void @"{{.*}}kernel_name_4"() {{.*}} !loop_fuse ![[LFI1]] +// CHECK: define spir_kernel void @"{{.*}}kernel_name_5"() {{.*}} !loop_fuse ![[LFI10:[0-9]+]] +// CHECK: define spir_kernel void @"{{.*}}kernel_name_6"() +// CHECK-NOT: !loop_fuse +// CHECK-SAME: { +// CHECK: define spir_func void @{{.*}}foo{{.*}} !loop_fuse ![[LFI5]] +// CHECK: ![[LFI5]] = !{i32 5, i32 1} +// CHECK: ![[LFI1]] = !{i32 1, i32 1} +// CHECK: ![[LFI0]] = !{i32 0, i32 1} +// CHECK: ![[LFI10]] = !{i32 10, i32 1} diff --git a/clang/test/CodeGenSYCL/loop_fuse_ind_host.cpp b/clang/test/CodeGenSYCL/loop_fuse_ind_host.cpp new file mode 100644 index 0000000000000..59f8b237aee0c --- /dev/null +++ b/clang/test/CodeGenSYCL/loop_fuse_ind_host.cpp @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-host -triple -x86_64-unknown-linux-gnu -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s + +template +__attribute__((sycl_kernel)) void kernel(const Func &kernelFunc) { + kernelFunc(); +} + +template +class KernelFunctor5 { +public: + [[intel::loop_fuse_independent(SIZE)]] void operator()() const {} +}; + +[[intel::loop_fuse_independent]] void func1() {} +[[intel::loop_fuse_independent(0)]] void func2() {} +[[intel::loop_fuse_independent(1)]] void func3() {} +[[intel::loop_fuse_independent(10)]] void func4() {} + +void foo() { + + KernelFunctor5<5> f5; + kernel(f5); + + kernel( + []() [[intel::loop_fuse_independent(10)]]{}); + + kernel( + []() { func4(); }); +} + +// CHECK: define void @{{.*}}func1{{.*}} !loop_fuse ![[LFI1:[0-9]+]] +// CHECK: define void @{{.*}}func2{{.*}} !loop_fuse ![[LFI0:[0-9]+]] +// CHECK: define void @{{.*}}func3{{.*}} 
!loop_fuse ![[LFI1]] +// CHECK: define void @{{.*}}func4{{.*}} !loop_fuse ![[LFI10:[0-9]+]] +// CHECK: define linkonce_odr void @{{.*}}KernelFunctor5{{.*}} !loop_fuse ![[LFI5:[0-9]+]] +// CHECK: define internal void @"{{.*}}foo{{.*}}"(%class.anon* %this){{.*}}!loop_fuse ![[LFI10]] +// CHECK: define internal void @"{{.*}}foo{{.*}}"(%class.anon.0* %this) +// CHECK-NOT: !loop_fuse +// CHECK-SAME: { + +// CHECK: ![[LFI1]] = !{i32 1, i32 1} +// CHECK: ![[LFI0]] = !{i32 0, i32 1} +// CHECK: ![[LFI10]] = !{i32 10, i32 1} +// CHECK: ![[LFI5]] = !{i32 5, i32 1} diff --git a/clang/test/SemaSYCL/loop_fusion.cpp b/clang/test/SemaSYCL/loop_fusion.cpp new file mode 100644 index 0000000000000..873cef88b961d --- /dev/null +++ b/clang/test/SemaSYCL/loop_fusion.cpp @@ -0,0 +1,102 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-device -internal-isystem %S/Inputs -Wno-sycl-2017-compat -ast-dump -verify %s | FileCheck %s + +#include "sycl.hpp" + +using namespace cl::sycl; +queue q; + +// CHECK: FunctionDecl {{.*}} func1 'void ()' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: SYCLIntelLoopFuseAttr +// CHECK-NEXT: NULL +[[intel::loop_fuse]] void func1() {} + +// CHECK: FunctionDecl {{.*}} func2 'void ()' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: SYCLIntelLoopFuseAttr +// CHECK-NEXT: ConstantExpr +// CHECK-NEXT: value: Int 0 +[[intel::loop_fuse(0)]] void func2() {} + +// CHECK: FunctionDecl {{.*}} func3 'void ()' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: SYCLIntelLoopFuseIndependentAttr +// CHECK-NEXT: NULL +[[intel::loop_fuse_independent]] void func3() {} + +// CHECK: FunctionDecl {{.*}} func4 'void ()' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: SYCLIntelLoopFuseIndependentAttr +// CHECK-NEXT: ConstantExpr +// CHECK-NEXT: value: Int 3 +[[intel::loop_fuse_independent(3)]] void func4() {} + +class KernelFunctor { +public: + void operator()() const { + func1(); + func3(); + } +}; + +template +class KernelFunctor2 { +public: + [[intel::loop_fuse(N)]] void operator()() const { + } +}; + +void 
foo() { + q.submit([&](handler &h) { + // CHECK: FunctionDecl {{.*}}kernel_name_1 'void ()' + // CHECK-NOT: SYCLIntelLoopFuseAttr + // CHECK-NOT: SYCLIntelLoopFuseIndependentAttr + KernelFunctor f1; + h.single_task(f1); + + // CHECK: FunctionDecl {{.*}}kernel_name_2 'void ()' + // CHECK: SYCLIntelLoopFuseAttr + // CHECK-NEXT: SubstNonTypeTemplateParmExpr + // CHECK-NEXT: NonTypeTemplateParmDecl + // CHECK-NEXT: IntegerLiteral {{.*}} 'int' 3 + KernelFunctor2<3> f2; + h.single_task(f2); + + // CHECK: FunctionDecl {{.*}}kernel_name_3 'void ()' + // CHECK: SYCLIntelLoopFuseIndependentAttr + h.single_task( + []() [[intel::loop_fuse_independent]]{}); + }); + + [[intel::loop_fuse]] int testVar = 0; // expected-error{{'loop_fuse' attribute only applies to functions}} +} + +[[intel::loop_fuse(1048577)]] void func5() {} // expected-error{{'loop_fuse' attribute requires integer constant between 0 and 1048576 inclusive}} +[[intel::loop_fuse_independent(-1)]] void func6() {} // expected-error{{'loop_fuse_independent' attribute requires integer constant between 0 and 1048576 inclusive}} + +[[intel::loop_fuse]] [[intel::loop_fuse(10)]] void func7() {} // expected-warning {{attribute 'loop_fuse' is already applied}} +[[intel::loop_fuse_independent]] [[intel::loop_fuse_independent]] void func8() {} // // expected-warning {{attribute 'loop_fuse_independent' is already applied}} + +// expected-error@+2 {{'loop_fuse_independent' and 'loop_fuse' attributes are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +[[intel::loop_fuse]] [[intel::loop_fuse_independent]] void func9(); + +// expected-error@+2 {{'loop_fuse' and 'loop_fuse_independent' attributes are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +[[intel::loop_fuse_independent]] [[intel::loop_fuse]] void func10(); + +// expected-error@+2 {{'loop_fuse' and 'loop_fuse_independent' attributes are not compatible}} +// expected-note@+2 {{conflicting attribute is here}} 
+[[intel::loop_fuse]] void func11(); +[[intel::loop_fuse_independent]] void func11() {} + +// expected-error@+2 {{'loop_fuse_independent' and 'loop_fuse' attributes are not compatible}} +// expected-note@+2 {{conflicting attribute is here}} +[[intel::loop_fuse_independent]] void func12(); +[[intel::loop_fuse]] void func12() {} + +[[intel::loop_fuse]] void func13(); +[[intel::loop_fuse]] void func13() {} + +[[intel::loop_fuse_independent]] void func14(); +[[intel::loop_fuse_independent]] void func14() {}