From 7974c35e0e838ed98b85ccaf7abceb67cd01c860 Mon Sep 17 00:00:00 2001 From: rdeodhar Date: Mon, 8 Jun 2020 17:02:36 -0700 Subject: [PATCH 01/14] Support for arrays as kernel parameters. Signed-off-by: rdeodhar --- clang/lib/Sema/SemaSYCL.cpp | 201 ++++++-- .../CodeGenSYCL/kernel-param-acc-array-ih.cpp | 55 +++ .../CodeGenSYCL/kernel-param-acc-array.cpp | 74 +++ .../kernel-param-member-acc-array-ih.cpp | 58 +++ .../kernel-param-member-acc-array.cpp | 84 ++++ .../CodeGenSYCL/kernel-param-pod-array-ih.cpp | 51 ++ .../CodeGenSYCL/kernel-param-pod-array.cpp | 38 ++ .../test/SemaSYCL/array-kernel-param-neg.cpp | 56 +++ clang/test/SemaSYCL/array-kernel-param.cpp | 94 ++++ sycl/doc/Array_Kernel_Parameters.md | 435 ++++++++++++++++++ .../array_param/array-kernel-param-run.cpp | 250 ++++++++++ 11 files changed, 1350 insertions(+), 46 deletions(-) create mode 100755 clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp create mode 100644 clang/test/CodeGenSYCL/kernel-param-acc-array.cpp create mode 100644 clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp create mode 100644 clang/test/CodeGenSYCL/kernel-param-member-acc-array.cpp create mode 100755 clang/test/CodeGenSYCL/kernel-param-pod-array-ih.cpp create mode 100755 clang/test/CodeGenSYCL/kernel-param-pod-array.cpp create mode 100755 clang/test/SemaSYCL/array-kernel-param-neg.cpp create mode 100755 clang/test/SemaSYCL/array-kernel-param.cpp create mode 100755 sycl/doc/Array_Kernel_Parameters.md create mode 100755 sycl/test/array_param/array-kernel-param-run.cpp diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index fb420be750a29..5e5ef9a1ffabc 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -878,7 +878,6 @@ template class SyclKernelFieldHandler { virtual void leaveField(const CXXRecordDecl *, const CXXBaseSpecifier &) {} virtual void enterField(const CXXRecordDecl *, FieldDecl *) {} virtual void leaveField(const CXXRecordDecl *, FieldDecl *) {} - virtual void enterArray(const CXXBaseSpecifier &) {} virtual void enterArray() {} virtual void nextElement(QualType) {} virtual void leaveArray(QualType, int64_t) {} @@ -897,10 +896,10 @@ class SyclKernelFieldChecker if (const auto *CAT = dyn_cast(FieldTy)) { QualType ET = CAT->getElementType(); return checkNotCopyableToKernel(FD, ET); - } else - return Diag.Report(FD->getLocation(), - diag::err_sycl_non_constant_array_type) - << FieldTy; + } + return Diag.Report(FD->getLocation(), + diag::err_sycl_non_constant_array_type) + << FieldTy; } if (SemaRef.getASTContext().getLangOpts().SYCLStdLayoutKernelParams) @@ -1125,6 +1124,30 @@ class SyclKernelDeclCreator return true; } + bool handleArrayType(FieldDecl *FD, QualType FieldTy) final { + // if (!Util::isArrayOfSpecialSyclType(FieldTy)) { + if (!cast(FieldTy) + ->getElementType() + ->isStructureOrClassType()) { + // Wrap the array in a struct. + RecordDecl *NewClass = + SemaRef.getASTContext().buildImplicitRecord("wrapped_array"); + NewClass->startDefinition(); + FieldDecl *Field = FieldDecl::Create( + SemaRef.getASTContext(), NewClass, SourceLocation(), SourceLocation(), + /*Id=*/nullptr, FieldTy, + SemaRef.getASTContext().getTrivialTypeSourceInfo(FieldTy, + SourceLocation()), + /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); + Field->setAccess(AS_public); + NewClass->addDecl(Field); + NewClass->completeDefinition(); + QualType ST = SemaRef.getASTContext().getRecordType(NewClass); + addParam(FD, ST); + } + return true; + } + bool handleScalarType(FieldDecl *FD, QualType FieldTy) final { addParam(FD, FieldTy); return true; @@ -1260,6 +1283,41 @@ class SyclKernelBodyCreator InitExprs.push_back(MemberInit.get()); } + void createExprForArray(FieldDecl *FD) { + ParmVarDecl *KernelParameter = + DeclCreator.getParamVarDeclsForCurrentField()[0]; + QualType ParamType = KernelParameter->getOriginalType(); + CXXRecordDecl *WrapperStruct = ParamType->getAsCXXRecordDecl(); + // The first and only field of the wrapper struct is the array + FieldDecl *Array = *(WrapperStruct->field_begin()); + auto DRE = DeclRefExpr::Create(SemaRef.Context, NestedNameSpecifierLoc(), + SourceLocation(), KernelParameter, false, + DeclarationNameInfo(), ParamType, VK_LValue); + DeclAccessPair ArrayDAP = DeclAccessPair::make(Array, AS_none); + Expr *InitExpr = MemberExpr::Create( + SemaRef.Context, DRE, false, SourceLocation(), NestedNameSpecifierLoc(), + SourceLocation(), Array, ArrayDAP, + DeclarationNameInfo(Array->getDeclName(), SourceLocation()), nullptr, + Array->getType(), VK_LValue, OK_Ordinary, NOUR_None); + InitializationKind InitKind = InitializationKind::CreateDirect( + SourceLocation(), SourceLocation(), SourceLocation()); + InitializedEntity Entity = InitializedEntity::InitializeLambdaCapture( + nullptr, Array->getType(), SourceLocation()); + InitializationSequence InitSeq(SemaRef, Entity, InitKind, InitExpr); + ExprResult MemberInit = + InitSeq.Perform(SemaRef, Entity, InitKind, InitExpr); + InitExprs.push_back(MemberInit.get()); + } + + void createExprForArrayElement(size_t ArrayIndex) { + Expr *ArrayBase = MemberExprBases.back(); + ExprResult IndexExpr = + SemaRef.ActOnIntegerConstant(SourceLocation(), ArrayIndex); + ExprResult ElementBase = SemaRef.CreateBuiltinArraySubscriptExpr( + ArrayBase, SourceLocation(), IndexExpr.get(), SourceLocation()); + MemberExprBases.push_back(ElementBase.get()); + } + void createSpecialMethodCall(const CXXRecordDecl *SpecialClass, Expr *Base, const std::string &MethodName, FieldDecl *Field) { @@ -1276,9 +1334,7 @@ class SyclKernelBodyCreator ParamDREs[I] = SemaRef.BuildDeclRefExpr(KernelParameters[I], ParamType, VK_LValue, SourceLocation()); } - - MemberExpr *SpecialObjME = BuildMemberExpr(Base, Field); - MemberExpr *MethodME = BuildMemberExpr(SpecialObjME, Method); + MemberExpr *MethodME = BuildMemberExpr(Base, Method); QualType ResultTy = Method->getReturnType(); ExprValueKind VK = Expr::getValueKindForType(ResultTy); @@ -1313,7 +1369,7 @@ class SyclKernelBodyCreator bool handleSpecialType(FieldDecl *FD, QualType Ty) { const auto *RecordDecl = Ty->getAsCXXRecordDecl(); // Perform initialization only if it is field of kernel object - if (MemberExprBases.size() == 1) { + if (MemberExprBases.size() == 2) { InitializedEntity Entity = InitializedEntity::InitializeMember(FD, &VarEntity); // Initialize with the default constructor. @@ -1371,9 +1427,10 @@ class SyclKernelBodyCreator bool handleSyclStreamType(FieldDecl *FD, QualType Ty) final { const auto *StreamDecl = Ty->getAsCXXRecordDecl(); createExprForStructOrScalar(FD); - createSpecialMethodCall(StreamDecl, MemberExprBases.back(), InitMethodName, - FD); - createSpecialMethodCall(StreamDecl, MemberExprBases.back(), + size_t NumBases = MemberExprBases.size(); + createSpecialMethodCall(StreamDecl, MemberExprBases[NumBases - 2], + InitMethodName, FD); + createSpecialMethodCall(StreamDecl, MemberExprBases[NumBases - 2], FinalizeMethodName, FD); return true; } @@ -1399,16 +1456,53 @@ class SyclKernelBodyCreator return true; } - void enterStruct(const CXXRecordDecl *, FieldDecl *FD) final { - MemberExprBases.push_back(BuildMemberExpr(MemberExprBases.back(), FD)); + bool handleArrayType(FieldDecl *FD, QualType FieldTy) final { + if (!cast(FieldTy) + ->getElementType() + ->isStructureOrClassType()) { + createExprForArray(FD); + } + return true; } - void leaveStruct(const CXXRecordDecl *, FieldDecl *FD) final { - MemberExprBases.pop_back(); + void enterField(const CXXRecordDecl *RD, FieldDecl *FD) final { + if (!FD->getType()->isReferenceType()) + MemberExprBases.push_back(BuildMemberExpr(MemberExprBases.back(), FD)); } - using SyclKernelFieldHandler::enterStruct; - using SyclKernelFieldHandler::leaveStruct; + void leaveField(const CXXRecordDecl *, FieldDecl *FD) final { + if (!FD->getType()->isReferenceType()) + MemberExprBases.pop_back(); + } + + void enterArray() final { + Expr *ArrayBase = MemberExprBases.back(); + ExprResult IndexExpr = SemaRef.ActOnIntegerConstant(SourceLocation(), 0); + ExprResult ElementBase = SemaRef.CreateBuiltinArraySubscriptExpr( + ArrayBase, SourceLocation(), IndexExpr.get(), SourceLocation()); + MemberExprBases.push_back(ElementBase.get()); + } + + void nextElement(QualType) final { + ArraySubscriptExpr *LastArrayRef = + dyn_cast(MemberExprBases.back()); + MemberExprBases.pop_back(); + Expr *LastIdx = LastArrayRef->getIdx(); + llvm::APSInt Result; + SemaRef.VerifyIntegerConstantExpression(LastIdx, &Result); + Expr *ArrayBase = MemberExprBases.back(); + ExprResult IndexExpr = SemaRef.ActOnIntegerConstant( + SourceLocation(), Result.getExtValue() + 1); + ExprResult ElementBase = SemaRef.CreateBuiltinArraySubscriptExpr( + ArrayBase, SourceLocation(), IndexExpr.get(), SourceLocation()); + MemberExprBases.push_back(ElementBase.get()); + } + + void leaveArray(QualType, int64_t) final { MemberExprBases.pop_back(); } + + using SyclKernelFieldHandler::enterArray; + using SyclKernelFieldHandler::enterField; + using SyclKernelFieldHandler::leaveField; }; class SyclKernelIntHeaderCreator @@ -1419,23 +1513,20 @@ class SyclKernelIntHeaderCreator const CXXRecordDecl *CurStruct = nullptr; int64_t CurOffset = 0; - uint64_t getOffset(const CXXRecordDecl *RD) const { - assert(CurOffset && - "Cannot have a base class without setting the active struct"); - const ASTRecordLayout &Layout = - SemaRef.getASTContext().getASTRecordLayout(CurStruct); - return CurOffset + Layout.getBaseClassOffset(RD).getQuantity(); - } - uint64_t getOffset(const FieldDecl *FD) const { - return CurOffset + SemaRef.getASTContext().getFieldOffset(FD) / 8; - } - - void addParam(const FieldDecl *FD, QualType FieldTy, + void addParam(const FieldDecl *FD, QualType ArgTy, SYCLIntegrationHeader::kernel_param_kind_t Kind) { - uint64_t Size = - SemaRef.getASTContext().getTypeSizeInChars(FieldTy).getQuantity(); + uint64_t Size; + const ConstantArrayType *CAT = + SemaRef.getASTContext().getAsConstantArrayType(ArgTy); + if (CAT) { + QualType ET = CAT->getElementType(); + Size = static_cast(CAT->getSize().getZExtValue()) * + SemaRef.getASTContext().getTypeSizeInChars(ET).getQuantity(); + } else { + Size = SemaRef.getASTContext().getTypeSizeInChars(ArgTy).getQuantity(); + } Header.addParamDesc(Kind, static_cast(Size), - static_cast(getOffset(FD))); + static_cast(CurOffset)); } public: @@ -1456,8 +1547,7 @@ class SyclKernelIntHeaderCreator int Dims = static_cast( AccTy->getTemplateArgs()[1].getAsIntegral().getExtValue()); int Info = getAccessTarget(AccTy) | (Dims << 11); - Header.addParamDesc(SYCLIntegrationHeader::kind_accessor, Info, - getOffset(BC.getType()->getAsCXXRecordDecl())); + Header.addParamDesc(SYCLIntegrationHeader::kind_accessor, Info, CurOffset); return true; } @@ -1469,8 +1559,7 @@ class SyclKernelIntHeaderCreator int Dims = static_cast( AccTy->getTemplateArgs()[1].getAsIntegral().getExtValue()); int Info = getAccessTarget(AccTy) | (Dims << 11); - Header.addParamDesc(SYCLIntegrationHeader::kind_accessor, Info, - getOffset(FD)); + Header.addParamDesc(SYCLIntegrationHeader::kind_accessor, Info, CurOffset); return true; } @@ -1511,10 +1600,21 @@ class SyclKernelIntHeaderCreator addParam(FD, FieldTy, SYCLIntegrationHeader::kind_pointer); return true; } + + bool handleArrayType(FieldDecl *FD, QualType FieldTy) final { + // if (!Util::isArrayOfSpecialSyclType(FieldTy)) + if (!cast(FieldTy) + ->getElementType() + ->isStructureOrClassType()) + addParam(FD, FieldTy, SYCLIntegrationHeader::kind_std_layout); + return true; + } + bool handleStructType(FieldDecl *FD, QualType FieldTy) final { addParam(FD, FieldTy, SYCLIntegrationHeader::kind_std_layout); return true; } + bool handleScalarType(FieldDecl *FD, QualType FieldTy) final { addParam(FD, FieldTy, SYCLIntegrationHeader::kind_std_layout); return true; @@ -1524,6 +1624,7 @@ class SyclKernelIntHeaderCreator addParam(FD, FieldTy, SYCLIntegrationHeader::kind_std_layout); return true; } + bool handleSyclStreamType(const CXXBaseSpecifier &BC, QualType FieldTy) final { // FIXME SYCL stream should be usable as a base type @@ -1531,32 +1632,40 @@ class SyclKernelIntHeaderCreator return true; } - // Keep track of the current struct offset. - void enterStruct(const CXXRecordDecl *RD, FieldDecl *FD) final { - CurStruct = FD->getType()->getAsCXXRecordDecl(); + void enterField(const CXXRecordDecl *RD, FieldDecl *FD) final { CurOffset += SemaRef.getASTContext().getFieldOffset(FD) / 8; } - void leaveStruct(const CXXRecordDecl *RD, FieldDecl *FD) final { - CurStruct = RD; + void leaveField(const CXXRecordDecl *, FieldDecl *FD) final { CurOffset -= SemaRef.getASTContext().getFieldOffset(FD) / 8; } - void enterStruct(const CXXRecordDecl *RD, const CXXBaseSpecifier &BS) final { - CurStruct = BS.getType()->getAsCXXRecordDecl(); + void enterField(const CXXRecordDecl *RD, const CXXBaseSpecifier &BS) final { const ASTRecordLayout &Layout = SemaRef.getASTContext().getASTRecordLayout(RD); CurOffset += Layout.getBaseClassOffset(BS.getType()->getAsCXXRecordDecl()) .getQuantity(); } - void leaveStruct(const CXXRecordDecl *RD, const CXXBaseSpecifier &BS) final { - CurStruct = RD; + void leaveField(const CXXRecordDecl *RD, const CXXBaseSpecifier &BS) final { const ASTRecordLayout &Layout = SemaRef.getASTContext().getASTRecordLayout(RD); CurOffset -= Layout.getBaseClassOffset(BS.getType()->getAsCXXRecordDecl()) .getQuantity(); } + + void nextElement(QualType ET) final { + CurOffset += SemaRef.getASTContext().getTypeSizeInChars(ET).getQuantity(); + } + + void leaveArray(QualType ET, int64_t Count) final { + int64_t ArraySize = + SemaRef.getASTContext().getTypeSizeInChars(ET).getQuantity(); + if (!ET->isArrayType()) { + ArraySize *= Count; + } + CurOffset -= ArraySize; + } }; } // namespace diff --git a/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp b/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp new file mode 100755 index 0000000000000..fbaffee9f9b92 --- /dev/null +++ b/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp @@ -0,0 +1,55 @@ +// RUN: %clang -I %S/Inputs -fsycl-device-only -Xclang -fsycl-int-header=%t.h %s -c -o %T/kernel.spv +// RUN: FileCheck -input-file=%t.h %s + +// This test checks the integration header generated when +// the kernel argument is an Accessor array. + +// CHECK: #include + +// CHECK: class kernel_A; + +// CHECK: __SYCL_INLINE_NAMESPACE(cl) { +// CHECK-NEXT: namespace sycl { +// CHECK-NEXT: namespace detail { + +// CHECK: static constexpr +// CHECK-NEXT: const char* const kernel_names[] = { +// CHECK-NEXT: "_ZTSZ4mainE8kernel_A" +// CHECK-NEXT: }; + +// CHECK: static constexpr +// CHECK-NEXT: const kernel_param_desc_t kernel_signatures[] = { +// CHECK-NEXT: //--- _ZTSZ4mainE8kernel_A +// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 0 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 12 }, +// CHECK-EMPTY: +// CHECK-NEXT: }; + +// CHECK: static constexpr +// CHECK-NEXT: const unsigned kernel_signature_start[] = { +// CHECK-NEXT: 0 // _ZTSZ4mainE8kernel_A +// CHECK-NEXT: }; + +// CHECK: template <> struct KernelInfo { + +#include + +using namespace cl::sycl; + +template +__attribute__((sycl_kernel)) void a_kernel(Func kernelFunc) { + kernelFunc(); +} + +int main() { + + using Accessor = + accessor; + + Accessor acc[2]; + + a_kernel( + [=]() { + acc[1].use(); + }); +} diff --git a/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp b/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp new file mode 100644 index 0000000000000..09d1f48f86907 --- /dev/null +++ b/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp @@ -0,0 +1,74 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-device -I %S/Inputs -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s + +// This test checks a kernel argument that is an Accessor array + +#include + +using namespace cl::sycl; + +template +__attribute__((sycl_kernel)) void a_kernel(Func kernelFunc) { + kernelFunc(); +} + +int main() { + + using Accessor = + accessor; + Accessor acc[2]; + + a_kernel( + [=]() { + acc[1].use(); + }); +} + +// Check kernel_A parameters +// CHECK: define spir_kernel void @{{.*}}kernel_A +// CHECK-SAME: i32 addrspace(1)* [[MEM_ARG1:%[a-zA-Z0-9_]+]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval{{.*}}align 4 [[ACC_RANGE1:%[a-zA-Z0-9_]+_1]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval{{.*}}align 4 [[MEM_RANGE1:%[a-zA-Z0-9_]+_2]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::id"* byval{{.*}}align 4 [[OFFSET1:%[a-zA-Z0-9_]+_3]], +// CHECK-SAME: i32 addrspace(1)* [[MEM_ARG2:%[a-zA-Z0-9_]+_4]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval{{.*}}align 4 [[ACC_RANGE2:%[a-zA-Z0-9_]+_6]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval{{.*}}align 4 [[MEM_RANGE2:%[a-zA-Z0-9_]+_7]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::id"* byval{{.*}}align 4 [[OFFSET2:%[a-zA-Z0-9_]+_8]]) + +// Check alloca for pointer arguments +// CHECK: [[MEM_ARG1]].addr{{[0-9]*}} = alloca i32 addrspace(1)*, align 8 +// CHECK: [[MEM_ARG1]].addr{{[0-9]*}} = alloca i32 addrspace(1)*, align 8 + +// Check lambda object alloca +// CHECK: [[LOCAL_OBJECT:%0]] = alloca %"class.{{.*}}.anon", align 4 + +// Check allocas for ranges +// CHECK: [[ACC_RANGE1:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::range" +// CHECK: [[MEM_RANGE1:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::range" +// CHECK: [[OFFSET1:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::id" +// CHECK: [[ACC_RANGE2:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::range" +// CHECK: [[MEM_RANGE2:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::range" +// CHECK: [[OFFSET2:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::id" + +// Check accessor array GEP for acc[0] +// CHECK: [[ACCESSOR_ARRAY1:%[a-zA-Z0-9_]+]] = getelementptr inbounds %"class.{{.*}}.anon", %"class.{{.*}}.anon"* [[LOCAL_OBJECT]], i32 0, i32 0 +// CHECK: [[Z0:%[a-zA-Z0-9_]*]] = getelementptr inbounds{{.*}}[[ACCESSOR_ARRAY1]], i64 0, i64 0 + +// Check load from kernel pointer argument alloca +// CHECK: [[MEM_LOAD1:%[a-zA-Z0-9_]+]] = load i32 addrspace(1)*, i32 addrspace(1)** [[MEM_ARG1]].addr{{[0-9]*}} + +// Check acc[0] __init method call +// CHECK: [[ACC_CAST1:%[0-9]+]] = addrspacecast %"class{{.*}}accessor"* [[Z0]] to %"class{{.*}}accessor" addrspace(4)* + +// CHECK: call spir_func void @{{.*}}__init{{.*}}(%"class.{{.*}}.cl::sycl::accessor" addrspace(4)* [[ACC_CAST1]], i32 addrspace(1)* [[MEM_LOAD1]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[ACC_RANGE1]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[MEM_RANGE1]], %"struct.{{.*}}.cl::sycl::id"* byval({{.*}}) align 4 [[OFFSET1]]) + +// Check accessor array GEP for acc[1] +// CHECK: [[ACCESSOR_ARRAY2:%[a-zA-Z0-9_]+]] = getelementptr inbounds %"class.{{.*}}.anon", %"class.{{.*}}.anon"* [[LOCAL_OBJECT]], i32 0, i32 0 +// CHECK: [[Z1:%[a-zA-Z0-9_]*]] = getelementptr inbounds{{.*}}[[ACCESSOR_ARRAY2]], i64 0, i64 1 + +// Check load from kernel pointer argument alloca +// CHECK: [[MEM_LOAD2:%[a-zA-Z0-9_]+]] = load i32 addrspace(1)*, i32 addrspace(1)** [[MEM_ARG1]].addr{{[0-9]*}} + +// Check acc[1] __init method call +// CHECK: [[ACC_CAST2:%[0-9]+]] = addrspacecast %"class{{.*}}accessor"* [[Z1]] to %"class{{.*}}accessor" addrspace(4)* + +// CHECK: call spir_func void @{{.*}}__init{{.*}}(%"class.{{.*}}.cl::sycl::accessor" addrspace(4)* [[ACC_CAST2]], i32 addrspace(1)* [[MEM_LOAD2]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[ACC_RANGE2]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[MEM_RANGE2]], %"struct.{{.*}}.cl::sycl::id"* byval({{.*}}) align 4 [[OFFSET2]]) diff --git a/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp b/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp new file mode 100644 index 0000000000000..649d6fd88f0b8 --- /dev/null +++ b/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp @@ -0,0 +1,58 @@ +// RUN: %clang -I %S/Inputs -fsycl-device-only -Xclang -fsycl-int-header=%t.h %s -c -o %T/kernel.spv +// RUN: FileCheck -input-file=%t.h %s + +// This test checks the integration header when kernel argument +// is a struct containing an Accessor array. + +// CHECK: #include + +// CHECK: class kernel_C; + +// CHECK: __SYCL_INLINE_NAMESPACE(cl) { +// CHECK-NEXT: namespace sycl { +// CHECK-NEXT: namespace detail { + +// CHECK: static constexpr +// CHECK-NEXT: const char* const kernel_names[] = { +// CHECK-NEXT: "_ZTSZ4mainE8kernel_C" +// CHECK-NEXT: }; + +// CHECK: static constexpr +// CHECK-NEXT: const kernel_param_desc_t kernel_signatures[] = { +// CHECK-NEXT: //--- _ZTSZ4mainE8kernel_C +// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 24, 0 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 0 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 12 }, +// CHECK-EMPTY: +// CHECK-NEXT: }; + +// CHECK: static constexpr +// CHECK-NEXT: const unsigned kernel_signature_start[] = { +// CHECK-NEXT: 0 // _ZTSZ4mainE8kernel_C +// CHECK-NEXT: }; + +// CHECK: template <> struct KernelInfo { + +#include + +using namespace cl::sycl; + +template +__attribute__((sycl_kernel)) void a_kernel(Func kernelFunc) { + kernelFunc(); +} + +int main() { + + using Accessor = + accessor; + + struct struct_acc_t { + Accessor member_acc[2]; + } struct_acc; + + a_kernel( + [=]() { + struct_acc.member_acc[1].use(); + }); +} diff --git a/clang/test/CodeGenSYCL/kernel-param-member-acc-array.cpp b/clang/test/CodeGenSYCL/kernel-param-member-acc-array.cpp new file mode 100644 index 0000000000000..d72691eb0e279 --- /dev/null +++ b/clang/test/CodeGenSYCL/kernel-param-member-acc-array.cpp @@ -0,0 +1,84 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-device -I %S/Inputs -fsycl-int-header=%t.h -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s + +// This test checks a kernel with struct parameter that contains an Accessor array. + +#include + +using namespace cl::sycl; + +template +__attribute__((sycl_kernel)) void a_kernel(Func kernelFunc) { + kernelFunc(); +} + +int main() { + + using Accessor = + accessor; + + struct struct_acc_t { + Accessor member_acc[2]; + } struct_acc; + + a_kernel( + [=]() { + struct_acc.member_acc[1].use(); + }); +} + +// CHECK kernel_C parameters +// CHECK: define spir_kernel void @{{.*}}kernel_C +// CHECK-SAME: %struct.{{.*}}.struct_acc_t* byval(%struct.{{.*}}.struct_acc_t) align 4 [[STRUCT:%[a-zA-Z0-9_]+]], +// CHECK-SAME: i32 addrspace(1)* [[MEM_ARG1:%[a-zA-Z0-9_]+]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[ACC_RANGE1:%[a-zA-Z0-9_]+1]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[MEM_RANGE1:%[a-zA-Z0-9_]+2]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::id"* byval({{.*}}) align 4 [[OFFSET1:%[a-zA-Z0-9_]+3]], +// CHECK-SAME: i32 addrspace(1)* [[MEM_ARG2:%[a-zA-Z0-9_]+4]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[ACC_RANGE2:%[a-zA-Z0-9_]+6]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[MEM_RANGE2:%[a-zA-Z0-9_]+7]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::id"* byval({{.*}}) align 4 [[OFFSET2:%[a-zA-Z0-9_]+8]]) + +// Check alloca for pointer arguments +// CHECK: [[MEM_ARG1]].addr{{[0-9]*}} = alloca i32 addrspace(1)*, align 8 +// CHECK: [[MEM_ARG1]].addr{{[0-9]*}} = alloca i32 addrspace(1)*, align 8 + +// Check lambda object alloca +// CHECK: [[LOCAL_OBJECT:%0]] = alloca %"class.{{.*}}.anon", align 4 + +// Check allocas for ranges +// CHECK: [[ACC_RANGE1:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::range" +// CHECK: [[MEM_RANGE1:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::range" +// CHECK: [[OFFSET1:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::id" +// CHECK: [[ACC_RANGE2:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::range" +// CHECK: [[MEM_RANGE2:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::range" +// CHECK: [[OFFSET2:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::id" + +// Check init of local struct +// CHECK: [[L_STRUCT_ADDR:%[a-zA-Z0-9_]+]] = getelementptr inbounds %"class.{{.*}}.anon", %"class.{{.*}}.anon"* [[LOCAL_OBJECT]], i32 0, i32 0 +// CHECK: [[MEMCPY_DST:%[0-9a-zA-Z_]+]] = bitcast %struct.{{.*}}struct_acc_t* [[L_STRUCT_ADDR]] to i8* +// CHECK: [[MEMCPY_SRC:%[0-9a-zA-Z_]+]] = bitcast %struct.{{.*}}struct_acc_t* %{{[0-9a-zA-Z_]+}} to i8* +// CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[MEMCPY_DST]], i8* align 4 [[MEMCPY_SRC]], i64 24, i1 false) + +// Check accessor array GEP for member_acc[0] +// CHECK: [[ACCESSOR_ARRAY1:%[a-zA-Z0-9_]+]] = getelementptr inbounds %"class.{{.*}}.anon", %"class.{{.*}}.anon"* [[LOCAL_OBJECT]], i32 0, i32 0 +// CHECK: [[MEMBER1:%[a-zA-Z_]+]] = getelementptr inbounds %struct.{{.*}}.struct_acc_t, %struct.{{.*}}.struct_acc_t* [[ACCESSOR_ARRAY1]], i32 0, i32 0 +// CHECK: [[Z0:%[a-zA-Z0-9_]*]] = getelementptr inbounds [2 x %"class.{{.*}}.cl::sycl::accessor"], [2 x %"class.{{.*}}.cl::sycl::accessor"]* [[MEMBER1]], i64 0, i64 0 + +// Check load from kernel pointer argument alloca +// CHECK: [[MEM_LOAD1:%[a-zA-Z0-9_]+]] = load i32 addrspace(1)*, i32 addrspace(1)** [[MEM_ARG1]].addr{{[0-9]*}} + +// Check acc[0] __init method call +// CHECK: [[ACC_CAST1:%[0-9]+]] = addrspacecast %"class{{.*}}accessor"* [[Z0]] to %"class{{.*}}accessor" addrspace(4)* +// CHECK: call spir_func void @{{.*}}__init{{.*}}(%"class.{{.*}}.cl::sycl::accessor" addrspace(4)* [[ACC_CAST1]], i32 addrspace(1)* [[MEM_LOAD1]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[ACC_RANGE1]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[MEM_RANGE1]], %"struct.{{.*}}.cl::sycl::id"* byval({{.*}}) align 4 [[OFFSET1]]) + +// Check accessor array GEP for member_acc[1] +// CHECK: [[ACCESSOR_ARRAY2:%[a-zA-Z0-9_]+]] = getelementptr inbounds %"class.{{.*}}.anon", %"class.{{.*}}.anon"* [[LOCAL_OBJECT]], i32 0, i32 0 +// CHECK: [[MEMBER2:%[a-zA-Z0-9_]+]] = getelementptr inbounds %struct.{{.*}}.struct_acc_t, %struct.{{.*}}.struct_acc_t* [[ACCESSOR_ARRAY2]], i32 0, i32 0 +// CHECK: [[Z1:%[a-zA-Z0-9_]*]] = getelementptr inbounds [2 x %"class.{{.*}}.cl::sycl::accessor"], [2 x %"class.{{.*}}.cl::sycl::accessor"]* [[MEMBER2]], i64 0, i64 1 + +// Check load from kernel pointer argument alloca +// CHECK: [[MEM_LOAD2:%[a-zA-Z0-9_]+]] = load i32 addrspace(1)*, i32 addrspace(1)** [[MEM_ARG1]].addr{{[0-9]*}} + +// Check acc[1] __init method call +// CHECK: [[ACC_CAST2:%[0-9]+]] = addrspacecast %"class{{.*}}accessor"* [[Z1]] to %"class{{.*}}accessor" addrspace(4)* +// CHECK: call spir_func void @{{.*}}__init{{.*}}(%"class.{{.*}}.cl::sycl::accessor" addrspace(4)* [[ACC_CAST2]], i32 addrspace(1)* [[MEM_LOAD2]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[ACC_RANGE2]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[MEM_RANGE2]], %"struct.{{.*}}.cl::sycl::id"* byval({{.*}}) align 4 [[OFFSET2]]) diff --git a/clang/test/CodeGenSYCL/kernel-param-pod-array-ih.cpp b/clang/test/CodeGenSYCL/kernel-param-pod-array-ih.cpp new file mode 100755 index 0000000000000..799a0fb9183f1 --- /dev/null +++ b/clang/test/CodeGenSYCL/kernel-param-pod-array-ih.cpp @@ -0,0 +1,51 @@ +// RUN: %clang -I %S/Inputs -fsycl-device-only -Xclang -fsycl-int-header=%t.h %s -c -o %T/kernel.spv +// RUN: FileCheck -input-file=%t.h %s + +// This test checks the integration header generated for a kernel +// with an argument that is a POD array. + +// CHECK: #include + +// CHECK: class kernel_B; + +// CHECK: __SYCL_INLINE_NAMESPACE(cl) { +// CHECK-NEXT: namespace sycl { +// CHECK-NEXT: namespace detail { + +// CHECK: static constexpr +// CHECK-NEXT: const char* const kernel_names[] = { +// CHECK-NEXT: "_ZTSZ4mainE8kernel_B" +// CHECK-NEXT: }; + +// CHECK: static constexpr +// CHECK-NEXT: const kernel_param_desc_t kernel_signatures[] = { +// CHECK-NEXT: //--- _ZTSZ4mainE8kernel_B +// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 400, 0 }, +// CHECK-EMPTY: +// CHECK-NEXT: }; + +// CHECK: static constexpr +// CHECK-NEXT: const unsigned kernel_signature_start[] = { +// CHECK-NEXT: 0 // _ZTSZ4mainE8kernel_B +// CHECK-NEXT: }; + +// CHECK: template <> struct KernelInfo { + +#include + +using namespace cl::sycl; + +template +__attribute__((sycl_kernel)) void a_kernel(Func kernelFunc) { + kernelFunc(); +} + +int main() { + + int a[100]; + + a_kernel( + [=]() { + int local = a[3]; + }); +} diff --git a/clang/test/CodeGenSYCL/kernel-param-pod-array.cpp b/clang/test/CodeGenSYCL/kernel-param-pod-array.cpp new file mode 100755 index 0000000000000..7549afa16c91d --- /dev/null +++ b/clang/test/CodeGenSYCL/kernel-param-pod-array.cpp @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-device -I %S/Inputs -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s + +// This test checks a kernel with an argument that is a POD array. + +#include + +using namespace cl::sycl; + +template +__attribute__((sycl_kernel)) void a_kernel(Func kernelFunc) { + kernelFunc(); +} + +int main() { + + int a[100]; + + a_kernel( + [=]() { + int local = a[3]; + }); +} + +// Check kernel_B parameters +// CHECK: define spir_kernel void @{{.*}}kernel_B +// CHECK-SAME: %struct.{{.*}}.wrapped_array* byval{{.*}}align 4 [[ARG_STRUCT:%[a-zA-Z0-9_]+]] + +// Check local lambda object alloca +// CHECK: [[LOCAL_OBJECT:%0]] = alloca %"class.{{.*}}.anon", align 4 + +// Check init of local array +// CHECK: [[ARRAY1:%[a-zA-Z0-9_]+]] = getelementptr inbounds %"class.{{.*}}.anon", %"class.{{.*}}.anon"* [[LOCAL_OBJECT]], i32 0, i32 0 + +// CHECK: [[ARRAY2:%[a-zA-Z0-9_]+]] = getelementptr inbounds %struct.{{.*}}.wrapped_array, %struct.{{.*}}.wrapped_array* [[ARG_STRUCT]], i32 0, i32 0 + +// CHECK: %{{[a-zA-Z0-9._]+}} = getelementptr inbounds [100 x i32], [100 x i32]* [[ARRAY1]], i64 0, i64 0 + +// CHECK: %{{[a-zA-Z0-9_]+}} = getelementptr inbounds [100 x i32], [100 x i32]* [[ARRAY2]], i64 0, i64 diff --git a/clang/test/SemaSYCL/array-kernel-param-neg.cpp b/clang/test/SemaSYCL/array-kernel-param-neg.cpp new file mode 100755 index 0000000000000..b7f669ecd6671 --- /dev/null +++ b/clang/test/SemaSYCL/array-kernel-param-neg.cpp @@ -0,0 +1,56 @@ +// RUN: %clang_cc1 -fsycl -fsycl-is-device -fcxx-exceptions -verify -fsyntax-only %s + +// This test checks if compiler reports compilation error on an attempt to pass +// an array of non-trivially copyable structs as SYCL kernel parameter or +// a non-constant size array. + +struct A { + int i; +}; + +struct B { + int i; + B(int _i) : i(_i) {} + B(const B &x) : i(x.i) {} +}; + +struct C : A { + const A C2; + C() : A{0}, C2{2} {} +}; + +struct D { + int i; + ~D(); +}; + +class E { + // expected-error@+1 {{kernel parameter is not a constant size array}} + int i[]; + +public: + int operator()() { return i[0]; } +}; + +template +__attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { + kernelFunc(); +} + +void test() { + A cs1[10]; + B nsl1[4] = {1, 2, 3, 4}; + C cs2[6]; + D nsl2[5]; + E es; + kernel_single_task([=] { + int a = cs1[6].i; + // expected-error@+1 {{kernel parameter has non-trivially copy constructible class/struct type}} + int b = nsl1[2].i; + int c = cs2[0].i; + // expected-error@+1 {{kernel parameter has non-trivially destructible class/struct type}} + int d = nsl2[4].i; + }); + + kernel_single_task(es); +} diff --git a/clang/test/SemaSYCL/array-kernel-param.cpp b/clang/test/SemaSYCL/array-kernel-param.cpp new file mode 100755 index 0000000000000..a7157f479b997 --- /dev/null +++ b/clang/test/SemaSYCL/array-kernel-param.cpp @@ -0,0 +1,94 @@ +// RUN: %clang_cc1 -I %S/Inputs -fsycl -fsycl-is-device -ast-dump %s | FileCheck %s + +// This test checks that compiler generates correct kernel arguments for +// arrays, Accessor arrays, and structs containing Accessors. + +#include + +using namespace cl::sycl; + +template +__attribute__((sycl_kernel)) void a_kernel(Func kernelFunc) { + kernelFunc(); +} + +int main() { + + using Accessor = + accessor; + + Accessor acc[2]; + int a[100]; + struct struct_acc_t { + Accessor member_acc[4]; + } struct_acc; + + a_kernel( + [=]() { + acc[1].use(); + }); + + a_kernel( + [=]() { + int local = a[3]; + }); + + a_kernel( + [=]() { + struct_acc.member_acc[2].use(); + }); +} + +// Check kernel_A parameters +// CHECK: FunctionDecl {{.*}}kernel_A{{.*}} 'void (__global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>)' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ '__global int *' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'cl::sycl::range<1>' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'cl::sycl::range<1>' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'cl::sycl::id<1>' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ '__global int *' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'cl::sycl::range<1>' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'cl::sycl::range<1>' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'cl::sycl::id<1>' +// CHECK: CXXMemberCallExpr {{.*}} 'void' +// CHECK-NEXT: MemberExpr {{.*}}__init +// CHECK: CXXMemberCallExpr {{.*}} 'void' +// CHECK-NEXT: MemberExpr {{.*}}__init + +// Check kernel_B parameters +// CHECK: FunctionDecl {{.*}}kernel_B{{.*}} 'void (wrapped_array)' +// CHECK-NEXT: ParmVarDecl {{.*}} 'wrapped_array' +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: DeclStmt +// CHECK-NEXT: VarDecl +// CHECK-NEXT: InitListExpr +// CHECK-NEXT: ArrayInitLoopExpr {{.*}} 'int [100]' + +// Check kernel_C parameters +// CHECK: FunctionDecl {{.*}}kernel_C{{.*}} 'void (struct {{.*}}, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>)' +// CHECK-NEXT: ParmVarDecl {{.*}} 'struct {{.*}}' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc '__global int *' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::id<1>' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc '__global int *' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::id<1>' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc '__global int *' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::id<1>' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc '__global int *' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::id<1>' + +// Check that four accessor init functions are called +// CHECK: CXXMemberCallExpr {{.*}} 'void' +// CHECK-NEXT: MemberExpr {{.*}}__init +// CHECK: CXXMemberCallExpr {{.*}} 'void' +// CHECK-NEXT: MemberExpr {{.*}}__init +// CHECK: CXXMemberCallExpr {{.*}} 'void' +// CHECK-NEXT: MemberExpr {{.*}}__init +// CHECK: CXXMemberCallExpr {{.*}} 'void' +// CHECK-NEXT: MemberExpr {{.*}}__init diff --git a/sycl/doc/Array_Kernel_Parameters.md b/sycl/doc/Array_Kernel_Parameters.md new file mode 100755 index 0000000000000..22ec0b32c513b --- /dev/null +++ b/sycl/doc/Array_Kernel_Parameters.md @@ -0,0 +1,435 @@ +

Array Parameters of SYCL Kernels

+ +

Introduction

+ +This document describes the changes to support passing arrays to SYCL kernels +and special treatment of Accessor arrays. +The following cases are handled: + +1. arrays of standard-layout type as top-level arguments +2. arrays of Accessors as top-level arguments +3. arrays of accessors within structs that are top-level arguments + +The motivation for this correction to kernel parameters processing is to +bring consistency to the treatment of arrays. +On the CPU, a lambda function is allowed to access an element of an array +defined outside the lambda. The implementation captures the entire array +by value. A user would naturally expect this to work in SYCL as well. +However, the current implementation flags references to arrays defined +outside a SYCL kernel as errors. + +The first few sections describe the current design. +The last three sections describe the design to support 1. to 3. above. +The implementation of this design is confined to three functions in the +file `SemaSYCL.cpp`. + +

A SYCL Kernel

+ +The SYCL constructs `single_task`, `parallel_for`, and +`parallel_for_work_group` each take a function object or a lambda function + as one of their arguments. The code within the function object or +lambda function is executed on the device. +To enable execution of the kernel on OpenCL devices, the lambda/function object +is converted into the format of an OpenCL kernel. + +

SYCL Kernel Code Generation

+ +Consider a source code example that captures an int, a struct and an accessor +by value: + +```C++ +constexpr size_t c_num_items = 10; +range<1> num_items{c_num_items}; // range<1>(num_items) + +int main() +{ + int output[c_num_items]; + queue myQueue; + + int i = 55; + struct S { + int m; + } s = { 66 }; + auto outBuf = buffer(&output[0], num_items); + + myQueue.submit([&](handler &cgh) { + auto outAcc = outBuf.get_access(cgh); + cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { + outAcc[index] = i + s.m; + }); + }); + + return 0; +} +``` + +The input to the code generation routines is a function object that represents +the kernel. In pseudo-code: + +```C++ +struct Capture { + Accessor outAcc; + int i; + struct S s; + () { + outAcc[index] = i + s.m; + } +} +``` + +On the CPU a call to such a lambda function would look like this: +```C++ +()(struct Capture* this); +``` + +When offloading the kernel to a device, the lambda/function object's +function operator cannot be directly called with a capture object address. +Instead, the code generated for the device is in the form of a +�kernel caller� and a �kernel callee�. +The callee is a clone of the SYCL kernel object. +The caller is generated in the form of an OpenCL kernel function. +It receives the lambda capture object in pieces, assembles the pieces +into the original lambda capture object and then calls the callee: + +```C++ +spir_kernel void caller( + __global int* AccData, // arg1 of Accessor init function + range<1> AccR1, // arg2 of Accessor init function + range<1> AccR2, // arg3 of Accessor init function + id<1> I, // arg4 of Accessor init function + int i, + struct S s +) +{ + // Local capture object + struct Capture local; + + // Reassemble capture object from parts + local.i = i; + local.s = s; + // Call accessor�s init function + Accessor::init(&local.outAcc, AccData, AccR1, AccR2, I); + + // Call the kernel body + callee(&local, id<1> wi); +} + +spir_func void callee(struct Capture* this, id<1> wi) +{ +} +``` + +As may be observed from the example above, standard-layout lambda capture +components are passed by value to the device as separate parameters. +This includes scalars, pointers, and standard-layout structs. +Certain SYCL struct types that are not standard-layout, +such as Accessors and Samplers, are treated specially. +The arguments to their init functions are passed as separate parameters +and used within the kernel caller function to initialize Accessors/Samplers +on the device by calling their init functions using the received arguments. + +There is one other aspect of code generation. An �integration header� +is generated for use during host compilation. +This header file contains entries for each kernel. +Among the items it defines is a table of sizes and offsets of the +kernel parameters. +For the source example above the integration header contains the +following snippet: + +```C++ +// array representing signatures of all kernels defined in the +// corresponding source +static constexpr +const kernel_param_desc_t kernel_signatures[] = { + //--- _ZTSZZ4mainENKUlRN2cl4sycl7handlerEE19->18clES2_E6Worker + { kernel_param_kind_t::kind_accessor, 4062, 0 }, + { kernel_param_kind_t::kind_std_layout, 4, 32 }, + { kernel_param_kind_t::kind_std_layout, 4, 36 }, +}; +``` + +Each entry in the kernel_signatures table contains three values: +1) an encoding of the type of capture object member +2) a field that encodes additional properties, and +3) an offset within a block of memory where the value of that +4) kernel argument is placed. + +The previous sections described how kernel arguments are handled today. +The next three sections describe support for arrays. + +

Fix 1: Kernel Arguments that are Standard-Layout Arrays

+ +As described earlier, each variable captured by a lambda that comprises a +SYCL kernel becomes a parameter of the kernel caller function. +For arrays, simply allowing them through would result in a +function parameter of array type. This is not supported in C++. +Therefore, the array needing capture is wrapped in a struct for +the purposes of passing to the device. Once received on the device +within its wrapper, the array is copied into the local capture object. +All references to the array within the kernel body are directed to +the non-wrapped array which is a member of the local capture object. + +

Source code fragment:

+ +```C++ + int array[100]; + auto outBuf = buffer(&output[0], num_items); + + myQueue.submit([&](handler &cgh) { + auto outAcc = outBuf.get_access(cgh); + cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { + outAcc[index] = array[index.get(0)]; + }); + }); +``` + +

Integration header produced:

+ +```C++ +static constexpr +const kernel_param_desc_t kernel_signatures[] = { + //--- _ZTSZZ4mainENKUlRN2cl4sycl7handlerEE16->18clES2_E6Worker + { kernel_param_kind_t::kind_accessor, 4062, 0 }, + { kernel_param_kind_t::kind_std_layout, 400, 32 }, +}; +``` + +

The changes to device code made to support this extension, in pseudo-code:

+ +```C++ +struct Capture { + Accessor outAcc; + int array[100]; + () { + // Body + } +} + +struct wrapper { + int array[100]; +}; +spir_kernel void caller( + __global int* AccData, // arg1 of Accessor init function + range<1> AccR1, // arg2 of Accessor init function + range<1> AccR2, // arg3 of Accessor init function + id<1> I, // arg4 of Accessor init function + struct wrapper w_s // Pass the array wrapped in a struct +) +{ + // Local capture object + struct Capture local; + + // Reassemble capture object from parts + // Initialize array using existing clang Initialization mechanisms + local.array = w_s; + // Call accessor�s init function + Accessor::init(&local.outAcc, AccData, AccR1, AccR2, I); + + callee(&local, id<1> wi); +} +``` + +The sharp-eyed reviewer of `SemaSYCL.cpp` will notice that the array +is actually double-wrapped in structs. This was done simply to preserve +the interface to an existing function (`CreateAndAddPrmDsc`) which +processes each kernel caller parameter as a capture object member. +The object being added to a list in `CreateAndAddPrmDsc` is `Fld`, +which is expected to be a field of some struct. So a wrapped struct +cannot be passed to this function. A double-wrapped struct is needed +as shown below. This does not affect the generated code. + +```C++ +struct { + struct { + int array[100]; + } +} +``` + +This could be changed but it would mean changes to the `CreateAndAddPrmDsc` +implementation, to all its callers and to the place where the list created +by it is processed. +By wrapping the array twice, the inner, single-wrapped array appears as a +member of a struct and meets the requirements of the existing code. + +

Fix 2: Kernel Arguments that are Arrays of Accessors

+ +Arrays of accessors are supported in a manner similar to that of a plain +Accessor. For each accessor array element, the four values required to +call its init function are passed as separate arguments to the kernel. +Reassembly within the kernel caller is serialized by accessor array element. + +

Source code fragment:

+ +```C++ + myQueue.submit([&](handler &cgh) { + using Accessor = + accessor; + Accessor inAcc[2] = {in_buffer1.get_access(cgh), + in_buffer2.get_access(cgh)}; + auto outAcc = out_buffer.get_access(cgh); + + cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { + outAcc[index] = inAcc[0][index] + inAcc[1][index]; + }); + }); +``` + +

Integration header:

+ +```C++ +static constexpr +const kernel_param_desc_t kernel_signatures[] = { + //--- _ZTSZZ4mainENKUlRN2cl4sycl7handlerEE20->18clES2_E6Worker + { kernel_param_kind_t::kind_accessor, 4062, 0 }, + { kernel_param_kind_t::kind_accessor, 4062, 32 }, + { kernel_param_kind_t::kind_accessor, 4062, 64 }, +}; +``` + +

Device code generated in pseudo-code form:

+ +```C++ +struct Capture { + Accessor outAcc; + Accessor inAcc[2]; + () { + // Body + } +} + +spir_kernel void caller( + __global int* outAccData, // args of OutAcc + range<1> outAccR1, + range<1> outAccR2, + id<1> outI, + __global int* inAccData_0, // args of inAcc[0] + range<1> inAccR1_0, + range<1> inAccR2_0, + id<1> inI_0, + __global int* inAccData_1, // args of inAcc[1] + range<1> inAccR1_1, + range<1> inAccR2_1, + id<1> inI_1, +) +{ + // Local capture object + struct Capture local; + + // Reassemble capture object from parts + // Call outAcc accessor�s init function + Accessor::init(&local.outAcc, outAccData, outAccR1, outAccR2, outI); + + // Call inAcc[0] accessor�s init function + Accessor::init(&local.inAcc[0], inAccData_0, inAccR1_0, inAccR2_0, inI_0); + + // Call inAcc[1] accessor�s init function + Accessor::init(&local.inAcc[1], inAccData_1, inAccR1_1, inAccR2_1, inI_1); + + callee(&local, id<1> wi); +} +``` + +

Fix 3: Accessor Arrays within Structs

+ +*Individual* Accessors within structs were already supported. +Struct parameters of kernels that are structs are traversed member +by member, recursively, to enumerate member structs that are one of +the SYCL special types: Accessors and Samplers. For each special +struct encountered in the scan, arguments of their init functions +are added as separate arguments to the kernel. +However, *arrays* of accessors within structs were not supported. +Building on the support for single Accessors within structs, +the extension to arrays of Accessors/Samplers within structs +is straightforward. Each element of such arrays is treated as +an individual object, and the arguments of its init function +are added to the kernel arguments in sequence. +Within the kernel caller function, the lambda object is reassembled +in a manner similar to other instances of Accessor arrays. + + +

Source code fragment:

+ +```C++ + myQueue.submit([&](handler &cgh) { + using Accessor = + accessor; + struct S { + int m; + Accessor inAcc[2]; + } s = { 55, + {in_buffer1.get_access(cgh), + in_buffer2.get_access(cgh)} + }; + auto outAcc = out_buffer.get_access(cgh); + + cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { + outAcc[index] = s.m + s.inAcc[0][index] + s.inAcc[1][index]; + }); +}); +``` + +

Integration header:

+ +```C++ +static constexpr +const kernel_param_desc_t kernel_signatures[] = { + //--- _ZTSZZ4mainENKUlRN2cl4sycl7handlerEE20->18clES2_E6Worker + { kernel_param_kind_t::kind_accessor, 4062, 0 }, + { kernel_param_kind_t::kind_std_layout, 72, 32 }, + { kernel_param_kind_t::kind_accessor, 4062, 40 }, + { kernel_param_kind_t::kind_accessor, 4062, 72 }, + +}; +``` + +

Device code generated in pseudo-code form:

+ +```C++ +struct Capture { + Accessor outAcc; + struct S s; + () { + // Body + } +} + +spir_kernel void caller( + __global int* outAccData, // args of OutAcc + range<1> outAccR1, + range<1> outAccR2, + id<1> outI, + struct S s, // the struct S + __global int* inAccData_0, // args of s.inAcc[0] + range<1> inAccR1_0, + range<1> inAccR2_0, + id<1> inI_0, + __global int* inAccData_1, // args of s.inAcc[1] + range<1> inAccR1_1, + range<1> inAccR2_1, + id<1> inI_1, +) +{ + // Local capture object + struct Capture local; + + // Reassemble capture object from parts + // Copy struct argument contents to local copy + // Accessor array will be initialized by calling init functions + local.s = s; + + // Call outAcc accessor�s init function + Accessor::init( + &local.outAcc, outAccData, outAccR1, outAccR2, outI); + + // Call s.inAcc[0] accessor�s init function + Accessor::init( + &local.s.inAcc[0], inAccData_0, inAccR1_0, inAccR2_0, inI_0); + + // Call s.inAcc[1] accessor�s init function + Accessor::init( + &local.s.inAcc[1], inAccData_1, inAccR1_1, inAccR2_1, inI_1); + + callee(&local, id<1> wi); +} +``` diff --git a/sycl/test/array_param/array-kernel-param-run.cpp b/sycl/test/array_param/array-kernel-param-run.cpp new file mode 100755 index 0000000000000..179901a072985 --- /dev/null +++ b/sycl/test/array_param/array-kernel-param-run.cpp @@ -0,0 +1,250 @@ +// This test checks kernel execution with array kernel parameters. + +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +#include +#include + +using namespace cl::sycl; + +constexpr size_t c_num_items = 100; +range<1> num_items{c_num_items}; // range<1>(num_items) + +// Change if tests are added/removed +static int testCount = 5; +static int passCount; + +template +static bool verify_1D(const char *name, int X, T A, T A_ref) { + int ErrCnt = 0; + + for (int i = 0; i < X; i++) { + if (A_ref[i] != A[i]) { + if (++ErrCnt < 10) { + std::cout << name << " mismatch at " << i << ". Expected " << A_ref[i] + << " result is " << A[i] << "\n"; + } + } + } + + if (ErrCnt == 0) { + return true; + } + std::cout << " Failed. Failure rate: " << ErrCnt << "/" << X << "(" + << ErrCnt / (float)X * 100.f << "%)\n"; + return false; +} + +template +void init(T &A, int value, int increment) { + for (int i = 0; i < c_num_items; i++) { + A[i] = value; + value += increment; + } +} + +bool test_one_array(queue &myQueue) { + int input1[c_num_items]; + int output[c_num_items]; + int ref[c_num_items]; + init(input1, 1, 1); + init(output, 51, 1); + init(ref, 2, 1); + + auto out_buffer = buffer(&output[0], num_items); + + myQueue.submit([&](handler &cgh) { + auto output_accessor = out_buffer.get_access(cgh); + + cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { + output_accessor[index] = input1[index] + 1; + }); + }); + const auto HostAccessor = out_buffer.get_access(); + + return verify_1D("One array", c_num_items, output, ref); +} + +bool test_two_arrays(queue &myQueue) { + int input1[c_num_items]; + int input2[c_num_items]; + int output[c_num_items]; + int ref[c_num_items]; + init(input1, 1, 1); + init(input2, 22, 1); + init(ref, 23, 2); + + auto out_buffer = buffer(&output[0], num_items); + + myQueue.submit([&](handler &cgh) { + auto output_accessor = out_buffer.get_access(cgh); + + cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { + output_accessor[index] = input1[index] + input2[index]; + }); + }); + const auto HostAccessor = out_buffer.get_access(); + + return verify_1D("Two arrays", c_num_items, output, ref); +} + +bool test_accessor_arrays_1(queue &myQueue) { + std::array input1; + std::array input2; + std::array ref; + init(input1, 1, 1); + init(input2, 22, 1); + init(ref, 24, 1); + + auto in_buffer1 = buffer(input1.data(), num_items); + auto in_buffer2 = buffer(input2.data(), num_items); + + myQueue.submit([&](handler &cgh) { + using Accessor = + accessor; + Accessor a[2] = { + in_buffer1.get_access(cgh), + in_buffer2.get_access(cgh), + }; + + cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { + a[0][index] = a[1][index] + 2; + }); + }); + const auto HostAccessor = in_buffer1.get_access(); + + return verify_1D>("Accessor arrays 1", c_num_items, input1, ref); +} + +bool test_accessor_arrays_2(queue &myQueue) { + std::array input1; + std::array input2; + std::array output; + std::array ref; + init(input1, 1, 1); + init(input2, 22, 1); + init(ref, 23, 2); + + auto in_buffer1 = buffer(input1.data(), num_items); + auto in_buffer2 = buffer(input2.data(), num_items); + auto out_buffer = buffer(output.data(), num_items); + + myQueue.submit([&](handler &cgh) { + using Accessor = + accessor; + Accessor a[4] = {in_buffer1.get_access(cgh), + in_buffer2.get_access(cgh), + in_buffer1.get_access(cgh), + in_buffer2.get_access(cgh)}; + auto output_accessor = out_buffer.get_access(cgh); + + cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { + output_accessor[index] = a[0][index] + a[3][index]; + }); + }); + const auto HostAccessor = out_buffer.get_access(); + + return verify_1D>("Accessor arrays 2", c_num_items, output, ref); +} + +bool test_accessor_array_in_struct(queue &myQueue) { + std::array input1; + std::array input2; + std::array output; + std::array ref; + init(input1, 1, 1); + init(input2, 22, 1); + init(ref, 35, 2); + + auto in_buffer1 = buffer(input1.data(), num_items); + auto in_buffer2 = buffer(input2.data(), num_items); + auto out_buffer = buffer(output.data(), num_items); + + myQueue.submit([&](handler &cgh) { + using Accessor = + accessor; + struct S { + int w; + int x; + Accessor a[2]; + int y; + int z; + } S = { + 3, 3, {in_buffer1.get_access(cgh), in_buffer2.get_access(cgh)}, 7, 7}; + auto output_accessor = out_buffer.get_access(cgh); + + cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { + S.a[0][index]++; + S.a[1][index]++; + output_accessor[index] = S.a[0][index] + S.a[1][index] + S.x + S.y; + }); + }); + const auto HostAccessor = out_buffer.get_access(); + + return verify_1D("Accessor array in struct", c_num_items, output, ref); +} + +bool run_tests() { + queue Q([](exception_list L) { + for (auto ep : L) { + try { + std::rethrow_exception(ep); + } catch (std::exception &E) { + std::cout << "*** std exception caught:\n"; + std::cout << E.what(); + } catch (cl::sycl::exception const &E1) { + std::cout << "*** SYCL exception caught:\n"; + std::cout << E1.what(); + } + } + }); + + passCount = 0; + if (test_one_array(Q)) { + ++passCount; + } + if (test_two_arrays(Q)) { + ++passCount; + } + if (test_accessor_arrays_1(Q)) { + ++passCount; + } + if (test_accessor_arrays_2(Q)) { + ++passCount; + } + if (test_accessor_array_in_struct(Q)) { + ++passCount; + } + + auto D = Q.get_device(); + const char *devType = D.is_host() ? "Host" : D.is_cpu() ? "CPU" : "GPU"; + std::cout << passCount << " of " << testCount << " tests passed on " + << devType << "\n"; + + return (testCount == passCount); +} + +int main(int argc, char *argv[]) { + bool passed = true; + default_selector selector{}; + auto D = selector.select_device(); + const char *devType = D.is_host() ? "Host" : D.is_cpu() ? "CPU" : "GPU"; + std::cout << "Running on device " << devType << " (" + << D.get_info() << ")\n"; + try { + passed &= run_tests(); + } catch (exception e) { + std::cout << e.what(); + } + + if (!passed) { + std::cout << "FAILED\n"; + return 1; + } + std::cout << "PASSED\n"; + return 0; +} From 49071944ac7bf5220c1b50481d6f53cc76baf7df Mon Sep 17 00:00:00 2001 From: rdeodhar Date: Tue, 9 Jun 2020 12:37:39 -0700 Subject: [PATCH 02/14] Reusing some memberexpr building code. --- clang/lib/Sema/SemaSYCL.cpp | 45 +++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 5e5ef9a1ffabc..3495fd2b13a3e 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -1035,6 +1035,23 @@ class SyclKernelDeclCreator return true; } + // Create a new class around a field - used to wrap arrays. + RecordDecl *wrapAnArray(const QualType ArgTy, FieldDecl *Field) { + RecordDecl *NewClass = + SemaRef.getASTContext().buildImplicitRecord("wrapped_array"); + NewClass->startDefinition(); + Field = FieldDecl::Create( + SemaRef.getASTContext(), NewClass, SourceLocation(), SourceLocation(), + /*Id=*/nullptr, ArgTy, + SemaRef.getASTContext().getTrivialTypeSourceInfo(ArgTy, + SourceLocation()), + /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); + Field->setAccess(AS_public); + NewClass->addDecl(Field); + NewClass->completeDefinition(); + return NewClass; + }; + static void setKernelImplicitAttrs(ASTContext &Context, FunctionDecl *FD, StringRef Name) { // Set implicit attributes. @@ -1125,23 +1142,10 @@ class SyclKernelDeclCreator } bool handleArrayType(FieldDecl *FD, QualType FieldTy) final { - // if (!Util::isArrayOfSpecialSyclType(FieldTy)) { if (!cast(FieldTy) ->getElementType() ->isStructureOrClassType()) { - // Wrap the array in a struct. - RecordDecl *NewClass = - SemaRef.getASTContext().buildImplicitRecord("wrapped_array"); - NewClass->startDefinition(); - FieldDecl *Field = FieldDecl::Create( - SemaRef.getASTContext(), NewClass, SourceLocation(), SourceLocation(), - /*Id=*/nullptr, FieldTy, - SemaRef.getASTContext().getTrivialTypeSourceInfo(FieldTy, - SourceLocation()), - /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); - Field->setAccess(AS_public); - NewClass->addDecl(Field); - NewClass->completeDefinition(); + RecordDecl *NewClass = wrapAnArray(FieldTy, FD); QualType ST = SemaRef.getASTContext().getRecordType(NewClass); addParam(FD, ST); } @@ -1290,15 +1294,9 @@ class SyclKernelBodyCreator CXXRecordDecl *WrapperStruct = ParamType->getAsCXXRecordDecl(); // The first and only field of the wrapper struct is the array FieldDecl *Array = *(WrapperStruct->field_begin()); - auto DRE = DeclRefExpr::Create(SemaRef.Context, NestedNameSpecifierLoc(), - SourceLocation(), KernelParameter, false, - DeclarationNameInfo(), ParamType, VK_LValue); - DeclAccessPair ArrayDAP = DeclAccessPair::make(Array, AS_none); - Expr *InitExpr = MemberExpr::Create( - SemaRef.Context, DRE, false, SourceLocation(), NestedNameSpecifierLoc(), - SourceLocation(), Array, ArrayDAP, - DeclarationNameInfo(Array->getDeclName(), SourceLocation()), nullptr, - Array->getType(), VK_LValue, OK_Ordinary, NOUR_None); + Expr *DRE = SemaRef.BuildDeclRefExpr(KernelParameter, ParamType, VK_LValue, + SourceLocation()); + Expr *InitExpr = BuildMemberExpr(DRE, Array); InitializationKind InitKind = InitializationKind::CreateDirect( SourceLocation(), SourceLocation(), SourceLocation()); InitializedEntity Entity = InitializedEntity::InitializeLambdaCapture( @@ -1602,7 +1600,6 @@ class SyclKernelIntHeaderCreator } bool handleArrayType(FieldDecl *FD, QualType FieldTy) final { - // if (!Util::isArrayOfSpecialSyclType(FieldTy)) if (!cast(FieldTy) ->getElementType() ->isStructureOrClassType()) From 52ce3f2dddfeaed583f29669bd968f3453e3a06f Mon Sep 17 00:00:00 2001 From: rdeodhar Date: Fri, 12 Jun 2020 10:41:54 -0700 Subject: [PATCH 03/14] Updated support for arrays. --- clang/lib/Sema/SemaSYCL.cpp | 66 +-- .../CodeGenSYCL/kernel-param-acc-array-ih.cpp | 5 +- .../CodeGenSYCL/kernel-param-acc-array.cpp | 67 +-- clang/test/SemaSYCL/array-kernel-param.cpp | 9 +- sycl/doc/CompilerAndRuntimeDesign.md | 3 +- sycl/doc/KernelParameterPassing.md | 424 ++++++++++++++++++ 6 files changed, 493 insertions(+), 81 deletions(-) create mode 100755 sycl/doc/KernelParameterPassing.md diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index dafa4f2357c1e..78497f1599e05 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -644,6 +644,22 @@ static ParamDesc makeParamDesc(ASTContext &Ctx, const CXXBaseSpecifier &Src, Ctx.getTrivialTypeSourceInfo(Ty)); } +// Create a new class around a field - used to wrap arrays. +static RecordDecl *wrapAnArray(ASTContext &Ctx, const QualType ArgTy, + FieldDecl *&Field) { + RecordDecl *NewClass = Ctx.buildImplicitRecord("wrapped_array"); + NewClass->startDefinition(); + Field = FieldDecl::Create( + Ctx, NewClass, SourceLocation(), SourceLocation(), + /*Id=*/nullptr, ArgTy, + Ctx.getTrivialTypeSourceInfo(ArgTy, SourceLocation()), + /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); + Field->setAccess(AS_public); + NewClass->addDecl(Field); + NewClass->completeDefinition(); + return NewClass; +} + /// \return the target of given SYCL accessor type static target getAccessTarget(const ClassTemplateSpecializationDecl *AccTy) { return static_cast( @@ -1035,23 +1051,6 @@ class SyclKernelDeclCreator return true; } - // Create a new class around a field - used to wrap arrays. - RecordDecl *wrapAnArray(const QualType ArgTy, FieldDecl *Field) { - RecordDecl *NewClass = - SemaRef.getASTContext().buildImplicitRecord("wrapped_array"); - NewClass->startDefinition(); - Field = FieldDecl::Create( - SemaRef.getASTContext(), NewClass, SourceLocation(), SourceLocation(), - /*Id=*/nullptr, ArgTy, - SemaRef.getASTContext().getTrivialTypeSourceInfo(ArgTy, - SourceLocation()), - /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); - Field->setAccess(AS_public); - NewClass->addDecl(Field); - NewClass->completeDefinition(); - return NewClass; - }; - static void setKernelImplicitAttrs(ASTContext &Context, FunctionDecl *FD, StringRef Name) { // Set implicit attributes. @@ -1142,13 +1141,9 @@ class SyclKernelDeclCreator } bool handleArrayType(FieldDecl *FD, QualType FieldTy) final { - if (!cast(FieldTy) - ->getElementType() - ->isStructureOrClassType()) { - RecordDecl *NewClass = wrapAnArray(FieldTy, FD); - QualType ST = SemaRef.getASTContext().getRecordType(NewClass); - addParam(FD, ST); - } + RecordDecl *NewClass = wrapAnArray(SemaRef.getASTContext(), FieldTy, FD); + QualType ST = SemaRef.getASTContext().getRecordType(NewClass); + addParam(FD, ST); return true; } @@ -1295,7 +1290,7 @@ class SyclKernelBodyCreator // The first and only field of the wrapper struct is the array FieldDecl *Array = *(WrapperStruct->field_begin()); Expr *DRE = SemaRef.BuildDeclRefExpr(KernelParameter, ParamType, VK_LValue, - SourceLocation()); + SourceLocation()); Expr *InitExpr = BuildMemberExpr(DRE, Array); InitializationKind InitKind = InitializationKind::CreateDirect( SourceLocation(), SourceLocation(), SourceLocation()); @@ -1307,15 +1302,6 @@ class SyclKernelBodyCreator InitExprs.push_back(MemberInit.get()); } - void createExprForArrayElement(size_t ArrayIndex) { - Expr *ArrayBase = MemberExprBases.back(); - ExprResult IndexExpr = - SemaRef.ActOnIntegerConstant(SourceLocation(), ArrayIndex); - ExprResult ElementBase = SemaRef.CreateBuiltinArraySubscriptExpr( - ArrayBase, SourceLocation(), IndexExpr.get(), SourceLocation()); - MemberExprBases.push_back(ElementBase.get()); - } - void createSpecialMethodCall(const CXXRecordDecl *SpecialClass, Expr *Base, const std::string &MethodName, FieldDecl *Field) { @@ -1455,11 +1441,7 @@ class SyclKernelBodyCreator } bool handleArrayType(FieldDecl *FD, QualType FieldTy) final { - if (!cast(FieldTy) - ->getElementType() - ->isStructureOrClassType()) { - createExprForArray(FD); - } + createExprForArray(FD); return true; } @@ -1600,10 +1582,8 @@ class SyclKernelIntHeaderCreator } bool handleArrayType(FieldDecl *FD, QualType FieldTy) final { - if (!cast(FieldTy) - ->getElementType() - ->isStructureOrClassType()) - addParam(FD, FieldTy, SYCLIntegrationHeader::kind_std_layout); + wrapAnArray(SemaRef.getASTContext(), FieldTy, FD); + addParam(FD, FD->getType(), SYCLIntegrationHeader::kind_std_layout); return true; } diff --git a/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp b/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp index fbaffee9f9b92..0f2b0b32d315c 100755 --- a/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp @@ -1,4 +1,4 @@ -// RUN: %clang -I %S/Inputs -fsycl-device-only -Xclang -fsycl-int-header=%t.h %s -c -o %T/kernel.spv +// RUN: %clang -I %S/Inputs -fsycl-device-only -Xclang -fsycl-int-header=/iusers/rdeodhar/work/dpcc/jira/7004/t.h %s -c -o %T/kernel.spv // RUN: FileCheck -input-file=%t.h %s // This test checks the integration header generated when @@ -19,7 +19,8 @@ // CHECK: static constexpr // CHECK-NEXT: const kernel_param_desc_t kernel_signatures[] = { -// CHECK-NEXT: //--- _ZTSZ4mainE8kernel_A +// CHECK-NEXT: //--- _ZTSZ4mainE8kernel_A +// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 24, 0 }, // CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 0 }, // CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 12 }, // CHECK-EMPTY: diff --git a/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp b/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp index 09d1f48f86907..2306599393b7a 100644 --- a/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp @@ -25,23 +25,24 @@ int main() { // Check kernel_A parameters // CHECK: define spir_kernel void @{{.*}}kernel_A -// CHECK-SAME: i32 addrspace(1)* [[MEM_ARG1:%[a-zA-Z0-9_]+]], -// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval{{.*}}align 4 [[ACC_RANGE1:%[a-zA-Z0-9_]+_1]], -// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval{{.*}}align 4 [[MEM_RANGE1:%[a-zA-Z0-9_]+_2]], -// CHECK-SAME: %"struct.{{.*}}.cl::sycl::id"* byval{{.*}}align 4 [[OFFSET1:%[a-zA-Z0-9_]+_3]], -// CHECK-SAME: i32 addrspace(1)* [[MEM_ARG2:%[a-zA-Z0-9_]+_4]], -// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval{{.*}}align 4 [[ACC_RANGE2:%[a-zA-Z0-9_]+_6]], -// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval{{.*}}align 4 [[MEM_RANGE2:%[a-zA-Z0-9_]+_7]], -// CHECK-SAME: %"struct.{{.*}}.cl::sycl::id"* byval{{.*}}align 4 [[OFFSET2:%[a-zA-Z0-9_]+_8]]) - -// Check alloca for pointer arguments -// CHECK: [[MEM_ARG1]].addr{{[0-9]*}} = alloca i32 addrspace(1)*, align 8 -// CHECK: [[MEM_ARG1]].addr{{[0-9]*}} = alloca i32 addrspace(1)*, align 8 - -// Check lambda object alloca +// CHECK-SAME: %struct.{{.*}}.wrapped_array* byval{{.*}}align 4 %_arg_, +// CHECK-SAME: i32 addrspace(1)* [[MEM_ARG1:%[a-zA-Z0-9_]+_1]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval{{.*}}align 4 [[ACC_RANGE1:%[a-zA-Z0-9_]+_2]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval{{.*}}align 4 [[MEM_RANGE1:%[a-zA-Z0-9_]+_3]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::id"* byval{{.*}}align 4 [[OFFSET1:%[a-zA-Z0-9_]+_4]], +// CHECK-SAME: i32 addrspace(1)* [[MEM_ARG2:%[a-zA-Z0-9_]+_5]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval{{.*}}align 4 [[ACC_RANGE2:%[a-zA-Z0-9_]+_7]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval{{.*}}align 4 [[MEM_RANGE2:%[a-zA-Z0-9_]+_8]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::id"* byval{{.*}}align 4 [[OFFSET2:%[a-zA-Z0-9_]+_9]]) + +// CHECK alloca for pointer arguments +// CHECK: [[MEM_ARG1:%[a-zA-Z0-9_.]+]] = alloca i32 addrspace(1)*, align 8 +// CHECK: [[MEM_ARG2:%[a-zA-Z0-9_.]+]] = alloca i32 addrspace(1)*, align 8 + +// CHECK lambda object alloca // CHECK: [[LOCAL_OBJECT:%0]] = alloca %"class.{{.*}}.anon", align 4 -// Check allocas for ranges +// CHECK allocas for ranges // CHECK: [[ACC_RANGE1:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::range" // CHECK: [[MEM_RANGE1:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::range" // CHECK: [[OFFSET1:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::id" @@ -49,26 +50,30 @@ int main() { // CHECK: [[MEM_RANGE2:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::range" // CHECK: [[OFFSET2:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::id" -// Check accessor array GEP for acc[0] -// CHECK: [[ACCESSOR_ARRAY1:%[a-zA-Z0-9_]+]] = getelementptr inbounds %"class.{{.*}}.anon", %"class.{{.*}}.anon"* [[LOCAL_OBJECT]], i32 0, i32 0 -// CHECK: [[Z0:%[a-zA-Z0-9_]*]] = getelementptr inbounds{{.*}}[[ACCESSOR_ARRAY1]], i64 0, i64 0 +// Check array initialization +// CHECK: arrayinit.body: +// CHECK: arrayinit.end: -// Check load from kernel pointer argument alloca -// CHECK: [[MEM_LOAD1:%[a-zA-Z0-9_]+]] = load i32 addrspace(1)*, i32 addrspace(1)** [[MEM_ARG1]].addr{{[0-9]*}} +// CHECK accessor array GEP for acc[0] +// CHECK: [[ACCESSOR_ARRAY1:%[a-zA-Z0-9_]+]] = getelementptr inbounds %"class.{{.*}}.anon", %"class.{{.*}}.anon"* [[LOCAL_OBJECT]], i32 0, i32 0 +// CHECK: [[Z0:%[a-zA-Z0-9_]*]] = getelementptr inbounds{{.*}}[[ACCESSOR_ARRAY1]], i64 0, i64 0 -// Check acc[0] __init method call -// CHECK: [[ACC_CAST1:%[0-9]+]] = addrspacecast %"class{{.*}}accessor"* [[Z0]] to %"class{{.*}}accessor" addrspace(4)* +// CHECK load from kernel pointer argument alloca +// CHECK: [[MEM_LOAD1:%[a-zA-Z0-9_]+]] = load i32 addrspace(1)*, i32 addrspace(1)** [[MEM_ARG1]] -// CHECK: call spir_func void @{{.*}}__init{{.*}}(%"class.{{.*}}.cl::sycl::accessor" addrspace(4)* [[ACC_CAST1]], i32 addrspace(1)* [[MEM_LOAD1]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[ACC_RANGE1]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[MEM_RANGE1]], %"struct.{{.*}}.cl::sycl::id"* byval({{.*}}) align 4 [[OFFSET1]]) +// CHECK acc[0] __init method call +// CHECK: [[ACC_CAST1:%[0-9]+]] = addrspacecast %"class{{.*}}accessor"* [[Z0]] to %"class{{.*}}accessor" addrspace(4)* -// Check accessor array GEP for acc[1] -// CHECK: [[ACCESSOR_ARRAY2:%[a-zA-Z0-9_]+]] = getelementptr inbounds %"class.{{.*}}.anon", %"class.{{.*}}.anon"* [[LOCAL_OBJECT]], i32 0, i32 0 -// CHECK: [[Z1:%[a-zA-Z0-9_]*]] = getelementptr inbounds{{.*}}[[ACCESSOR_ARRAY2]], i64 0, i64 1 +// CHECK: call spir_func void @{{.*}}__init{{.*}}(%"class.{{.*}}.cl::sycl::accessor" addrspace(4)* [[ACC_CAST1]], i32 addrspace(1)* [[MEM_LOAD1]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[ACC_RANGE1]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[MEM_RANGE1]], %"struct.{{.*}}.cl::sycl::id"* byval({{.*}}) align 4 [[OFFSET1]]) -// Check load from kernel pointer argument alloca -// CHECK: [[MEM_LOAD2:%[a-zA-Z0-9_]+]] = load i32 addrspace(1)*, i32 addrspace(1)** [[MEM_ARG1]].addr{{[0-9]*}} +// CHECK accessor array GEP for acc[1] +// CHECK: [[ACCESSOR_ARRAY2:%[a-zA-Z0-9_]+]] = getelementptr inbounds %"class.{{.*}}.anon", %"class.{{.*}}.anon"* [[LOCAL_OBJECT]], i32 0, i32 0 +// CHECK: [[Z1:%[a-zA-Z0-9_]*]] = getelementptr inbounds{{.*}}[[ACCESSOR_ARRAY2]], i64 0, i64 1 -// Check acc[1] __init method call -// CHECK: [[ACC_CAST2:%[0-9]+]] = addrspacecast %"class{{.*}}accessor"* [[Z1]] to %"class{{.*}}accessor" addrspace(4)* +// CHECK load from kernel pointer argument alloca +// CHECK: [[MEM_LOAD2:%[a-zA-Z0-9_]+]] = load i32 addrspace(1)*, i32 addrspace(1)** [[MEM_ARG2]] -// CHECK: call spir_func void @{{.*}}__init{{.*}}(%"class.{{.*}}.cl::sycl::accessor" addrspace(4)* [[ACC_CAST2]], i32 addrspace(1)* [[MEM_LOAD2]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[ACC_RANGE2]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[MEM_RANGE2]], %"struct.{{.*}}.cl::sycl::id"* byval({{.*}}) align 4 [[OFFSET2]]) +// CHECK acc[1] __init method call +// CHECK: [[ACC_CAST2:%[0-9]+]] = addrspacecast %"class{{.*}}accessor"* [[Z1]] to %"class{{.*}}accessor" addrspace(4)* + +// CHECK: call spir_func void @{{.*}}__init{{.*}}(%"class.{{.*}}.cl::sycl::accessor" addrspace(4)* [[ACC_CAST2]], i32 addrspace(1)* [[MEM_LOAD2]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[ACC_RANGE2]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[MEM_RANGE2]], %"struct.{{.*}}.cl::sycl::id"* byval({{.*}}) align 4 [[OFFSET2]]) diff --git a/clang/test/SemaSYCL/array-kernel-param.cpp b/clang/test/SemaSYCL/array-kernel-param.cpp index a7157f479b997..d94c76600bb13 100755 --- a/clang/test/SemaSYCL/array-kernel-param.cpp +++ b/clang/test/SemaSYCL/array-kernel-param.cpp @@ -40,7 +40,8 @@ int main() { } // Check kernel_A parameters -// CHECK: FunctionDecl {{.*}}kernel_A{{.*}} 'void (__global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>)' +// CHECK: FunctionDecl {{.*}}kernel_A{{.*}} 'void (wrapped_array, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>)' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'wrapped_array' // CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ '__global int *' // CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'cl::sycl::range<1>' // CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'cl::sycl::range<1>' @@ -54,7 +55,7 @@ int main() { // CHECK: CXXMemberCallExpr {{.*}} 'void' // CHECK-NEXT: MemberExpr {{.*}}__init -// Check kernel_B parameters +// CHECK kernel_B parameters // CHECK: FunctionDecl {{.*}}kernel_B{{.*}} 'void (wrapped_array)' // CHECK-NEXT: ParmVarDecl {{.*}} 'wrapped_array' // CHECK-NEXT: CompoundStmt @@ -63,7 +64,7 @@ int main() { // CHECK-NEXT: InitListExpr // CHECK-NEXT: ArrayInitLoopExpr {{.*}} 'int [100]' -// Check kernel_C parameters +// CHECK kernel_C parameters // CHECK: FunctionDecl {{.*}}kernel_C{{.*}} 'void (struct {{.*}}, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>)' // CHECK-NEXT: ParmVarDecl {{.*}} 'struct {{.*}}' // CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc '__global int *' @@ -83,7 +84,7 @@ int main() { // CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' // CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::id<1>' -// Check that four accessor init functions are called +// CHECK that four accessor init functions are called // CHECK: CXXMemberCallExpr {{.*}} 'void' // CHECK-NEXT: MemberExpr {{.*}}__init // CHECK: CXXMemberCallExpr {{.*}} 'void' diff --git a/sycl/doc/CompilerAndRuntimeDesign.md b/sycl/doc/CompilerAndRuntimeDesign.md index dbc73ca657094..d503687fcd153 100644 --- a/sycl/doc/CompilerAndRuntimeDesign.md +++ b/sycl/doc/CompilerAndRuntimeDesign.md @@ -168,7 +168,8 @@ __kernel KernelName(global int* a) { ``` OpenCL kernel function is generated by the compiler inside the Sema using AST -nodes. +nodes. Additional details of kernel parameter passing may be found in the document +[SYCL Kernel Parameter Handling and Array Support](KernelParameterPassing.md) . ### SYCL support in the driver diff --git a/sycl/doc/KernelParameterPassing.md b/sycl/doc/KernelParameterPassing.md new file mode 100755 index 0000000000000..a6f31ec114482 --- /dev/null +++ b/sycl/doc/KernelParameterPassing.md @@ -0,0 +1,424 @@ +

SYCL Kernel Parameter Handling and Array Support

+ +

Introduction

+ +This document describes how parameters of SYCL kernels are passed +from host to device. Support for arrays as kernel parameters was added +later and aspects of that design are covered in more detail. +The special treatment of arrays of `sycl::accessor` objects is also discussed. +Array support covers these cases: + +1. arrays of standard-layout type +2. arrays of accessors +3. arrays of structs that contain accessor arrays or accessor fields + +The motivation for allowing arrays as kernel parameters is to +bring consistency to the treatment of arrays. +In C++ a lambda function is allowed to access an element of an array +defined outside the lambda. The compiler captures the entire array +by value. Note that this behavior is limited to implicit +capture of the array by value. If the array name were in +the capture list then the base address of the array would be captured +and not the entire array. + +A user would expect the same mode of array capture in a SYCL kernel +lambda object as in any other lambda object. + +The first few sections describe the overall design. +The last three sections provide additional details of array support. +The implementation of this design is confined to four classes in the +file `SemaSYCL.cpp`. +

A SYCL Kernel

+ +The SYCL constructs `single_task`, `parallel_for`, and +`parallel_for_work_group` each take a function object or a lambda function + as one of their arguments. The code within the function object or +lambda function is executed on the device. +To enable execution of the kernel on OpenCL devices, the lambda/function object +is converted into the format of an OpenCL kernel. + +

SYCL Kernel Code Generation

+ +Consider a source code example that captures an int, a struct and an accessor +by value: + +```C++ +constexpr size_t c_num_items = 10; +range<1> num_items{c_num_items}; // range<1>(num_items) + +int main() +{ + int output[c_num_items]; + queue myQueue; + + int i = 55; + struct S { + int m; + } s = { 66 }; + auto outBuf = buffer(&output[0], num_items); + + myQueue.submit([&](handler &cgh) { + auto outAcc = outBuf.get_access(cgh); + cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { + outAcc[index] = i + s.m; + }); + }); + + return 0; +} +``` + +The input to the code generation routines is a function object that represents +the kernel. In pseudo-code: + +```C++ +struct Capture { + sycl::accessor outAcc; + int i; + struct S s; + () { + outAcc[index] = i + s.m; + } +} +``` + +The compiler-generated code for a call to such a lambda function would look like this: +```C++ +()(struct Capture* this); +``` + +When offloading the kernel to a device, the lambda/function object's +function operator cannot be directly called with a capture object address. +Instead, the code generated for the device is in the form of a +�kernel caller� and a �kernel callee�. +The callee is a clone of the SYCL kernel object. +The caller is generated in the form of an OpenCL kernel function. +It receives the lambda capture object in pieces, assembles the pieces +into the original lambda capture object and then calls the callee: + +```C++ +spir_kernel void caller( + __global int* AccData, // arg1 of accessor init function + range<1> AccR1, // arg2 of accessor init function + range<1> AccR2, // arg3 of accessor init function + id<1> I, // arg4 of accessor init function + int i, + struct S s +) +{ + // Local capture object + struct Capture local; + + // Reassemble capture object from parts + local.i = i; + local.s = s; + // Call accessor�s init function + sycl::accessor::init(&local.outAcc, AccData, AccR1, AccR2, I); + + // Call the kernel body + callee(&local, id<1> wi); +} + +spir_func void callee(struct Capture* this, id<1> wi) +{ +} +``` + +As may be observed from the example above, standard-layout lambda capture +components are passed by value to the device as separate parameters. +This includes scalars, pointers, and standard-layout structs. +Certain object types defined by the SYCL standard, such as +`sycl::accessor` and `sycl::sampler` although standard-layout, cannot be +simply copied from host to device. Their layout on the device may be different +from that on the host. Some host fields may be absent on the device, +other host fields replaced with device-specific fields and +the host data pointer field must be translated to an OpenCL +or L0 memory object before it can be passed as a kernel parameter. +To enable all of this, the parameters of the `sycl::accessor` +and `sycl::sampler` init functions are transfered from +host to device separately. The values received on the device +are passed to the `init` functions executed on the device, +which results in the reassembly of the SYCL object in a form usable on the device. + +There is one other aspect of code generation. An �integration header� +is generated for use during host compilation. +This header file contains entries for each kernel. +Among the items it defines is a table of sizes and offsets of the +kernel parameters. +For the source example above the integration header contains the +following snippet: + +```C++ +// array representing signatures of all kernels defined in the +// corresponding source +static constexpr +const kernel_param_desc_t kernel_signatures[] = { + //--- _ZTSZZ4mainENKUlRN2cl4sycl7handlerEE19->18clES2_E6Worker + { kernel_param_kind_t::kind_accessor, 4062, 0 }, + { kernel_param_kind_t::kind_std_layout, 4, 32 }, + { kernel_param_kind_t::kind_std_layout, 4, 36 }, +}; +``` + +Each entry in the kernel_signatures table is a `kernel_param_desc_t` +object which contains three values: +1) an encoding of the type of capture object member +2) a field that encodes additional properties, and +3) an offset within the lambda object where the value of that kernel argument is placed + +The previous sections described how kernel arguments are handled today. +The next three sections describe support for arrays. + +

Fix 1: Kernel Arguments that are Standard-Layout Arrays

+ +As described earlier, each variable captured by a lambda that comprises a +SYCL kernel becomes a parameter of the kernel caller function. +For arrays, simply allowing them through would result in a +function parameter of array type. This is not supported in C++. +Therefore, the array needing capture is wrapped in a struct for +the purposes of passing to the device. Once received on the device +within its wrapper, the array is copied into the local capture object. +All references to the array within the kernel body are directed to +the non-wrapped array which is a member of the local capture object. + +

Source code fragment:

+ +```C++ + int array[100]; + auto outBuf = buffer(&output[0], num_items); + + myQueue.submit([&](handler &cgh) { + auto outAcc = outBuf.get_access(cgh); + cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { + outAcc[index] = array[index.get(0)]; + }); + }); +``` + +

Integration header produced:

+ +```C++ +static constexpr +const kernel_param_desc_t kernel_signatures[] = { + //--- _ZTSZZ4mainENKUlRN2cl4sycl7handlerEE16->18clES2_E6Worker + { kernel_param_kind_t::kind_accessor, 4062, 0 }, + { kernel_param_kind_t::kind_std_layout, 400, 32 }, +}; +``` + +

The changes to device code made to support this extension, in pseudo-code:

+ +```C++ +struct Capture { + sycl::accessor outAcc; + int array[100]; + () { + // Body + } +} + +struct wrapper { + int array[100]; +}; +spir_kernel void caller( + __global int* AccData, // arg1 of accessor init function + range<1> AccR1, // arg2 of accessor init function + range<1> AccR2, // arg3 of accessor init function + id<1> I, // arg4 of accessor init function + struct wrapper w_s // Pass the array wrapped in a struct +) +{ + // Local capture object + struct Capture local; + + // Reassemble capture object from parts + // Initialize array using existing clang Initialization mechanisms + local.array = w_s; + // Call accessor�s init function + sycl::accessor::init(&local.outAcc, AccData, AccR1, AccR2, I); + + callee(&local, id<1> wi); +} +``` + +

Fix 2: Kernel Arguments that are Arrays of Accessors

+ +Arrays of accessors are supported in a manner similar to that of a plain +accessor. For each accessor array element, the four values required to +call its init function are passed as separate arguments to the kernel. +Reassembly within the kernel caller is done by calling the `init` functions +of each accessor array element in ascending index value. + +

Source code fragment:

+ +```C++ + myQueue.submit([&](handler &cgh) { + using Accessor = + accessor; + Accessor inAcc[2] = {in_buffer1.get_access(cgh), + in_buffer2.get_access(cgh)}; + auto outAcc = out_buffer.get_access(cgh); + + cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { + outAcc[index] = inAcc[0][index] + inAcc[1][index]; + }); + }); +``` + +

Integration header:

+ +```C++ +static constexpr +const kernel_param_desc_t kernel_signatures[] = { + //--- _ZTSZZ4mainENKUlRN2cl4sycl7handlerEE20->18clES2_E6Worker + { kernel_param_kind_t::kind_accessor, 4062, 0 }, + { kernel_param_kind_t::kind_accessor, 4062, 32 }, + { kernel_param_kind_t::kind_accessor, 4062, 64 }, +}; +``` + +

Device code generated in pseudo-code form:

+ +```C++ +struct Capture { + sycl::accessor outAcc; + sycl::accessor inAcc[2]; + () { + // Body + } +} + +spir_kernel void caller( + __global int* outAccData, // args of OutAcc + range<1> outAccR1, + range<1> outAccR2, + id<1> outI, + __global int* inAccData_0, // args of inAcc[0] + range<1> inAccR1_0, + range<1> inAccR2_0, + id<1> inI_0, + __global int* inAccData_1, // args of inAcc[1] + range<1> inAccR1_1, + range<1> inAccR2_1, + id<1> inI_1, +) +{ + // Local capture object + struct Capture local; + + // Reassemble capture object from parts + // Call outAcc accessor�s init function + sycl::accessor::init(&local.outAcc, outAccData, outAccR1, outAccR2, outI); + + // Call inAcc[0] accessor�s init function + sycl::accessor::init(&local.inAcc[0], inAccData_0, inAccR1_0, inAccR2_0, inI_0); + + // Call inAcc[1] accessor�s init function + sycl::accessor::init(&local.inAcc[1], inAccData_1, inAccR1_1, inAccR2_1, inI_1); + + callee(&local, id<1> wi); +} +``` + +

Fix 3: Accessor Arrays within Structs

+ +Kernel parameters that are structs are traversed member +by member, recursively, to enumerate member structs that are one of +the SYCL special types: `sycl::accessor` and `sycl::sampler`. +The arguments of the `init` functions of each special struct encountered +in the traversal are added as separate arguments to the kernel. +Support for arrays containing SYCL special types +builds upon the support for single accessors within structs. +Each element of such arrays is treated as +an individual object, and the arguments of its init function +are added to the kernel arguments in sequence. +Within the kernel caller function, the lambda object is reassembled +in a manner similar to other instances of accessor arrays. + + +

Source code fragment:

+ +```C++ + myQueue.submit([&](handler &cgh) { + using Accessor = + accessor; + struct S { + int m; + sycl::accessor inAcc[2]; + } s = { 55, + {in_buffer1.get_access(cgh), + in_buffer2.get_access(cgh)} + }; + auto outAcc = out_buffer.get_access(cgh); + + cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { + outAcc[index] = s.m + s.inAcc[0][index] + s.inAcc[1][index]; + }); +}); +``` + +

Integration header:

+ +```C++ +static constexpr +const kernel_param_desc_t kernel_signatures[] = { + //--- _ZTSZZ4mainENKUlRN2cl4sycl7handlerEE20->18clES2_E6Worker + { kernel_param_kind_t::kind_accessor, 4062, 0 }, + { kernel_param_kind_t::kind_std_layout, 72, 32 }, + { kernel_param_kind_t::kind_accessor, 4062, 40 }, + { kernel_param_kind_t::kind_accessor, 4062, 72 }, + +}; +``` + +

Device code generated in pseudo-code form:

+ +```C++ +struct Capture { + sycl::accessor outAcc; + struct S s; + () { + // Body + } +} + +spir_kernel void caller( + __global int* outAccData, // args of OutAcc + range<1> outAccR1, + range<1> outAccR2, + id<1> outI, + struct S s, // the struct S + __global int* inAccData_0, // args of s.inAcc[0] + range<1> inAccR1_0, + range<1> inAccR2_0, + id<1> inI_0, + __global int* inAccData_1, // args of s.inAcc[1] + range<1> inAccR1_1, + range<1> inAccR2_1, + id<1> inI_1, +) +{ + // Local capture object + struct Capture local; + + // Reassemble capture object from parts + + // 1. Copy struct argument contents to local copy + local.s = s; + + // 2. Initialize accessors by calling init functions + // 2a. Call outAcc accessor�s init function + sycl::accessor::init( + &local.outAcc, outAccData, outAccR1, outAccR2, outI); + + // 2b. Call s.inAcc[0] accessor�s init function + sycl::accessor::init( + &local.s.inAcc[0], inAccData_0, inAccR1_0, inAccR2_0, inI_0); + + // 2c. Call s.inAcc[1] accessor�s init function + sycl::accessor::init( + &local.s.inAcc[1], inAccData_1, inAccR1_1, inAccR2_1, inI_1); + + callee(&local, id<1> wi); +} +``` From 1bf090337d01834a045dd7d42ace24b6e6f58541 Mon Sep 17 00:00:00 2001 From: rdeodhar Date: Fri, 12 Jun 2020 11:30:54 -0700 Subject: [PATCH 04/14] Formatting changes. --- clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp b/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp index 0f2b0b32d315c..b7384ae0af9f9 100755 --- a/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp @@ -1,5 +1,6 @@ -// RUN: %clang -I %S/Inputs -fsycl-device-only -Xclang -fsycl-int-header=/iusers/rdeodhar/work/dpcc/jira/7004/t.h %s -c -o %T/kernel.spv -// RUN: FileCheck -input-file=%t.h %s +// RUN: %clang -I %S/Inputs -fsycl-device-only -Xclang +// -fsycl-int-header=/iusers/rdeodhar/work/dpcc/jira/7004/t.h %s -c -o +// %T/kernel.spv RUN: FileCheck -input-file=%t.h %s // This test checks the integration header generated when // the kernel argument is an Accessor array. @@ -49,8 +50,5 @@ int main() { Accessor acc[2]; - a_kernel( - [=]() { - acc[1].use(); - }); + a_kernel([=]() { acc[1].use(); }); } From 5d5121b42794ee1f45bbd2fa34313df147437c9a Mon Sep 17 00:00:00 2001 From: rdeodhar Date: Fri, 12 Jun 2020 16:06:09 -0700 Subject: [PATCH 05/14] Formatting changes. --- clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp b/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp index b7384ae0af9f9..f977534d260fd 100755 --- a/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp @@ -1,6 +1,5 @@ -// RUN: %clang -I %S/Inputs -fsycl-device-only -Xclang -// -fsycl-int-header=/iusers/rdeodhar/work/dpcc/jira/7004/t.h %s -c -o -// %T/kernel.spv RUN: FileCheck -input-file=%t.h %s +// RUN: %clang -I %S/Inputs -fsycl-device-only -Xclang -fsycl-int-header=/iusers/rdeodhar/work/dpcc/jira/7004/t.h %s -c -o %T/kernel.spv +// RUN: FileCheck -input-file=%t.h %s // This test checks the integration header generated when // the kernel argument is an Accessor array. From f03edd9f30db18912ea8ca8df5ebe766fb0b5f56 Mon Sep 17 00:00:00 2001 From: rdeodhar Date: Mon, 15 Jun 2020 09:00:11 -0700 Subject: [PATCH 06/14] Correction to a test. --- clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp b/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp index f977534d260fd..1020fbb4c12e2 100755 --- a/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp @@ -1,4 +1,4 @@ -// RUN: %clang -I %S/Inputs -fsycl-device-only -Xclang -fsycl-int-header=/iusers/rdeodhar/work/dpcc/jira/7004/t.h %s -c -o %T/kernel.spv +// RUN: %clang -I %S/Inputs -fsycl-device-only -Xclang -fsycl-int-header=%t.h %s -c -o %T/kernel.spv // RUN: FileCheck -input-file=%t.h %s // This test checks the integration header generated when From 0412db38150147199a6090a307678be53bb399ed Mon Sep 17 00:00:00 2001 From: rdeodhar Date: Wed, 24 Jun 2020 21:10:27 -0700 Subject: [PATCH 07/14] Array elements are now passed as individual parameters. --- clang/lib/Sema/SemaSYCL.cpp | 147 +++++++++++++++++++----------------- 1 file changed, 76 insertions(+), 71 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index d0a016a39b23f..6cc14e3df4835 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -688,22 +688,6 @@ static ParamDesc makeParamDesc(ASTContext &Ctx, const CXXBaseSpecifier &Src, Ctx.getTrivialTypeSourceInfo(Ty)); } -// Create a new class around a field - used to wrap arrays. -static RecordDecl *wrapAnArray(ASTContext &Ctx, const QualType ArgTy, - FieldDecl *&Field) { - RecordDecl *NewClass = Ctx.buildImplicitRecord("wrapped_array"); - NewClass->startDefinition(); - Field = FieldDecl::Create( - Ctx, NewClass, SourceLocation(), SourceLocation(), - /*Id=*/nullptr, ArgTy, - Ctx.getTrivialTypeSourceInfo(ArgTy, SourceLocation()), - /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit); - Field->setAccess(AS_public); - NewClass->addDecl(Field); - NewClass->completeDefinition(); - return NewClass; -} - /// \return the target of given SYCL accessor type static target getAccessTarget(const ClassTemplateSpecializationDecl *AccTy) { return static_cast( @@ -799,15 +783,21 @@ static void VisitField(CXXRecordDecl *Owner, RangeTy &&Item, QualType ItemTy, Handlers &... handlers) { if (Util::isSyclAccessorType(ItemTy)) KF_FOR_EACH(handleSyclAccessorType, Item, ItemTy); - if (Util::isSyclStreamType(ItemTy)) + else if (Util::isSyclStreamType(ItemTy)) KF_FOR_EACH(handleSyclStreamType, Item, ItemTy); - if (ItemTy->isStructureOrClassType()) + else if (ItemTy->isStructureOrClassType()) VisitAccessorWrapper(Owner, Item, ItemTy->getAsCXXRecordDecl(), handlers...); - if (ItemTy->isArrayType()) + else if (ItemTy->isArrayType()) VisitArrayElements(Item, ItemTy, handlers...); } +template +static void VisitScalarField(CXXRecordDecl *Owner, RangeTy &&Item, QualType ItemTy, + Handlers &... handlers) { + KF_FOR_EACH(handleScalarType, Item, ItemTy); +} + template static void VisitArrayElements(RangeTy Item, QualType FieldTy, Handlers &... handlers) { @@ -816,7 +806,10 @@ static void VisitArrayElements(RangeTy Item, QualType FieldTy, int64_t ElemCount = CAT->getSize().getSExtValue(); std::initializer_list{(handlers.enterArray(), 0)...}; for (int64_t Count = 0; Count < ElemCount; Count++) { - VisitField(nullptr, Item, ET, handlers...); + if (ET->isScalarType()) + VisitScalarField(nullptr, Item, ET, handlers...); + else + VisitField(nullptr, Item, ET, handlers...); (void)std::initializer_list{(handlers.nextElement(ET), 0)...}; } (void)std::initializer_list{(handlers.leaveArray(ET, ElemCount), 0)...}; @@ -919,6 +912,9 @@ template class SyclKernelFieldHandler { virtual bool handleReferenceType(FieldDecl *, QualType) { return true; } virtual bool handlePointerType(FieldDecl *, QualType) { return true; } virtual bool handleArrayType(FieldDecl *, QualType) { return true; } + virtual bool handleScalarType(const CXXBaseSpecifier &, QualType) { + return true; + } virtual bool handleScalarType(FieldDecl *, QualType) { return true; } // Most handlers shouldn't be handling this, just the field checker. virtual bool handleOtherType(FieldDecl *, QualType) { return true; } @@ -1003,7 +999,8 @@ class SyclKernelFieldChecker public: SyclKernelFieldChecker(Sema &S) - : SyclKernelFieldHandler(S), Diag(S.getASTContext().getDiagnostics()) {} + : SyclKernelFieldHandler(S), Diag(S.getASTContext().getDiagnostics()) { + } bool isValid() { return !IsInvalid; } bool handleReferenceType(FieldDecl *FD, QualType FieldTy) final { @@ -1052,6 +1049,10 @@ class SyclKernelDeclCreator size_t LastParamIndex = 0; void addParam(const FieldDecl *FD, QualType FieldTy) { + const ConstantArrayType *CAT = + SemaRef.getASTContext().getAsConstantArrayType(FieldTy); + if (CAT) + FieldTy = CAT->getElementType(); ParamDesc newParamDesc = makeParamDesc(FD, FieldTy); addParam(newParamDesc, FieldTy); } @@ -1068,7 +1069,6 @@ class SyclKernelDeclCreator SemaRef.getASTContext(), KernelDecl, SourceLocation(), SourceLocation(), std::get<1>(newParamDesc), std::get<0>(newParamDesc), std::get<2>(newParamDesc), SC_None, /*DefArg*/ nullptr); - NewParam->setScopeInfo(0, Params.size()); NewParam->setIsUsed(); @@ -1131,7 +1131,8 @@ class SyclKernelDeclCreator : SyclKernelFieldHandler(S), KernelDecl(createKernelDecl(S.getASTContext(), Name, Loc, IsInline, IsSIMDKernel)), - ArgChecker(ArgChecker), FuncContext(SemaRef, KernelDecl) {} + ArgChecker(ArgChecker), FuncContext(SemaRef, KernelDecl) { + } ~SyclKernelDeclCreator() { ASTContext &Ctx = SemaRef.getASTContext(); @@ -1189,18 +1190,12 @@ class SyclKernelDeclCreator return true; } - bool handleArrayType(FieldDecl *FD, QualType FieldTy) final { - RecordDecl *NewClass = wrapAnArray(SemaRef.getASTContext(), FieldTy, FD); - QualType ST = SemaRef.getASTContext().getRecordType(NewClass); - addParam(FD, ST); - return true; - } - bool handleScalarType(FieldDecl *FD, QualType FieldTy) final { addParam(FD, FieldTy); return true; } + //FIXME Remove this function when structs are replaced by their fields bool handleStructType(FieldDecl *FD, QualType FieldTy) final { addParam(FD, FieldTy); return true; @@ -1225,6 +1220,8 @@ class SyclKernelDeclCreator return ArrayRef(std::begin(Params) + LastParamIndex, std::end(Params)); } + + using SyclKernelFieldHandler::handleScalarType; }; class SyclKernelBodyCreator @@ -1309,11 +1306,9 @@ class SyclKernelBodyCreator return Result; } - void createExprForStructOrScalar(FieldDecl *FD) { + Expr *createInitExpr(FieldDecl *FD) { ParmVarDecl *KernelParameter = DeclCreator.getParamVarDeclsForCurrentField()[0]; - InitializedEntity Entity = - InitializedEntity::InitializeMember(FD, &VarEntity); QualType ParamType = KernelParameter->getOriginalType(); Expr *DRE = SemaRef.BuildDeclRefExpr(KernelParameter, ParamType, VK_LValue, SourceLocation()); @@ -1323,32 +1318,49 @@ class SyclKernelBodyCreator DRE = ImplicitCastExpr::Create(SemaRef.Context, FD->getType(), CK_AddressSpaceConversion, DRE, nullptr, VK_RValue); + return DRE; + } + + void createExprForStructOrScalar(FieldDecl *FD) { + InitializedEntity Entity = + InitializedEntity::InitializeMember(FD, &VarEntity); InitializationKind InitKind = InitializationKind::CreateCopy(SourceLocation(), SourceLocation()); + Expr *DRE = createInitExpr(FD); InitializationSequence InitSeq(SemaRef, Entity, InitKind, DRE); - ExprResult MemberInit = InitSeq.Perform(SemaRef, Entity, InitKind, DRE); InitExprs.push_back(MemberInit.get()); } - void createExprForArray(FieldDecl *FD) { - ParmVarDecl *KernelParameter = - DeclCreator.getParamVarDeclsForCurrentField()[0]; - QualType ParamType = KernelParameter->getOriginalType(); - CXXRecordDecl *WrapperStruct = ParamType->getAsCXXRecordDecl(); - // The first and only field of the wrapper struct is the array - FieldDecl *Array = *(WrapperStruct->field_begin()); - Expr *DRE = SemaRef.BuildDeclRefExpr(KernelParameter, ParamType, VK_LValue, - SourceLocation()); - Expr *InitExpr = BuildMemberExpr(DRE, Array); - InitializationKind InitKind = InitializationKind::CreateDirect( - SourceLocation(), SourceLocation(), SourceLocation()); - InitializedEntity Entity = InitializedEntity::InitializeLambdaCapture( - nullptr, Array->getType(), SourceLocation()); - InitializationSequence InitSeq(SemaRef, Entity, InitKind, InitExpr); - ExprResult MemberInit = - InitSeq.Perform(SemaRef, Entity, InitKind, InitExpr); - InitExprs.push_back(MemberInit.get()); + void createExprForScalarElement(FieldDecl *FD, QualType FieldTy) { + InitializedEntity ArrayEntity = + InitializedEntity::InitializeMember(FD, &VarEntity); + InitializationKind InitKind = + InitializationKind::CreateCopy(SourceLocation(), SourceLocation()); + Expr *DRE = createInitExpr(FD); + Expr *Idx = dyn_cast(MemberExprBases.back())->getIdx(); + llvm::APSInt Result; + SemaRef.VerifyIntegerConstantExpression(Idx, &Result); + uint64_t IntIdx = Result.getZExtValue(); + InitializedEntity Entity = InitializedEntity::InitializeElement( + SemaRef.getASTContext(), IntIdx, ArrayEntity); + InitializationSequence InitSeq(SemaRef, Entity, InitKind, DRE); + ExprResult MemberInit = InitSeq.Perform(SemaRef, Entity, InitKind, DRE); + llvm::SmallVector ArrayInitExprs; + if (IntIdx > 0) { + // Continue with the current InitList + InitListExpr *ILE = cast(InitExprs.back()); + InitExprs.pop_back(); + llvm::ArrayRef L = ILE->inits(); + for (size_t I = 0; I < L.size(); I++) + ArrayInitExprs.push_back(L[I]); + } + ArrayInitExprs.push_back(MemberInit.get()); + Expr *ILE = new (SemaRef.getASTContext()) + InitListExpr(SemaRef.getASTContext(), SourceLocation(), ArrayInitExprs, + SourceLocation()); + ILE->setType(FD->getType()); + InitExprs.push_back(ILE); } void createSpecialMethodCall(const CXXRecordDecl *SpecialClass, Expr *Base, @@ -1479,18 +1491,17 @@ class SyclKernelBodyCreator return true; } + //FIXME Remove this function when structs are replaced by their fields bool handleStructType(FieldDecl *FD, QualType FieldTy) final { createExprForStructOrScalar(FD); return true; } bool handleScalarType(FieldDecl *FD, QualType FieldTy) final { - createExprForStructOrScalar(FD); - return true; - } - - bool handleArrayType(FieldDecl *FD, QualType FieldTy) final { - createExprForArray(FD); + if (dyn_cast(MemberExprBases.back())) + createExprForScalarElement(FD, FieldTy); + else + createExprForStructOrScalar(FD); return true; } @@ -1531,6 +1542,7 @@ class SyclKernelBodyCreator using SyclKernelFieldHandler::enterArray; using SyclKernelFieldHandler::enterField; + using SyclKernelFieldHandler::handleScalarType; using SyclKernelFieldHandler::leaveField; }; @@ -1547,13 +1559,9 @@ class SyclKernelIntHeaderCreator uint64_t Size; const ConstantArrayType *CAT = SemaRef.getASTContext().getAsConstantArrayType(ArgTy); - if (CAT) { - QualType ET = CAT->getElementType(); - Size = static_cast(CAT->getSize().getZExtValue()) * - SemaRef.getASTContext().getTypeSizeInChars(ET).getQuantity(); - } else { - Size = SemaRef.getASTContext().getTypeSizeInChars(ArgTy).getQuantity(); - } + if (CAT) + ArgTy = CAT->getElementType(); + Size = SemaRef.getASTContext().getTypeSizeInChars(ArgTy).getQuantity(); Header.addParamDesc(Kind, static_cast(Size), static_cast(CurOffset)); } @@ -1630,12 +1638,7 @@ class SyclKernelIntHeaderCreator return true; } - bool handleArrayType(FieldDecl *FD, QualType FieldTy) final { - wrapAnArray(SemaRef.getASTContext(), FieldTy, FD); - addParam(FD, FD->getType(), SYCLIntegrationHeader::kind_std_layout); - return true; - } - + //FIXME Remove this function when structs are replaced by their fields bool handleStructType(FieldDecl *FD, QualType FieldTy) final { addParam(FD, FieldTy, SYCLIntegrationHeader::kind_std_layout); return true; @@ -1692,6 +1695,8 @@ class SyclKernelIntHeaderCreator } CurOffset -= ArraySize; } + + using SyclKernelFieldHandler::handleScalarType; }; } // namespace From af0b0c948654023f79cc2a6e43cd71dc5aa9ed18 Mon Sep 17 00:00:00 2001 From: rdeodhar Date: Thu, 25 Jun 2020 12:38:22 -0700 Subject: [PATCH 08/14] Corrections to temporarily disable tests expected to fail. --- clang/lib/Sema/SemaSYCL.cpp | 5 +++++ clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp | 1 + clang/test/CodeGenSYCL/kernel-param-acc-array.cpp | 1 + clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp | 1 + clang/test/CodeGenSYCL/kernel-param-member-acc-array.cpp | 1 + clang/test/CodeGenSYCL/kernel-param-pod-array-ih.cpp | 1 + clang/test/CodeGenSYCL/kernel-param-pod-array.cpp | 1 + clang/test/SemaSYCL/array-kernel-param.cpp | 1 + sycl/test/array_param/array-kernel-param-run.cpp | 1 + 9 files changed, 13 insertions(+) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 21d5b2e9f42eb..50c5a0804241a 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -786,11 +786,16 @@ static void VisitField(CXXRecordDecl *Owner, RangeTy &&Item, QualType ItemTy, KF_FOR_EACH(handleSyclAccessorType, Item, ItemTy); else if (Util::isSyclStreamType(ItemTy)) KF_FOR_EACH(handleSyclStreamType, Item, ItemTy); + else if (Util::isSyclSamplerType(ItemTy)) + KF_FOR_EACH(handleSyclSamplerType, Item, ItemTy); else if (ItemTy->isStructureOrClassType()) VisitAccessorWrapper(Owner, Item, ItemTy->getAsCXXRecordDecl(), handlers...); +#if 0 + // FIXME Enable this when structs are replaced by their fields else if (ItemTy->isArrayType()) VisitArrayElements(Item, ItemTy, handlers...); +#endif } template diff --git a/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp b/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp index 1020fbb4c12e2..5ac40c1d18d18 100755 --- a/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp @@ -1,5 +1,6 @@ // RUN: %clang -I %S/Inputs -fsycl-device-only -Xclang -fsycl-int-header=%t.h %s -c -o %T/kernel.spv // RUN: FileCheck -input-file=%t.h %s +// XFAIL: * // This test checks the integration header generated when // the kernel argument is an Accessor array. diff --git a/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp b/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp index 2306599393b7a..48ca6def5f74a 100644 --- a/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -fsycl -fsycl-is-device -I %S/Inputs -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s +// XFAIL: * // This test checks a kernel argument that is an Accessor array diff --git a/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp b/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp index 649d6fd88f0b8..21726109a1be9 100644 --- a/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp @@ -1,5 +1,6 @@ // RUN: %clang -I %S/Inputs -fsycl-device-only -Xclang -fsycl-int-header=%t.h %s -c -o %T/kernel.spv // RUN: FileCheck -input-file=%t.h %s +// XFAIL: * // This test checks the integration header when kernel argument // is a struct containing an Accessor array. diff --git a/clang/test/CodeGenSYCL/kernel-param-member-acc-array.cpp b/clang/test/CodeGenSYCL/kernel-param-member-acc-array.cpp index d72691eb0e279..ae476edf08c2e 100644 --- a/clang/test/CodeGenSYCL/kernel-param-member-acc-array.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-member-acc-array.cpp @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -fsycl -fsycl-is-device -I %S/Inputs -fsycl-int-header=%t.h -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s +// XFAIL: * // This test checks a kernel with struct parameter that contains an Accessor array. diff --git a/clang/test/CodeGenSYCL/kernel-param-pod-array-ih.cpp b/clang/test/CodeGenSYCL/kernel-param-pod-array-ih.cpp index 799a0fb9183f1..0c57501c7a497 100755 --- a/clang/test/CodeGenSYCL/kernel-param-pod-array-ih.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-pod-array-ih.cpp @@ -1,5 +1,6 @@ // RUN: %clang -I %S/Inputs -fsycl-device-only -Xclang -fsycl-int-header=%t.h %s -c -o %T/kernel.spv // RUN: FileCheck -input-file=%t.h %s +// XFAIL: * // This test checks the integration header generated for a kernel // with an argument that is a POD array. diff --git a/clang/test/CodeGenSYCL/kernel-param-pod-array.cpp b/clang/test/CodeGenSYCL/kernel-param-pod-array.cpp index 7549afa16c91d..191b740aca1aa 100755 --- a/clang/test/CodeGenSYCL/kernel-param-pod-array.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-pod-array.cpp @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -fsycl -fsycl-is-device -I %S/Inputs -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s +// XFAIL: * // This test checks a kernel with an argument that is a POD array. diff --git a/clang/test/SemaSYCL/array-kernel-param.cpp b/clang/test/SemaSYCL/array-kernel-param.cpp index d94c76600bb13..69fa04f048884 100755 --- a/clang/test/SemaSYCL/array-kernel-param.cpp +++ b/clang/test/SemaSYCL/array-kernel-param.cpp @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -I %S/Inputs -fsycl -fsycl-is-device -ast-dump %s | FileCheck %s +// XFAIL: * // This test checks that compiler generates correct kernel arguments for // arrays, Accessor arrays, and structs containing Accessors. diff --git a/sycl/test/array_param/array-kernel-param-run.cpp b/sycl/test/array_param/array-kernel-param-run.cpp index 179901a072985..379066340714d 100755 --- a/sycl/test/array_param/array-kernel-param-run.cpp +++ b/sycl/test/array_param/array-kernel-param-run.cpp @@ -5,6 +5,7 @@ // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out +// XFAIL: * #include #include From d5fb2d95b74355199b78c1823edfaba1b5243650 Mon Sep 17 00:00:00 2001 From: rdeodhar Date: Thu, 25 Jun 2020 17:13:59 -0700 Subject: [PATCH 09/14] Changed tests to work with current array support. --- .../CodeGenSYCL/kernel-param-acc-array-ih.cpp | 2 - .../CodeGenSYCL/kernel-param-acc-array.cpp | 54 ++++++------- .../CodeGenSYCL/kernel-param-pod-array-ih.cpp | 11 +-- .../CodeGenSYCL/kernel-param-pod-array.cpp | 28 ++++--- clang/test/SemaSYCL/array-kernel-param.cpp | 77 +++++++++---------- 5 files changed, 82 insertions(+), 90 deletions(-) diff --git a/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp b/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp index 5ac40c1d18d18..f9dfd144079e1 100755 --- a/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp @@ -1,6 +1,5 @@ // RUN: %clang -I %S/Inputs -fsycl-device-only -Xclang -fsycl-int-header=%t.h %s -c -o %T/kernel.spv // RUN: FileCheck -input-file=%t.h %s -// XFAIL: * // This test checks the integration header generated when // the kernel argument is an Accessor array. @@ -21,7 +20,6 @@ // CHECK: static constexpr // CHECK-NEXT: const kernel_param_desc_t kernel_signatures[] = { // CHECK-NEXT: //--- _ZTSZ4mainE8kernel_A -// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 24, 0 }, // CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 0 }, // CHECK-NEXT: { kernel_param_kind_t::kind_accessor, 4062, 12 }, // CHECK-EMPTY: diff --git a/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp b/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp index 48ca6def5f74a..c70e25b2feb72 100644 --- a/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp @@ -1,5 +1,4 @@ // RUN: %clang_cc1 -fsycl -fsycl-is-device -I %S/Inputs -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s -// XFAIL: * // This test checks a kernel argument that is an Accessor array @@ -26,15 +25,14 @@ int main() { // Check kernel_A parameters // CHECK: define spir_kernel void @{{.*}}kernel_A -// CHECK-SAME: %struct.{{.*}}.wrapped_array* byval{{.*}}align 4 %_arg_, -// CHECK-SAME: i32 addrspace(1)* [[MEM_ARG1:%[a-zA-Z0-9_]+_1]], -// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval{{.*}}align 4 [[ACC_RANGE1:%[a-zA-Z0-9_]+_2]], -// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval{{.*}}align 4 [[MEM_RANGE1:%[a-zA-Z0-9_]+_3]], -// CHECK-SAME: %"struct.{{.*}}.cl::sycl::id"* byval{{.*}}align 4 [[OFFSET1:%[a-zA-Z0-9_]+_4]], -// CHECK-SAME: i32 addrspace(1)* [[MEM_ARG2:%[a-zA-Z0-9_]+_5]], -// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval{{.*}}align 4 [[ACC_RANGE2:%[a-zA-Z0-9_]+_7]], -// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval{{.*}}align 4 [[MEM_RANGE2:%[a-zA-Z0-9_]+_8]], -// CHECK-SAME: %"struct.{{.*}}.cl::sycl::id"* byval{{.*}}align 4 [[OFFSET2:%[a-zA-Z0-9_]+_9]]) +// CHECK-SAME: i32 addrspace(1)* [[MEM_ARG1:%[a-zA-Z0-9_]+]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval{{.*}}align 4 [[ACC_RANGE1:%[a-zA-Z0-9_]+_1]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval{{.*}}align 4 [[MEM_RANGE1:%[a-zA-Z0-9_]+_2]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::id"* byval{{.*}}align 4 [[OFFSET1:%[a-zA-Z0-9_]+_3]], +// CHECK-SAME: i32 addrspace(1)* [[MEM_ARG2:%[a-zA-Z0-9_]+_4]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval{{.*}}align 4 [[ACC_RANGE2:%[a-zA-Z0-9_]+_6]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::range"* byval{{.*}}align 4 [[MEM_RANGE2:%[a-zA-Z0-9_]+_7]], +// CHECK-SAME: %"struct.{{.*}}.cl::sycl::id"* byval{{.*}}align 4 [[OFFSET2:%[a-zA-Z0-9_]+_8]]) // CHECK alloca for pointer arguments // CHECK: [[MEM_ARG1:%[a-zA-Z0-9_.]+]] = alloca i32 addrspace(1)*, align 8 @@ -51,30 +49,26 @@ int main() { // CHECK: [[MEM_RANGE2:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::range" // CHECK: [[OFFSET2:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::id" -// Check array initialization -// CHECK: arrayinit.body: -// CHECK: arrayinit.end: +// CHECK accessor array GEP for acc[0] +// CHECK: [[ACCESSOR_ARRAY1:%[a-zA-Z0-9_]+]] = getelementptr inbounds %"class.{{.*}}.anon", %"class.{{.*}}.anon"* [[LOCAL_OBJECT]], i32 0, i32 0 +// CHECK: [[Z0:%[a-zA-Z0-9_]*]] = getelementptr inbounds{{.*}}[[ACCESSOR_ARRAY1]], i64 0, i64 0 -// CHECK accessor array GEP for acc[0] -// CHECK: [[ACCESSOR_ARRAY1:%[a-zA-Z0-9_]+]] = getelementptr inbounds %"class.{{.*}}.anon", %"class.{{.*}}.anon"* [[LOCAL_OBJECT]], i32 0, i32 0 -// CHECK: [[Z0:%[a-zA-Z0-9_]*]] = getelementptr inbounds{{.*}}[[ACCESSOR_ARRAY1]], i64 0, i64 0 +// CHECK load from kernel pointer argument alloca +// CHECK: [[MEM_LOAD1:%[a-zA-Z0-9_]+]] = load i32 addrspace(1)*, i32 addrspace(1)** [[MEM_ARG1]] -// CHECK load from kernel pointer argument alloca -// CHECK: [[MEM_LOAD1:%[a-zA-Z0-9_]+]] = load i32 addrspace(1)*, i32 addrspace(1)** [[MEM_ARG1]] +// CHECK acc[0] __init method call +// CHECK: [[ACC_CAST1:%[0-9]+]] = addrspacecast %"class{{.*}}accessor"* [[Z0]] to %"class{{.*}}accessor" addrspace(4)* -// CHECK acc[0] __init method call -// CHECK: [[ACC_CAST1:%[0-9]+]] = addrspacecast %"class{{.*}}accessor"* [[Z0]] to %"class{{.*}}accessor" addrspace(4)* +// CHECK: call spir_func void @{{.*}}__init{{.*}}(%"class.{{.*}}.cl::sycl::accessor" addrspace(4)* [[ACC_CAST1]], i32 addrspace(1)* [[MEM_LOAD1]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[ACC_RANGE1]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[MEM_RANGE1]], %"struct.{{.*}}.cl::sycl::id"* byval({{.*}}) align 4 [[OFFSET1]]) -// CHECK: call spir_func void @{{.*}}__init{{.*}}(%"class.{{.*}}.cl::sycl::accessor" addrspace(4)* [[ACC_CAST1]], i32 addrspace(1)* [[MEM_LOAD1]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[ACC_RANGE1]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[MEM_RANGE1]], %"struct.{{.*}}.cl::sycl::id"* byval({{.*}}) align 4 [[OFFSET1]]) +// CHECK accessor array GEP for acc[1] +// CHECK: [[ACCESSOR_ARRAY2:%[a-zA-Z0-9_]+]] = getelementptr inbounds %"class.{{.*}}.anon", %"class.{{.*}}.anon"* [[LOCAL_OBJECT]], i32 0, i32 0 +// CHECK: [[Z1:%[a-zA-Z0-9_]*]] = getelementptr inbounds{{.*}}[[ACCESSOR_ARRAY2]], i64 0, i64 1 -// CHECK accessor array GEP for acc[1] -// CHECK: [[ACCESSOR_ARRAY2:%[a-zA-Z0-9_]+]] = getelementptr inbounds %"class.{{.*}}.anon", %"class.{{.*}}.anon"* [[LOCAL_OBJECT]], i32 0, i32 0 -// CHECK: [[Z1:%[a-zA-Z0-9_]*]] = getelementptr inbounds{{.*}}[[ACCESSOR_ARRAY2]], i64 0, i64 1 +// CHECK load from kernel pointer argument alloca +// CHECK: [[MEM_LOAD2:%[a-zA-Z0-9_]+]] = load i32 addrspace(1)*, i32 addrspace(1)** [[MEM_ARG2]] -// CHECK load from kernel pointer argument alloca -// CHECK: [[MEM_LOAD2:%[a-zA-Z0-9_]+]] = load i32 addrspace(1)*, i32 addrspace(1)** [[MEM_ARG2]] +// CHECK acc[1] __init method call +// CHECK: [[ACC_CAST2:%[0-9]+]] = addrspacecast %"class{{.*}}accessor"* [[Z1]] to %"class{{.*}}accessor" addrspace(4)* -// CHECK acc[1] __init method call -// CHECK: [[ACC_CAST2:%[0-9]+]] = addrspacecast %"class{{.*}}accessor"* [[Z1]] to %"class{{.*}}accessor" addrspace(4)* - -// CHECK: call spir_func void @{{.*}}__init{{.*}}(%"class.{{.*}}.cl::sycl::accessor" addrspace(4)* [[ACC_CAST2]], i32 addrspace(1)* [[MEM_LOAD2]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[ACC_RANGE2]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[MEM_RANGE2]], %"struct.{{.*}}.cl::sycl::id"* byval({{.*}}) align 4 [[OFFSET2]]) +// CHECK: call spir_func void @{{.*}}__init{{.*}}(%"class.{{.*}}.cl::sycl::accessor" addrspace(4)* [[ACC_CAST2]], i32 addrspace(1)* [[MEM_LOAD2]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[ACC_RANGE2]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[MEM_RANGE2]], %"struct.{{.*}}.cl::sycl::id"* byval({{.*}}) align 4 [[OFFSET2]]) diff --git a/clang/test/CodeGenSYCL/kernel-param-pod-array-ih.cpp b/clang/test/CodeGenSYCL/kernel-param-pod-array-ih.cpp index 0c57501c7a497..349d540f22ebc 100755 --- a/clang/test/CodeGenSYCL/kernel-param-pod-array-ih.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-pod-array-ih.cpp @@ -1,7 +1,4 @@ // RUN: %clang -I %S/Inputs -fsycl-device-only -Xclang -fsycl-int-header=%t.h %s -c -o %T/kernel.spv -// RUN: FileCheck -input-file=%t.h %s -// XFAIL: * - // This test checks the integration header generated for a kernel // with an argument that is a POD array. @@ -21,7 +18,11 @@ // CHECK: static constexpr // CHECK-NEXT: const kernel_param_desc_t kernel_signatures[] = { // CHECK-NEXT: //--- _ZTSZ4mainE8kernel_B -// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 400, 0 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 0 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 4 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 8 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 12 }, +// CHECK-NEXT: { kernel_param_kind_t::kind_std_layout, 4, 16 }, // CHECK-EMPTY: // CHECK-NEXT: }; @@ -43,7 +44,7 @@ __attribute__((sycl_kernel)) void a_kernel(Func kernelFunc) { int main() { - int a[100]; + int a[5]; a_kernel( [=]() { diff --git a/clang/test/CodeGenSYCL/kernel-param-pod-array.cpp b/clang/test/CodeGenSYCL/kernel-param-pod-array.cpp index 191b740aca1aa..9a35239ad1fb8 100755 --- a/clang/test/CodeGenSYCL/kernel-param-pod-array.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-pod-array.cpp @@ -1,5 +1,4 @@ // RUN: %clang_cc1 -fsycl -fsycl-is-device -I %S/Inputs -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s -// XFAIL: * // This test checks a kernel with an argument that is a POD array. @@ -14,26 +13,31 @@ __attribute__((sycl_kernel)) void a_kernel(Func kernelFunc) { int main() { - int a[100]; + int a[2]; a_kernel( [=]() { - int local = a[3]; + int local = a[1]; }); } // Check kernel_B parameters // CHECK: define spir_kernel void @{{.*}}kernel_B -// CHECK-SAME: %struct.{{.*}}.wrapped_array* byval{{.*}}align 4 [[ARG_STRUCT:%[a-zA-Z0-9_]+]] +// CHECK-SAME: i32 [[ELEM_ARG0:%[a-zA-Z0-9_]+]], +// CHECK-SAME: i32 [[ELEM_ARG1:%[a-zA-Z_]+_[0-9]+]]) // Check local lambda object alloca -// CHECK: [[LOCAL_OBJECT:%0]] = alloca %"class.{{.*}}.anon", align 4 +// CHECK: [[LOCAL_OBJECT:%[0-9]+]] = alloca %"class.{{.*}}.anon", align 4 -// Check init of local array -// CHECK: [[ARRAY1:%[a-zA-Z0-9_]+]] = getelementptr inbounds %"class.{{.*}}.anon", %"class.{{.*}}.anon"* [[LOCAL_OBJECT]], i32 0, i32 0 - -// CHECK: [[ARRAY2:%[a-zA-Z0-9_]+]] = getelementptr inbounds %struct.{{.*}}.wrapped_array, %struct.{{.*}}.wrapped_array* [[ARG_STRUCT]], i32 0, i32 0 +// Check local variables created for parameters +// CHECK: store i32 [[ELEM_ARG0]], i32* [[ELEM_L0:%[a-zA-Z_]+.addr]], align 4 +// CHECK: store i32 [[ELEM_ARG1]], i32* [[ELEM_L1:%[a-zA-Z_]+.addr[0-9]*]], align 4 -// CHECK: %{{[a-zA-Z0-9._]+}} = getelementptr inbounds [100 x i32], [100 x i32]* [[ARRAY1]], i64 0, i64 0 - -// CHECK: %{{[a-zA-Z0-9_]+}} = getelementptr inbounds [100 x i32], [100 x i32]* [[ARRAY2]], i64 0, i64 +// Check init of local array +// CHECK: [[ARRAY:%[0-9]*]] = getelementptr inbounds %"class.{{.*}}.anon", %"class.{{.*}}.anon"* [[LOCAL_OBJECT]], i32 0, i32 0 +// CHECK: [[ARRAY_BEGIN:%[a-zA-Z_.]+]] = getelementptr inbounds [2 x i32], [2 x i32]* [[ARRAY]], i64 0, i64 0 +// CHECK: [[ARRAY0:%[0-9]*]] = load i32, i32* [[ELEM_L0]], align 4 +// CHECK: store i32 [[ARRAY0]], i32* [[ARRAY_BEGIN]], align 4 +// CHECK: [[ARRAY_ELEMENT:%[a-zA-Z_.]+]] = getelementptr inbounds i32, i32* %arrayinit.begin, i64 1 +// CHECK: [[ARRAY1:%[0-9]*]] = load i32, i32* [[ELEM_L1]], align 4 +// CHECK: store i32 [[ARRAY1]], i32* [[ARRAY_ELEMENT]], align 4 \ No newline at end of file diff --git a/clang/test/SemaSYCL/array-kernel-param.cpp b/clang/test/SemaSYCL/array-kernel-param.cpp index 69fa04f048884..19d8562da1436 100755 --- a/clang/test/SemaSYCL/array-kernel-param.cpp +++ b/clang/test/SemaSYCL/array-kernel-param.cpp @@ -1,5 +1,4 @@ // RUN: %clang_cc1 -I %S/Inputs -fsycl -fsycl-is-device -ast-dump %s | FileCheck %s -// XFAIL: * // This test checks that compiler generates correct kernel arguments for // arrays, Accessor arrays, and structs containing Accessors. @@ -19,7 +18,7 @@ int main() { accessor; Accessor acc[2]; - int a[100]; + int a[2]; struct struct_acc_t { Accessor member_acc[4]; } struct_acc; @@ -31,7 +30,7 @@ int main() { a_kernel( [=]() { - int local = a[3]; + int local = a[1]; }); a_kernel( @@ -41,8 +40,7 @@ int main() { } // Check kernel_A parameters -// CHECK: FunctionDecl {{.*}}kernel_A{{.*}} 'void (wrapped_array, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>)' -// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'wrapped_array' +// CHECK: FunctionDecl {{.*}}kernel_A{{.*}} 'void (__global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>)' // CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ '__global int *' // CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'cl::sycl::range<1>' // CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'cl::sycl::range<1>' @@ -56,41 +54,38 @@ int main() { // CHECK: CXXMemberCallExpr {{.*}} 'void' // CHECK-NEXT: MemberExpr {{.*}}__init -// CHECK kernel_B parameters -// CHECK: FunctionDecl {{.*}}kernel_B{{.*}} 'void (wrapped_array)' -// CHECK-NEXT: ParmVarDecl {{.*}} 'wrapped_array' -// CHECK-NEXT: CompoundStmt -// CHECK-NEXT: DeclStmt -// CHECK-NEXT: VarDecl -// CHECK-NEXT: InitListExpr -// CHECK-NEXT: ArrayInitLoopExpr {{.*}} 'int [100]' +// Check kernel_B parameters +// CHECK: FunctionDecl {{.*}}kernel_B{{.*}} 'void (int, int)' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'int' +// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'int' -// CHECK kernel_C parameters -// CHECK: FunctionDecl {{.*}}kernel_C{{.*}} 'void (struct {{.*}}, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>)' -// CHECK-NEXT: ParmVarDecl {{.*}} 'struct {{.*}}' -// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc '__global int *' -// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' -// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' -// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::id<1>' -// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc '__global int *' -// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' -// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' -// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::id<1>' -// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc '__global int *' -// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' -// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' -// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::id<1>' -// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc '__global int *' -// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' -// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' -// CHECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::id<1>' +// Correct and enable after struct mebers are extracted into separate parameters +// C HECK kernel_C parameters +// C HECK: FunctionDecl {{.*}}kernel_C{{.*}} 'void (struct {{.*}}, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>)' +// C HECK-NEXT: ParmVarDecl {{.*}} 'struct {{.*}}' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc '__global int *' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::id<1>' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc '__global int *' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::id<1>' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc '__global int *' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::id<1>' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc '__global int *' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::range<1>' +// C HECK-NEXT: ParmVarDecl {{.*}} used _arg_member_acc 'cl::sycl::id<1>' -// CHECK that four accessor init functions are called -// CHECK: CXXMemberCallExpr {{.*}} 'void' -// CHECK-NEXT: MemberExpr {{.*}}__init -// CHECK: CXXMemberCallExpr {{.*}} 'void' -// CHECK-NEXT: MemberExpr {{.*}}__init -// CHECK: CXXMemberCallExpr {{.*}} 'void' -// CHECK-NEXT: MemberExpr {{.*}}__init -// CHECK: CXXMemberCallExpr {{.*}} 'void' -// CHECK-NEXT: MemberExpr {{.*}}__init +// C HECK that four accessor init functions are called +// C HECK: CXXMemberCallExpr {{.*}} 'void' +// C HECK-NEXT: MemberExpr {{.*}}__init +// C HECK: CXXMemberCallExpr {{.*}} 'void' +// C HECK-NEXT: MemberExpr {{.*}}__init +// C HECK: CXXMemberCallExpr {{.*}} 'void' +// C HECK-NEXT: MemberExpr {{.*}}__init +// C HECK: CXXMemberCallExpr {{.*}} 'void' +// C HECK-NEXT: MemberExpr {{.*}}__init From db492bd762db2c27826166b1e7b5ea90175b3374 Mon Sep 17 00:00:00 2001 From: rdeodhar Date: Sat, 27 Jun 2020 12:20:31 -0700 Subject: [PATCH 10/14] Decomposed array elements, and changed manner of array element initialization. --- clang/lib/Sema/SemaSYCL.cpp | 124 +++-- .../CodeGenSYCL/kernel-param-acc-array.cpp | 24 +- .../test/SemaSYCL/array-kernel-param-neg.cpp | 13 - clang/test/SemaSYCL/array-kernel-param.cpp | 12 +- sycl/doc/Array_Kernel_Parameters.md | 435 ------------------ sycl/doc/KernelParameterPassing.md | 32 +- .../array-kernel-param-nested-run.cpp | 135 ++++++ .../array_param/array-kernel-param-run.cpp | 89 ++-- 8 files changed, 290 insertions(+), 574 deletions(-) delete mode 100755 sycl/doc/Array_Kernel_Parameters.md create mode 100755 sycl/test/array_param/array-kernel-param-nested-run.cpp diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 50c5a0804241a..c29b191181621 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -791,11 +791,15 @@ static void VisitField(CXXRecordDecl *Owner, RangeTy &&Item, QualType ItemTy, else if (ItemTy->isStructureOrClassType()) VisitAccessorWrapper(Owner, Item, ItemTy->getAsCXXRecordDecl(), handlers...); -#if 0 - // FIXME Enable this when structs are replaced by their fields + // FIXME Enable this when structs are replaced by their fields +#define STRUCTS_DECOMPOSED 0 +#if STRUCTS_DECOMPOSED else if (ItemTy->isArrayType()) VisitArrayElements(Item, ItemTy, handlers...); -#endif + else if (ItemTy->isScalarType()) + KF_FOR_EACH(handleScalarType, Item, ItemTy); +} +#else } template @@ -803,6 +807,7 @@ static void VisitScalarField(CXXRecordDecl *Owner, RangeTy &&Item, QualType ItemTy, Handlers &... handlers) { KF_FOR_EACH(handleScalarType, Item, ItemTy); } +#endif template static void VisitArrayElements(RangeTy Item, QualType FieldTy, @@ -812,13 +817,18 @@ static void VisitArrayElements(RangeTy Item, QualType FieldTy, int64_t ElemCount = CAT->getSize().getSExtValue(); std::initializer_list{(handlers.enterArray(), 0)...}; for (int64_t Count = 0; Count < ElemCount; Count++) { +#if STRUCTS_DECOMPOSED + VisitField(nullptr, Item, ET, handlers...); +#else if (ET->isScalarType()) VisitScalarField(nullptr, Item, ET, handlers...); else VisitField(nullptr, Item, ET, handlers...); +#endif (void)std::initializer_list{(handlers.nextElement(ET), 0)...}; } - (void)std::initializer_list{(handlers.leaveArray(ET, ElemCount), 0)...}; + (void)std::initializer_list{ + (handlers.leaveArray(Item, ET, ElemCount), 0)...}; } template @@ -932,20 +942,31 @@ template class SyclKernelFieldHandler { // class/field graph. Int Headers use this to calculate offset, most others // don't have a need for these. - virtual void enterStruct(const CXXRecordDecl *, FieldDecl *) {} - virtual void leaveStruct(const CXXRecordDecl *, FieldDecl *) {} - virtual void enterStruct(const CXXRecordDecl *, const CXXBaseSpecifier &) {} - virtual void leaveStruct(const CXXRecordDecl *, const CXXBaseSpecifier &) {} + virtual bool enterStruct(const CXXRecordDecl *, FieldDecl *) { return true; } + virtual bool leaveStruct(const CXXRecordDecl *, FieldDecl *) { return true; } + virtual bool enterStruct(const CXXRecordDecl *, const CXXBaseSpecifier &) { + return true; + } + virtual bool leaveStruct(const CXXRecordDecl *, const CXXBaseSpecifier &) { + return true; + } // The following are used for stepping through array elements. - virtual void enterField(const CXXRecordDecl *, const CXXBaseSpecifier &) {} - virtual void leaveField(const CXXRecordDecl *, const CXXBaseSpecifier &) {} - virtual void enterField(const CXXRecordDecl *, FieldDecl *) {} - virtual void leaveField(const CXXRecordDecl *, FieldDecl *) {} - virtual void enterArray() {} - virtual void nextElement(QualType) {} - virtual void leaveArray(QualType, int64_t) {} + virtual bool enterField(const CXXRecordDecl *, const CXXBaseSpecifier &) { + return true; + } + virtual bool leaveField(const CXXRecordDecl *, const CXXBaseSpecifier &) { + return true; + } + virtual bool enterField(const CXXRecordDecl *, FieldDecl *) { return true; } + virtual bool leaveField(const CXXRecordDecl *, FieldDecl *) { return true; } + virtual bool enterArray() { return true; } + virtual bool nextElement(QualType) { return true; } + virtual bool leaveArray(const CXXBaseSpecifier &, QualType, int64_t) { + return true; + } + virtual bool leaveArray(FieldDecl *, QualType, int64_t) { return true; } }; // A type to check the validity of all of the argument types. @@ -1242,6 +1263,7 @@ class SyclKernelBodyCreator InitializedEntity VarEntity; CXXRecordDecl *KernelObj; llvm::SmallVector MemberExprBases; + uint64_t ArrayIndex; FunctionDecl *KernelCallerFunc; // Using the statements/init expressions that we've created, this generates @@ -1340,30 +1362,27 @@ class SyclKernelBodyCreator InitExprs.push_back(MemberInit.get()); } - void createExprForScalarElement(FieldDecl *FD, QualType FieldTy) { + void createExprForScalarElement(FieldDecl *FD) { InitializedEntity ArrayEntity = InitializedEntity::InitializeMember(FD, &VarEntity); InitializationKind InitKind = InitializationKind::CreateCopy(SourceLocation(), SourceLocation()); Expr *DRE = createInitExpr(FD); - Expr *Idx = dyn_cast(MemberExprBases.back())->getIdx(); - llvm::APSInt Result; - SemaRef.VerifyIntegerConstantExpression(Idx, &Result); - uint64_t IntIdx = Result.getZExtValue(); InitializedEntity Entity = InitializedEntity::InitializeElement( - SemaRef.getASTContext(), IntIdx, ArrayEntity); + SemaRef.getASTContext(), ArrayIndex, ArrayEntity); + ArrayIndex++; InitializationSequence InitSeq(SemaRef, Entity, InitKind, DRE); ExprResult MemberInit = InitSeq.Perform(SemaRef, Entity, InitKind, DRE); + InitExprs.push_back(MemberInit.get()); + } + + void addArrayInit(FieldDecl *FD, int64_t Count) { llvm::SmallVector ArrayInitExprs; - if (IntIdx > 0) { - // Continue with the current InitList - InitListExpr *ILE = cast(InitExprs.back()); + for (int64_t I = 0; I < Count; I++) { + ArrayInitExprs.push_back(InitExprs.back()); InitExprs.pop_back(); - llvm::ArrayRef L = ILE->inits(); - for (size_t I = 0; I < L.size(); I++) - ArrayInitExprs.push_back(L[I]); } - ArrayInitExprs.push_back(MemberInit.get()); + std::reverse(ArrayInitExprs.begin(), ArrayInitExprs.end()); Expr *ILE = new (SemaRef.getASTContext()) InitListExpr(SemaRef.getASTContext(), SourceLocation(), ArrayInitExprs, SourceLocation()); @@ -1421,8 +1440,10 @@ class SyclKernelBodyCreator bool handleSpecialType(FieldDecl *FD, QualType Ty) { const auto *RecordDecl = Ty->getAsCXXRecordDecl(); - // Perform initialization only if it is field of kernel object - if (MemberExprBases.size() == 2) { + ArraySubscriptExpr *ArrayRef = + dyn_cast(MemberExprBases.back()); + // Perform initialization only if decomposed from array + if (ArrayRef || MemberExprBases.size() == 2) { InitializedEntity Entity = InitializedEntity::InitializeMember(FD, &VarEntity); // Initialize with the default constructor. @@ -1507,31 +1528,37 @@ class SyclKernelBodyCreator bool handleScalarType(FieldDecl *FD, QualType FieldTy) final { if (dyn_cast(MemberExprBases.back())) - createExprForScalarElement(FD, FieldTy); + createExprForScalarElement(FD); else createExprForStructOrScalar(FD); return true; } - void enterField(const CXXRecordDecl *RD, FieldDecl *FD) final { + bool enterField(const CXXRecordDecl *RD, FieldDecl *FD) final { if (!FD->getType()->isReferenceType()) MemberExprBases.push_back(BuildMemberExpr(MemberExprBases.back(), FD)); + return true; } - void leaveField(const CXXRecordDecl *, FieldDecl *FD) final { + bool leaveField(const CXXRecordDecl *, FieldDecl *FD) final { if (!FD->getType()->isReferenceType()) MemberExprBases.pop_back(); + return true; } - void enterArray() final { + bool enterArray() final { Expr *ArrayBase = MemberExprBases.back(); ExprResult IndexExpr = SemaRef.ActOnIntegerConstant(SourceLocation(), 0); ExprResult ElementBase = SemaRef.CreateBuiltinArraySubscriptExpr( ArrayBase, SourceLocation(), IndexExpr.get(), SourceLocation()); MemberExprBases.push_back(ElementBase.get()); + ArrayIndex = 0; + return true; } - void nextElement(QualType) final { + bool nextElement(QualType ET) final { + if (ET->isScalarType()) + return true; ArraySubscriptExpr *LastArrayRef = dyn_cast(MemberExprBases.back()); MemberExprBases.pop_back(); @@ -1544,14 +1571,20 @@ class SyclKernelBodyCreator ExprResult ElementBase = SemaRef.CreateBuiltinArraySubscriptExpr( ArrayBase, SourceLocation(), IndexExpr.get(), SourceLocation()); MemberExprBases.push_back(ElementBase.get()); + return true; } - void leaveArray(QualType, int64_t) final { MemberExprBases.pop_back(); } + bool leaveArray(FieldDecl *FD, QualType, int64_t Count) final { + addArrayInit(FD, Count); + MemberExprBases.pop_back(); + return true; + } using SyclKernelFieldHandler::enterArray; using SyclKernelFieldHandler::enterField; using SyclKernelFieldHandler::handleScalarType; using SyclKernelFieldHandler::handleSyclSamplerType; + using SyclKernelFieldHandler::leaveArray; using SyclKernelFieldHandler::leaveField; }; @@ -1670,43 +1703,50 @@ class SyclKernelIntHeaderCreator return true; } - void enterField(const CXXRecordDecl *RD, FieldDecl *FD) final { + bool enterField(const CXXRecordDecl *RD, FieldDecl *FD) final { CurOffset += SemaRef.getASTContext().getFieldOffset(FD) / 8; + return true; } - void leaveField(const CXXRecordDecl *, FieldDecl *FD) final { + bool leaveField(const CXXRecordDecl *, FieldDecl *FD) final { CurOffset -= SemaRef.getASTContext().getFieldOffset(FD) / 8; + return true; } - void enterField(const CXXRecordDecl *RD, const CXXBaseSpecifier &BS) final { + bool enterField(const CXXRecordDecl *RD, const CXXBaseSpecifier &BS) final { const ASTRecordLayout &Layout = SemaRef.getASTContext().getASTRecordLayout(RD); CurOffset += Layout.getBaseClassOffset(BS.getType()->getAsCXXRecordDecl()) .getQuantity(); + return true; } - void leaveField(const CXXRecordDecl *RD, const CXXBaseSpecifier &BS) final { + bool leaveField(const CXXRecordDecl *RD, const CXXBaseSpecifier &BS) final { const ASTRecordLayout &Layout = SemaRef.getASTContext().getASTRecordLayout(RD); CurOffset -= Layout.getBaseClassOffset(BS.getType()->getAsCXXRecordDecl()) .getQuantity(); + return true; } - void nextElement(QualType ET) final { + bool nextElement(QualType ET) final { CurOffset += SemaRef.getASTContext().getTypeSizeInChars(ET).getQuantity(); + return true; } - void leaveArray(QualType ET, int64_t Count) final { + bool leaveArray(FieldDecl *, QualType ET, int64_t Count) final { int64_t ArraySize = SemaRef.getASTContext().getTypeSizeInChars(ET).getQuantity(); if (!ET->isArrayType()) { ArraySize *= Count; } CurOffset -= ArraySize; + return true; } using SyclKernelFieldHandler::handleScalarType; using SyclKernelFieldHandler::handleSyclSamplerType; + using SyclKernelFieldHandler::leaveArray; }; } // namespace diff --git a/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp b/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp index c70e25b2feb72..ec8ac8bc01f5f 100644 --- a/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-acc-array.cpp @@ -49,26 +49,30 @@ int main() { // CHECK: [[MEM_RANGE2:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::range" // CHECK: [[OFFSET2:%[a-zA-Z0-9_.]+]] = alloca %"struct.{{.*}}.cl::sycl::id" -// CHECK accessor array GEP for acc[0] +// CHECK accessor array default inits // CHECK: [[ACCESSOR_ARRAY1:%[a-zA-Z0-9_]+]] = getelementptr inbounds %"class.{{.*}}.anon", %"class.{{.*}}.anon"* [[LOCAL_OBJECT]], i32 0, i32 0 -// CHECK: [[Z0:%[a-zA-Z0-9_]*]] = getelementptr inbounds{{.*}}[[ACCESSOR_ARRAY1]], i64 0, i64 0 +// CHECK: [[BEGIN:%[a-zA-Z0-9._]*]] = getelementptr inbounds [2 x [[ACCESSOR:.*]]], [2 x [[ACCESSOR]]]* [[ACCESSOR_ARRAY1]], i64 0, i64 0 +// CHECK: [[END:%[a-zA-Z0-9._]*]] = getelementptr inbounds [[ACCESSOR:.*]], [[ACCESSOR]]* [[BEGIN]], i64 2 +// CHECK: [[NEXT0:%[a-zA-Z0-9._]*]] = getelementptr inbounds [[ACCESSOR]], [[ACCESSOR]]* {{.*}}, i64 1 +// CHECK: [[ELEMENT:%[a-zA-Z0-9._]*]] = getelementptr inbounds [[ACCESSOR]], [[ACCESSOR]]* {{.*}}, i64 1 +// CHECK: [[ELEMENT:%[a-zA-Z0-9._]*]] = getelementptr inbounds [[ACCESSOR]], [[ACCESSOR]]* {{.*}}, i64 2 +// CHECK: [[NEXT1:%[a-zA-Z0-9._]*]] = getelementptr inbounds [[ACCESSOR]], [[ACCESSOR]]* {{.*}}, i64 1 + +// CHECK: [[ACCESSOR_ARRAY2:%[a-zA-Z0-9_]+]] = getelementptr inbounds %"class.{{.*}}.anon", %"class.{{.*}}.anon"* [[LOCAL_OBJECT]], i32 0, i32 0 +// CHECK: [[INDEX:%[a-zA-Z0-9._]*]] = getelementptr inbounds [2 x [[ACCESSOR]]], [2 x [[ACCESSOR]]]* [[ACCESSOR_ARRAY2]], i64 0, i64 0 // CHECK load from kernel pointer argument alloca // CHECK: [[MEM_LOAD1:%[a-zA-Z0-9_]+]] = load i32 addrspace(1)*, i32 addrspace(1)** [[MEM_ARG1]] -// CHECK acc[0] __init method call -// CHECK: [[ACC_CAST1:%[0-9]+]] = addrspacecast %"class{{.*}}accessor"* [[Z0]] to %"class{{.*}}accessor" addrspace(4)* +// CHECK: [[ACC_CAST1:%[0-9]+]] = addrspacecast [[ACCESSOR]]* {{.*}} to [[ACCESSOR]] addrspace(4)* +// CHECK acc[0] __init method call // CHECK: call spir_func void @{{.*}}__init{{.*}}(%"class.{{.*}}.cl::sycl::accessor" addrspace(4)* [[ACC_CAST1]], i32 addrspace(1)* [[MEM_LOAD1]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[ACC_RANGE1]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[MEM_RANGE1]], %"struct.{{.*}}.cl::sycl::id"* byval({{.*}}) align 4 [[OFFSET1]]) -// CHECK accessor array GEP for acc[1] -// CHECK: [[ACCESSOR_ARRAY2:%[a-zA-Z0-9_]+]] = getelementptr inbounds %"class.{{.*}}.anon", %"class.{{.*}}.anon"* [[LOCAL_OBJECT]], i32 0, i32 0 -// CHECK: [[Z1:%[a-zA-Z0-9_]*]] = getelementptr inbounds{{.*}}[[ACCESSOR_ARRAY2]], i64 0, i64 1 - // CHECK load from kernel pointer argument alloca // CHECK: [[MEM_LOAD2:%[a-zA-Z0-9_]+]] = load i32 addrspace(1)*, i32 addrspace(1)** [[MEM_ARG2]] -// CHECK acc[1] __init method call -// CHECK: [[ACC_CAST2:%[0-9]+]] = addrspacecast %"class{{.*}}accessor"* [[Z1]] to %"class{{.*}}accessor" addrspace(4)* +// CHECK: [[ACC_CAST2:%[0-9]+]] = addrspacecast [[ACCESSOR]]* {{.*}} to [[ACCESSOR]] addrspace(4)* +// CHECK acc[1] __init method call // CHECK: call spir_func void @{{.*}}__init{{.*}}(%"class.{{.*}}.cl::sycl::accessor" addrspace(4)* [[ACC_CAST2]], i32 addrspace(1)* [[MEM_LOAD2]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[ACC_RANGE2]], %"struct.{{.*}}.cl::sycl::range"* byval({{.*}}) align 4 [[MEM_RANGE2]], %"struct.{{.*}}.cl::sycl::id"* byval({{.*}}) align 4 [[OFFSET2]]) diff --git a/clang/test/SemaSYCL/array-kernel-param-neg.cpp b/clang/test/SemaSYCL/array-kernel-param-neg.cpp index b7f669ecd6671..0618014c9fb10 100755 --- a/clang/test/SemaSYCL/array-kernel-param-neg.cpp +++ b/clang/test/SemaSYCL/array-kernel-param-neg.cpp @@ -4,21 +4,12 @@ // an array of non-trivially copyable structs as SYCL kernel parameter or // a non-constant size array. -struct A { - int i; -}; - struct B { int i; B(int _i) : i(_i) {} B(const B &x) : i(x.i) {} }; -struct C : A { - const A C2; - C() : A{0}, C2{2} {} -}; - struct D { int i; ~D(); @@ -38,16 +29,12 @@ __attribute__((sycl_kernel)) void kernel_single_task(Func kernelFunc) { } void test() { - A cs1[10]; B nsl1[4] = {1, 2, 3, 4}; - C cs2[6]; D nsl2[5]; E es; kernel_single_task([=] { - int a = cs1[6].i; // expected-error@+1 {{kernel parameter has non-trivially copy constructible class/struct type}} int b = nsl1[2].i; - int c = cs2[0].i; // expected-error@+1 {{kernel parameter has non-trivially destructible class/struct type}} int d = nsl2[4].i; }); diff --git a/clang/test/SemaSYCL/array-kernel-param.cpp b/clang/test/SemaSYCL/array-kernel-param.cpp index 19d8562da1436..c8bdb390467a1 100755 --- a/clang/test/SemaSYCL/array-kernel-param.cpp +++ b/clang/test/SemaSYCL/array-kernel-param.cpp @@ -58,8 +58,18 @@ int main() { // CHECK: FunctionDecl {{.*}}kernel_B{{.*}} 'void (int, int)' // CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'int' // CHECK-NEXT: ParmVarDecl {{.*}} used _arg_ 'int' +// Check kernel_B inits +// CHECK-NEXT: CompoundStmt +// CHECK-NEXT: DeclStmt +// CHECK-NEXT: VarDecl {{.*}} cinit +// CHECK-NEXT: InitListExpr +// CHECK-NEXT: InitListExpr {{.*}} 'int [2]' +// CHECK: ImplicitCastExpr +// CHECK: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} '_arg_' 'int' +// CHECK: ImplicitCastExpr +// CHECK: DeclRefExpr {{.*}} 'int' lvalue ParmVar {{.*}} '_arg_' 'int' -// Correct and enable after struct mebers are extracted into separate parameters +// Correct and enable after struct members are extracted into separate parameters // C HECK kernel_C parameters // C HECK: FunctionDecl {{.*}}kernel_C{{.*}} 'void (struct {{.*}}, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>, __global int *, cl::sycl::range<1>, cl::sycl::range<1>, cl::sycl::id<1>)' // C HECK-NEXT: ParmVarDecl {{.*}} 'struct {{.*}}' diff --git a/sycl/doc/Array_Kernel_Parameters.md b/sycl/doc/Array_Kernel_Parameters.md deleted file mode 100755 index 22ec0b32c513b..0000000000000 --- a/sycl/doc/Array_Kernel_Parameters.md +++ /dev/null @@ -1,435 +0,0 @@ -

Array Parameters of SYCL Kernels

- -

Introduction

- -This document describes the changes to support passing arrays to SYCL kernels -and special treatment of Accessor arrays. -The following cases are handled: - -1. arrays of standard-layout type as top-level arguments -2. arrays of Accessors as top-level arguments -3. arrays of accessors within structs that are top-level arguments - -The motivation for this correction to kernel parameters processing is to -bring consistency to the treatment of arrays. -On the CPU, a lambda function is allowed to access an element of an array -defined outside the lambda. The implementation captures the entire array -by value. A user would naturally expect this to work in SYCL as well. -However, the current implementation flags references to arrays defined -outside a SYCL kernel as errors. - -The first few sections describe the current design. -The last three sections describe the design to support 1. to 3. above. -The implementation of this design is confined to three functions in the -file `SemaSYCL.cpp`. - -

A SYCL Kernel

- -The SYCL constructs `single_task`, `parallel_for`, and -`parallel_for_work_group` each take a function object or a lambda function - as one of their arguments. The code within the function object or -lambda function is executed on the device. -To enable execution of the kernel on OpenCL devices, the lambda/function object -is converted into the format of an OpenCL kernel. - -

SYCL Kernel Code Generation

- -Consider a source code example that captures an int, a struct and an accessor -by value: - -```C++ -constexpr size_t c_num_items = 10; -range<1> num_items{c_num_items}; // range<1>(num_items) - -int main() -{ - int output[c_num_items]; - queue myQueue; - - int i = 55; - struct S { - int m; - } s = { 66 }; - auto outBuf = buffer(&output[0], num_items); - - myQueue.submit([&](handler &cgh) { - auto outAcc = outBuf.get_access(cgh); - cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { - outAcc[index] = i + s.m; - }); - }); - - return 0; -} -``` - -The input to the code generation routines is a function object that represents -the kernel. In pseudo-code: - -```C++ -struct Capture { - Accessor outAcc; - int i; - struct S s; - () { - outAcc[index] = i + s.m; - } -} -``` - -On the CPU a call to such a lambda function would look like this: -```C++ -()(struct Capture* this); -``` - -When offloading the kernel to a device, the lambda/function object's -function operator cannot be directly called with a capture object address. -Instead, the code generated for the device is in the form of a -�kernel caller� and a �kernel callee�. -The callee is a clone of the SYCL kernel object. -The caller is generated in the form of an OpenCL kernel function. -It receives the lambda capture object in pieces, assembles the pieces -into the original lambda capture object and then calls the callee: - -```C++ -spir_kernel void caller( - __global int* AccData, // arg1 of Accessor init function - range<1> AccR1, // arg2 of Accessor init function - range<1> AccR2, // arg3 of Accessor init function - id<1> I, // arg4 of Accessor init function - int i, - struct S s -) -{ - // Local capture object - struct Capture local; - - // Reassemble capture object from parts - local.i = i; - local.s = s; - // Call accessor�s init function - Accessor::init(&local.outAcc, AccData, AccR1, AccR2, I); - - // Call the kernel body - callee(&local, id<1> wi); -} - -spir_func void callee(struct Capture* this, id<1> wi) -{ -} -``` - -As may be observed from the example above, standard-layout lambda capture -components are passed by value to the device as separate parameters. -This includes scalars, pointers, and standard-layout structs. -Certain SYCL struct types that are not standard-layout, -such as Accessors and Samplers, are treated specially. -The arguments to their init functions are passed as separate parameters -and used within the kernel caller function to initialize Accessors/Samplers -on the device by calling their init functions using the received arguments. - -There is one other aspect of code generation. An �integration header� -is generated for use during host compilation. -This header file contains entries for each kernel. -Among the items it defines is a table of sizes and offsets of the -kernel parameters. -For the source example above the integration header contains the -following snippet: - -```C++ -// array representing signatures of all kernels defined in the -// corresponding source -static constexpr -const kernel_param_desc_t kernel_signatures[] = { - //--- _ZTSZZ4mainENKUlRN2cl4sycl7handlerEE19->18clES2_E6Worker - { kernel_param_kind_t::kind_accessor, 4062, 0 }, - { kernel_param_kind_t::kind_std_layout, 4, 32 }, - { kernel_param_kind_t::kind_std_layout, 4, 36 }, -}; -``` - -Each entry in the kernel_signatures table contains three values: -1) an encoding of the type of capture object member -2) a field that encodes additional properties, and -3) an offset within a block of memory where the value of that -4) kernel argument is placed. - -The previous sections described how kernel arguments are handled today. -The next three sections describe support for arrays. - -

Fix 1: Kernel Arguments that are Standard-Layout Arrays

- -As described earlier, each variable captured by a lambda that comprises a -SYCL kernel becomes a parameter of the kernel caller function. -For arrays, simply allowing them through would result in a -function parameter of array type. This is not supported in C++. -Therefore, the array needing capture is wrapped in a struct for -the purposes of passing to the device. Once received on the device -within its wrapper, the array is copied into the local capture object. -All references to the array within the kernel body are directed to -the non-wrapped array which is a member of the local capture object. - -

Source code fragment:

- -```C++ - int array[100]; - auto outBuf = buffer(&output[0], num_items); - - myQueue.submit([&](handler &cgh) { - auto outAcc = outBuf.get_access(cgh); - cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { - outAcc[index] = array[index.get(0)]; - }); - }); -``` - -

Integration header produced:

- -```C++ -static constexpr -const kernel_param_desc_t kernel_signatures[] = { - //--- _ZTSZZ4mainENKUlRN2cl4sycl7handlerEE16->18clES2_E6Worker - { kernel_param_kind_t::kind_accessor, 4062, 0 }, - { kernel_param_kind_t::kind_std_layout, 400, 32 }, -}; -``` - -

The changes to device code made to support this extension, in pseudo-code:

- -```C++ -struct Capture { - Accessor outAcc; - int array[100]; - () { - // Body - } -} - -struct wrapper { - int array[100]; -}; -spir_kernel void caller( - __global int* AccData, // arg1 of Accessor init function - range<1> AccR1, // arg2 of Accessor init function - range<1> AccR2, // arg3 of Accessor init function - id<1> I, // arg4 of Accessor init function - struct wrapper w_s // Pass the array wrapped in a struct -) -{ - // Local capture object - struct Capture local; - - // Reassemble capture object from parts - // Initialize array using existing clang Initialization mechanisms - local.array = w_s; - // Call accessor�s init function - Accessor::init(&local.outAcc, AccData, AccR1, AccR2, I); - - callee(&local, id<1> wi); -} -``` - -The sharp-eyed reviewer of `SemaSYCL.cpp` will notice that the array -is actually double-wrapped in structs. This was done simply to preserve -the interface to an existing function (`CreateAndAddPrmDsc`) which -processes each kernel caller parameter as a capture object member. -The object being added to a list in `CreateAndAddPrmDsc` is `Fld`, -which is expected to be a field of some struct. So a wrapped struct -cannot be passed to this function. A double-wrapped struct is needed -as shown below. This does not affect the generated code. - -```C++ -struct { - struct { - int array[100]; - } -} -``` - -This could be changed but it would mean changes to the `CreateAndAddPrmDsc` -implementation, to all its callers and to the place where the list created -by it is processed. -By wrapping the array twice, the inner, single-wrapped array appears as a -member of a struct and meets the requirements of the existing code. - -

Fix 2: Kernel Arguments that are Arrays of Accessors

- -Arrays of accessors are supported in a manner similar to that of a plain -Accessor. For each accessor array element, the four values required to -call its init function are passed as separate arguments to the kernel. -Reassembly within the kernel caller is serialized by accessor array element. - -

Source code fragment:

- -```C++ - myQueue.submit([&](handler &cgh) { - using Accessor = - accessor; - Accessor inAcc[2] = {in_buffer1.get_access(cgh), - in_buffer2.get_access(cgh)}; - auto outAcc = out_buffer.get_access(cgh); - - cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { - outAcc[index] = inAcc[0][index] + inAcc[1][index]; - }); - }); -``` - -

Integration header:

- -```C++ -static constexpr -const kernel_param_desc_t kernel_signatures[] = { - //--- _ZTSZZ4mainENKUlRN2cl4sycl7handlerEE20->18clES2_E6Worker - { kernel_param_kind_t::kind_accessor, 4062, 0 }, - { kernel_param_kind_t::kind_accessor, 4062, 32 }, - { kernel_param_kind_t::kind_accessor, 4062, 64 }, -}; -``` - -

Device code generated in pseudo-code form:

- -```C++ -struct Capture { - Accessor outAcc; - Accessor inAcc[2]; - () { - // Body - } -} - -spir_kernel void caller( - __global int* outAccData, // args of OutAcc - range<1> outAccR1, - range<1> outAccR2, - id<1> outI, - __global int* inAccData_0, // args of inAcc[0] - range<1> inAccR1_0, - range<1> inAccR2_0, - id<1> inI_0, - __global int* inAccData_1, // args of inAcc[1] - range<1> inAccR1_1, - range<1> inAccR2_1, - id<1> inI_1, -) -{ - // Local capture object - struct Capture local; - - // Reassemble capture object from parts - // Call outAcc accessor�s init function - Accessor::init(&local.outAcc, outAccData, outAccR1, outAccR2, outI); - - // Call inAcc[0] accessor�s init function - Accessor::init(&local.inAcc[0], inAccData_0, inAccR1_0, inAccR2_0, inI_0); - - // Call inAcc[1] accessor�s init function - Accessor::init(&local.inAcc[1], inAccData_1, inAccR1_1, inAccR2_1, inI_1); - - callee(&local, id<1> wi); -} -``` - -

Fix 3: Accessor Arrays within Structs

- -*Individual* Accessors within structs were already supported. -Struct parameters of kernels that are structs are traversed member -by member, recursively, to enumerate member structs that are one of -the SYCL special types: Accessors and Samplers. For each special -struct encountered in the scan, arguments of their init functions -are added as separate arguments to the kernel. -However, *arrays* of accessors within structs were not supported. -Building on the support for single Accessors within structs, -the extension to arrays of Accessors/Samplers within structs -is straightforward. Each element of such arrays is treated as -an individual object, and the arguments of its init function -are added to the kernel arguments in sequence. -Within the kernel caller function, the lambda object is reassembled -in a manner similar to other instances of Accessor arrays. - - -

Source code fragment:

- -```C++ - myQueue.submit([&](handler &cgh) { - using Accessor = - accessor; - struct S { - int m; - Accessor inAcc[2]; - } s = { 55, - {in_buffer1.get_access(cgh), - in_buffer2.get_access(cgh)} - }; - auto outAcc = out_buffer.get_access(cgh); - - cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { - outAcc[index] = s.m + s.inAcc[0][index] + s.inAcc[1][index]; - }); -}); -``` - -

Integration header:

- -```C++ -static constexpr -const kernel_param_desc_t kernel_signatures[] = { - //--- _ZTSZZ4mainENKUlRN2cl4sycl7handlerEE20->18clES2_E6Worker - { kernel_param_kind_t::kind_accessor, 4062, 0 }, - { kernel_param_kind_t::kind_std_layout, 72, 32 }, - { kernel_param_kind_t::kind_accessor, 4062, 40 }, - { kernel_param_kind_t::kind_accessor, 4062, 72 }, - -}; -``` - -

Device code generated in pseudo-code form:

- -```C++ -struct Capture { - Accessor outAcc; - struct S s; - () { - // Body - } -} - -spir_kernel void caller( - __global int* outAccData, // args of OutAcc - range<1> outAccR1, - range<1> outAccR2, - id<1> outI, - struct S s, // the struct S - __global int* inAccData_0, // args of s.inAcc[0] - range<1> inAccR1_0, - range<1> inAccR2_0, - id<1> inI_0, - __global int* inAccData_1, // args of s.inAcc[1] - range<1> inAccR1_1, - range<1> inAccR2_1, - id<1> inI_1, -) -{ - // Local capture object - struct Capture local; - - // Reassemble capture object from parts - // Copy struct argument contents to local copy - // Accessor array will be initialized by calling init functions - local.s = s; - - // Call outAcc accessor�s init function - Accessor::init( - &local.outAcc, outAccData, outAccR1, outAccR2, outI); - - // Call s.inAcc[0] accessor�s init function - Accessor::init( - &local.s.inAcc[0], inAccData_0, inAccR1_0, inAccR2_0, inI_0); - - // Call s.inAcc[1] accessor�s init function - Accessor::init( - &local.s.inAcc[1], inAccData_1, inAccR1_1, inAccR2_1, inI_1); - - callee(&local, id<1> wi); -} -``` diff --git a/sycl/doc/KernelParameterPassing.md b/sycl/doc/KernelParameterPassing.md index a6f31ec114482..fc71e4b173851 100755 --- a/sycl/doc/KernelParameterPassing.md +++ b/sycl/doc/KernelParameterPassing.md @@ -175,16 +175,18 @@ As described earlier, each variable captured by a lambda that comprises a SYCL kernel becomes a parameter of the kernel caller function. For arrays, simply allowing them through would result in a function parameter of array type. This is not supported in C++. -Therefore, the array needing capture is wrapped in a struct for -the purposes of passing to the device. Once received on the device -within its wrapper, the array is copied into the local capture object. -All references to the array within the kernel body are directed to -the non-wrapped array which is a member of the local capture object. +Therefore, the array needing capture is decomposed into its elements for +the purposes of passing to the device. Each array element is passed as a +separate parameter. The array elements received on the device +are copied into the array within the local capture object.

Source code fragment:

```C++ - int array[100]; + constexpr int num_items = 2; + int array[num_items]; + int output[num_items]; + auto outBuf = buffer(&output[0], num_items); myQueue.submit([&](handler &cgh) { @@ -200,10 +202,13 @@ the non-wrapped array which is a member of the local capture object. ```C++ static constexpr const kernel_param_desc_t kernel_signatures[] = { - //--- _ZTSZZ4mainENKUlRN2cl4sycl7handlerEE16->18clES2_E6Worker + //--- _ZTSZZ1fRN2cl4sycl5queueEENK3$_0clERNS0_7handlerEE6Worker { kernel_param_kind_t::kind_accessor, 4062, 0 }, - { kernel_param_kind_t::kind_std_layout, 400, 32 }, + { kernel_param_kind_t::kind_std_layout, 4, 32 }, + { kernel_param_kind_t::kind_std_layout, 4, 36 }, + }; + ```

The changes to device code made to support this extension, in pseudo-code:

@@ -211,21 +216,19 @@ const kernel_param_desc_t kernel_signatures[] = { ```C++ struct Capture { sycl::accessor outAcc; - int array[100]; + int array[num_items]; () { // Body } } -struct wrapper { - int array[100]; -}; spir_kernel void caller( __global int* AccData, // arg1 of accessor init function range<1> AccR1, // arg2 of accessor init function range<1> AccR2, // arg3 of accessor init function id<1> I, // arg4 of accessor init function - struct wrapper w_s // Pass the array wrapped in a struct + int p_array_0; // Pass array element 0 + int p_array_1; // Pass array element 1 ) { // Local capture object @@ -233,7 +236,8 @@ spir_kernel void caller( // Reassemble capture object from parts // Initialize array using existing clang Initialization mechanisms - local.array = w_s; + local.array[0] = p_array_0; + local.array[1] = p_array_1; // Call accessor�s init function sycl::accessor::init(&local.outAcc, AccData, AccR1, AccR2, I); diff --git a/sycl/test/array_param/array-kernel-param-nested-run.cpp b/sycl/test/array_param/array-kernel-param-nested-run.cpp new file mode 100755 index 0000000000000..28b9469cda89a --- /dev/null +++ b/sycl/test/array_param/array-kernel-param-nested-run.cpp @@ -0,0 +1,135 @@ +// This test checks kernel execution with array parameters inside structs. + +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out +// XFAIL: * + +#include +#include + +using namespace cl::sycl; + +constexpr size_t c_num_items = 100; +range<1> num_items{c_num_items}; // range<1>(num_items) + +// Change if tests are added/removed +static int testCount = 1; +static int passCount; + +template +static bool verify_1D(const char *name, int X, T A, T A_ref) { + int ErrCnt = 0; + + for (int i = 0; i < X; i++) { + if (A_ref[i] != A[i]) { + if (++ErrCnt < 10) { + std::cout << name << " mismatch at " << i << ". Expected " << A_ref[i] + << " result is " << A[i] << "\n"; + } + } + } + + if (ErrCnt == 0) { + return true; + } + std::cout << " Failed. Failure rate: " << ErrCnt << "/" << X << "(" + << ErrCnt / (float)X * 100.f << "%)\n"; + return false; +} + +template +void init(T &A, int value, int increment) { + for (int i = 0; i < c_num_items; i++) { + A[i] = value; + value += increment; + } +} + +bool test_accessor_array_in_struct(queue &myQueue) { + std::array input1; + std::array input2; + std::array output; + std::array ref; + init(input1, 1, 1); + init(input2, 22, 1); + init(ref, 35, 2); + + auto in_buffer1 = buffer(input1.data(), num_items); + auto in_buffer2 = buffer(input2.data(), num_items); + auto out_buffer = buffer(output.data(), num_items); + + myQueue.submit([&](handler &cgh) { + using Accessor = + accessor; + struct S { + int w; + int x; + Accessor a[2]; + int y; + int z; + } S = { + 3, 3, {in_buffer1.get_access(cgh), in_buffer2.get_access(cgh)}, 7, 7}; + auto output_accessor = out_buffer.get_access(cgh); + + cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { + S.a[0][index]++; + S.a[1][index]++; + output_accessor[index] = S.a[0][index] + S.a[1][index] + S.x + S.y; + }); + }); + const auto HostAccessor = out_buffer.get_access(); + + return verify_1D("Accessor array in struct", c_num_items, output, ref); +} + +bool run_tests() { + queue Q([](exception_list L) { + for (auto ep : L) { + try { + std::rethrow_exception(ep); + } catch (std::exception &E) { + std::cout << "*** std exception caught:\n"; + std::cout << E.what(); + } catch (cl::sycl::exception const &E1) { + std::cout << "*** SYCL exception caught:\n"; + std::cout << E1.what(); + } + } + }); + + passCount = 0; + if (test_accessor_array_in_struct(Q)) { + ++passCount; + } + + auto D = Q.get_device(); + const char *devType = D.is_host() ? "Host" : D.is_cpu() ? "CPU" : "GPU"; + std::cout << passCount << " of " << testCount << " tests passed on " + << devType << "\n"; + + return (testCount == passCount); +} + +int main(int argc, char *argv[]) { + bool passed = true; + default_selector selector{}; + auto D = selector.select_device(); + const char *devType = D.is_host() ? "Host" : D.is_cpu() ? "CPU" : "GPU"; + std::cout << "Running on device " << devType << " (" + << D.get_info() << ")\n"; + try { + passed &= run_tests(); + } catch (exception e) { + std::cout << e.what(); + } + + if (!passed) { + std::cout << "FAILED\n"; + return 1; + } + std::cout << "PASSED\n"; + return 0; +} diff --git a/sycl/test/array_param/array-kernel-param-run.cpp b/sycl/test/array_param/array-kernel-param-run.cpp index 379066340714d..5ed29a410997e 100755 --- a/sycl/test/array_param/array-kernel-param-run.cpp +++ b/sycl/test/array_param/array-kernel-param-run.cpp @@ -5,7 +5,6 @@ // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out -// XFAIL: * #include #include @@ -16,7 +15,7 @@ constexpr size_t c_num_items = 100; range<1> num_items{c_num_items}; // range<1>(num_items) // Change if tests are added/removed -static int testCount = 5; +static int testCount = 4; static int passCount; template @@ -65,7 +64,8 @@ bool test_one_array(queue &myQueue) { output_accessor[index] = input1[index] + 1; }); }); - const auto HostAccessor = out_buffer.get_access(); + const auto HostAccessor = + out_buffer.get_access(); return verify_1D("One array", c_num_items, output, ref); } @@ -88,7 +88,8 @@ bool test_two_arrays(queue &myQueue) { output_accessor[index] = input1[index] + input2[index]; }); }); - const auto HostAccessor = out_buffer.get_access(); + const auto HostAccessor = + out_buffer.get_access(); return verify_1D("Two arrays", c_num_items, output, ref); } @@ -96,29 +97,36 @@ bool test_two_arrays(queue &myQueue) { bool test_accessor_arrays_1(queue &myQueue) { std::array input1; std::array input2; + int input3[c_num_items]; + int input4[c_num_items]; std::array ref; init(input1, 1, 1); init(input2, 22, 1); - init(ref, 24, 1); + init(input3, 5, 1); + init(input4, -7, 1); + init(ref, 22, 3); auto in_buffer1 = buffer(input1.data(), num_items); auto in_buffer2 = buffer(input2.data(), num_items); myQueue.submit([&](handler &cgh) { - using Accessor = - accessor; + using Accessor = accessor; Accessor a[2] = { in_buffer1.get_access(cgh), in_buffer2.get_access(cgh), }; - cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { - a[0][index] = a[1][index] + 2; - }); + cgh.parallel_for( + num_items, [=](cl::sycl::id<1> index) { + a[0][index] = a[1][index] + input3[index] + input4[index] + 2; + }); }); - const auto HostAccessor = in_buffer1.get_access(); + const auto HostAccessor = + in_buffer1.get_access(); - return verify_1D>("Accessor arrays 1", c_num_items, input1, ref); + return verify_1D>("Accessor arrays 1", + c_num_items, input1, ref); } bool test_accessor_arrays_2(queue &myQueue) { @@ -135,58 +143,24 @@ bool test_accessor_arrays_2(queue &myQueue) { auto out_buffer = buffer(output.data(), num_items); myQueue.submit([&](handler &cgh) { - using Accessor = - accessor; + using Accessor = accessor; Accessor a[4] = {in_buffer1.get_access(cgh), in_buffer2.get_access(cgh), in_buffer1.get_access(cgh), in_buffer2.get_access(cgh)}; auto output_accessor = out_buffer.get_access(cgh); - cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { - output_accessor[index] = a[0][index] + a[3][index]; - }); + cgh.parallel_for( + num_items, [=](cl::sycl::id<1> index) { + output_accessor[index] = a[0][index] + a[3][index]; + }); }); - const auto HostAccessor = out_buffer.get_access(); + const auto HostAccessor = + out_buffer.get_access(); - return verify_1D>("Accessor arrays 2", c_num_items, output, ref); -} - -bool test_accessor_array_in_struct(queue &myQueue) { - std::array input1; - std::array input2; - std::array output; - std::array ref; - init(input1, 1, 1); - init(input2, 22, 1); - init(ref, 35, 2); - - auto in_buffer1 = buffer(input1.data(), num_items); - auto in_buffer2 = buffer(input2.data(), num_items); - auto out_buffer = buffer(output.data(), num_items); - - myQueue.submit([&](handler &cgh) { - using Accessor = - accessor; - struct S { - int w; - int x; - Accessor a[2]; - int y; - int z; - } S = { - 3, 3, {in_buffer1.get_access(cgh), in_buffer2.get_access(cgh)}, 7, 7}; - auto output_accessor = out_buffer.get_access(cgh); - - cgh.parallel_for(num_items, [=](cl::sycl::id<1> index) { - S.a[0][index]++; - S.a[1][index]++; - output_accessor[index] = S.a[0][index] + S.a[1][index] + S.x + S.y; - }); - }); - const auto HostAccessor = out_buffer.get_access(); - - return verify_1D("Accessor array in struct", c_num_items, output, ref); + return verify_1D>("Accessor arrays 2", + c_num_items, output, ref); } bool run_tests() { @@ -217,9 +191,6 @@ bool run_tests() { if (test_accessor_arrays_2(Q)) { ++passCount; } - if (test_accessor_array_in_struct(Q)) { - ++passCount; - } auto D = Q.get_device(); const char *devType = D.is_host() ? "Host" : D.is_cpu() ? "CPU" : "GPU"; From 4afc3a306aa49f950a970e6ba1ebf42fadd42429 Mon Sep 17 00:00:00 2001 From: rdeodhar Date: Mon, 29 Jun 2020 12:33:41 -0700 Subject: [PATCH 11/14] Removed one redundant check. --- clang/lib/Sema/SemaSYCL.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index c29b191181621..b7ca9aa85739e 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -1557,8 +1557,6 @@ class SyclKernelBodyCreator } bool nextElement(QualType ET) final { - if (ET->isScalarType()) - return true; ArraySubscriptExpr *LastArrayRef = dyn_cast(MemberExprBases.back()); MemberExprBases.pop_back(); From 9196a307e70850675b2de03f0a7575f38cb8e33d Mon Sep 17 00:00:00 2001 From: rdeodhar Date: Tue, 30 Jun 2020 12:23:47 -0700 Subject: [PATCH 12/14] Changed how some lit tests are run. --- clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp | 2 +- clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp | 2 +- clang/test/CodeGenSYCL/kernel-param-pod-array-ih.cpp | 4 +++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp b/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp index f9dfd144079e1..8c2cfb2a1bd8b 100755 --- a/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-acc-array-ih.cpp @@ -1,4 +1,4 @@ -// RUN: %clang -I %S/Inputs -fsycl-device-only -Xclang -fsycl-int-header=%t.h %s -c -o %T/kernel.spv +// RUN: %clang_cc1 -I %S/Inputs -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -fsycl-int-header=%t.h %s -fsyntax-only // RUN: FileCheck -input-file=%t.h %s // This test checks the integration header generated when diff --git a/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp b/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp index 21726109a1be9..d53711e0c5b20 100644 --- a/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp @@ -1,4 +1,4 @@ -// RUN: %clang -I %S/Inputs -fsycl-device-only -Xclang -fsycl-int-header=%t.h %s -c -o %T/kernel.spv +// RUN: %clang_cc1 -I %S/Inputs -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -fsycl-int-header=%t.h %s -fsyntax-only // RUN: FileCheck -input-file=%t.h %s // XFAIL: * diff --git a/clang/test/CodeGenSYCL/kernel-param-pod-array-ih.cpp b/clang/test/CodeGenSYCL/kernel-param-pod-array-ih.cpp index 349d540f22ebc..49fd34d3206e5 100755 --- a/clang/test/CodeGenSYCL/kernel-param-pod-array-ih.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-pod-array-ih.cpp @@ -1,4 +1,6 @@ -// RUN: %clang -I %S/Inputs -fsycl-device-only -Xclang -fsycl-int-header=%t.h %s -c -o %T/kernel.spv +// RUN: %clang_cc1 -I %S/Inputs -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -fsycl-int-header=%t.h %s -fsyntax-only +// RUN: FileCheck -input-file=%t.h %s + // This test checks the integration header generated for a kernel // with an argument that is a POD array. From 5660269c8a946d8977b477e83a419aede7bb4f0b Mon Sep 17 00:00:00 2001 From: rdeodhar Date: Tue, 30 Jun 2020 13:38:27 -0700 Subject: [PATCH 13/14] Update clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp Co-authored-by: kbobrovs --- clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp b/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp index d53711e0c5b20..972499f1e8a34 100644 --- a/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp @@ -1,5 +1,6 @@ // RUN: %clang_cc1 -I %S/Inputs -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -fsycl-int-header=%t.h %s -fsyntax-only // RUN: FileCheck -input-file=%t.h %s +// XFAIL for now due to : https://github.com/intel/llvm/issues/2018 // XFAIL: * // This test checks the integration header when kernel argument From 81ace266477cc33eed964cf3119d24e21c4bd4ac Mon Sep 17 00:00:00 2001 From: rdeodhar Date: Tue, 30 Jun 2020 13:49:42 -0700 Subject: [PATCH 14/14] Fixed formatting. --- clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp b/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp index 972499f1e8a34..141191219b4dc 100644 --- a/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp +++ b/clang/test/CodeGenSYCL/kernel-param-member-acc-array-ih.cpp @@ -1,6 +1,6 @@ // RUN: %clang_cc1 -I %S/Inputs -fsycl -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -fsycl-int-header=%t.h %s -fsyntax-only // RUN: FileCheck -input-file=%t.h %s -// XFAIL for now due to : https://github.com/intel/llvm/issues/2018 +// XFAIL for now due to : https://github.com/intel/llvm/issues/2018 // XFAIL: * // This test checks the integration header when kernel argument