diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 881f5001e1a10..d5312019b7d5d 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1162,6 +1162,16 @@ def SYCLRegisterNum : InheritableAttr { let PragmaAttributeSupport = 0; } +// Used to mark ESIMD kernel pointer parameters originating from accessors. +def SYCLSimdAccessorPtr : InheritableAttr { + // No spelling, as this attribute can't be created in the source code. + let Spellings = []; + let Subjects = SubjectList<[ParmVar]>; + let LangOpts = [SYCLExplicitSIMD]; + let Documentation = [SYCLSimdAccessorPtrDocs]; + let PragmaAttributeSupport = 0; +} + def SYCLScope : Attr { // No spelling, as this attribute can't be created in the source code. let Spellings = []; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index e4631fbe3d066..3a88bc89ec67c 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -380,6 +380,18 @@ def SYCLRegisterNumDocs : Documentation { }]; } +def SYCLSimdAccessorPtrDocs : Documentation { + let Category = DocCatVariable; + let Content = [{ + The ``__attribute__((esimd_acc_ptr))`` attribute is used by FE to mark ESIMD + kernel pointer parameters which correspond to the original + lambda's captured accessors. FE turns the attribute to some metadata + required by the ESIMD Back-End. + Not supposed to be used directly in the source - SYCL device compiler FE + automatically adds it for ESIMD kernels. + }]; +} + def C11NoReturnDocs : Documentation { let Category = DocCatFunction; let Content = [{ diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index ccd3ead784c23..dcf0252bfb1fe 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -332,7 +332,8 @@ class SYCLIntegrationHeader { /// Signals that subsequent parameter descriptor additions will go to /// the kernel with given name. Starts new kernel invocation descriptor. void startKernel(StringRef KernelName, QualType KernelNameType, - StringRef KernelStableName, SourceLocation Loc); + StringRef KernelStableName, SourceLocation Loc, + bool IsESIMD); /// Adds a kernel parameter descriptor to current kernel invocation /// descriptor. @@ -375,6 +376,9 @@ class SYCLIntegrationHeader { SourceLocation KernelLocation; + /// Whether this kernel is an ESIMD one. + bool IsESIMDKernel; + /// Descriptor of kernel actual parameters. SmallVector Params; diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 604f10724733f..dd0c69072e78d 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1487,6 +1487,10 @@ void CodeGenModule::GenOpenCLArgMetadata(llvm::Function *Fn, // MDNode for the intel_buffer_location attribute. SmallVector argSYCLBufferLocationAttr; + // MDNode for listing ESIMD kernel pointer arguments originating from + // accessors + SmallVector argESIMDAccPtrs; + if (FD && CGF) for (unsigned i = 0, e = FD->getNumParams(); i != e; ++i) { const ParmVarDecl *parm = FD->getParamDecl(i); @@ -1618,6 +1622,10 @@ void CodeGenModule::GenOpenCLArgMetadata(llvm::Function *Fn, ? llvm::ConstantAsMetadata::get(CGF->Builder.getInt32( SYCLBufferLocationAttr->getLocationID())) : llvm::ConstantAsMetadata::get(CGF->Builder.getInt32(-1))); + + if (FD->hasAttr()) + argESIMDAccPtrs.push_back(llvm::ConstantAsMetadata::get( + CGF->Builder.getInt1(parm->hasAttr()))); } if (LangOpts.SYCLIsDevice && !LangOpts.SYCLExplicitSIMD) @@ -1634,6 +1642,9 @@ void CodeGenModule::GenOpenCLArgMetadata(llvm::Function *Fn, llvm::MDNode::get(VMContext, argBaseTypeNames)); Fn->setMetadata("kernel_arg_type_qual", llvm::MDNode::get(VMContext, argTypeQuals)); + if (FD && FD->hasAttr()) + Fn->setMetadata("kernel_arg_accessor_ptr", + llvm::MDNode::get(VMContext, argESIMDAccPtrs)); if (getCodeGenOpts().EmitOpenCLArgMetadata) Fn->setMetadata("kernel_arg_name", llvm::MDNode::get(VMContext, argNames)); diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index ea17806730bf7..95b76a4024b5e 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -57,6 +57,7 @@ enum KernelInvocationKind { }; const static std::string InitMethodName = "__init"; +const static std::string InitESIMDMethodName = "__init_esimd"; const static std::string FinalizeMethodName = "__finalize"; constexpr unsigned MaxKernelArgsSize = 2048; @@ -1714,7 +1715,11 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { bool isAccessorType = false) { const auto *RecordDecl = FieldTy->getAsCXXRecordDecl(); assert(RecordDecl && "The accessor/sampler must be a RecordDecl"); - CXXMethodDecl *InitMethod = getMethodByName(RecordDecl, InitMethodName); + const std::string &MethodName = + KernelDecl->hasAttr() && isAccessorType + ? InitESIMDMethodName + : InitMethodName; + CXXMethodDecl *InitMethod = getMethodByName(RecordDecl, MethodName); assert(InitMethod && "The accessor/sampler must have the __init method"); // Don't do -1 here because we count on this to be the first parameter added @@ -1723,9 +1728,14 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { for (const ParmVarDecl *Param : InitMethod->parameters()) { QualType ParamTy = Param->getType(); addParam(FD, ParamTy.getCanonicalType()); - if (ParamTy.getTypePtr()->isPointerType() && isAccessorType) + if (ParamTy.getTypePtr()->isPointerType() && isAccessorType) { handleAccessorPropertyList(Params.back(), RecordDecl, FD->getLocation()); + if (KernelDecl->hasAttr()) + // In ESIMD kernels accessor's pointer argument needs to be marked + Params.back()->addAttr( + SYCLSimdAccessorPtrAttr::CreateImplicit(SemaRef.getASTContext())); + } } LastParamIndex = ParamIndex; return true; @@ -1819,7 +1829,10 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { QualType FieldTy) final { const auto *RecordDecl = FieldTy->getAsCXXRecordDecl(); assert(RecordDecl && "The accessor/sampler must be a RecordDecl"); - CXXMethodDecl *InitMethod = getMethodByName(RecordDecl, InitMethodName); + const std::string MethodName = KernelDecl->hasAttr() + ? InitESIMDMethodName + : InitMethodName; + CXXMethodDecl *InitMethod = getMethodByName(RecordDecl, MethodName); assert(InitMethod && "The accessor/sampler must have the __init method"); // Don't do -1 here because we count on this to be the first parameter added @@ -1951,6 +1964,7 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { class SyclKernelArgsSizeChecker : public SyclKernelFieldHandler { SourceLocation KernelLoc; unsigned SizeOfParams = 0; + bool IsSIMD = false; void addParam(QualType ArgTy) { SizeOfParams += @@ -1960,7 +1974,9 @@ class SyclKernelArgsSizeChecker : public SyclKernelFieldHandler { bool handleSpecialType(QualType FieldTy) { const CXXRecordDecl *RecordDecl = FieldTy->getAsCXXRecordDecl(); assert(RecordDecl && "The accessor/sampler must be a RecordDecl"); - CXXMethodDecl *InitMethod = getMethodByName(RecordDecl, InitMethodName); + const std::string &MethodName = + IsSIMD ? InitESIMDMethodName : InitMethodName; + CXXMethodDecl *InitMethod = getMethodByName(RecordDecl, MethodName); assert(InitMethod && "The accessor/sampler must have the __init method"); for (const ParmVarDecl *Param : InitMethod->parameters()) addParam(Param->getType()); @@ -1969,8 +1985,8 @@ class SyclKernelArgsSizeChecker : public SyclKernelFieldHandler { public: static constexpr const bool VisitInsideSimpleContainers = false; - SyclKernelArgsSizeChecker(Sema &S, SourceLocation Loc) - : SyclKernelFieldHandler(S), KernelLoc(Loc) {} + SyclKernelArgsSizeChecker(Sema &S, SourceLocation Loc, bool IsSIMD) + : SyclKernelFieldHandler(S), KernelLoc(Loc), IsSIMD(IsSIMD) {} ~SyclKernelArgsSizeChecker() { if (SizeOfParams > MaxKernelArgsSize) @@ -2044,6 +2060,19 @@ class SyclKernelArgsSizeChecker : public SyclKernelFieldHandler { using SyclKernelFieldHandler::handleSyclHalfType; }; +static const CXXMethodDecl *getOperatorParens(const CXXRecordDecl *Rec) { + for (const auto *MD : Rec->methods()) { + if (MD->getOverloadedOperator() == OO_Call) + return MD; + } + return nullptr; +} + +static bool isESIMDKernelType(const CXXRecordDecl *KernelObjType) { + const CXXMethodDecl *OpParens = getOperatorParens(KernelObjType); + return (OpParens != nullptr) && OpParens->hasAttr(); +} + class SyclKernelBodyCreator : public SyclKernelFieldHandler { SyclKernelDeclCreator &DeclCreator; llvm::SmallVector BodyStmts; @@ -2359,6 +2388,11 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { return VD; } + const std::string &getInitMethodName() const { + bool IsSIMDKernel = isESIMDKernelType(KernelObj); + return IsSIMDKernel ? InitESIMDMethodName : InitMethodName; + } + // Default inits the type, then calls the init-method in the body. bool handleSpecialType(FieldDecl *FD, QualType Ty) { addFieldInit(FD, Ty, None, @@ -2367,7 +2401,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { addFieldMemberExpr(FD, Ty); const auto *RecordDecl = Ty->getAsCXXRecordDecl(); - createSpecialMethodCall(RecordDecl, InitMethodName, BodyStmts); + createSpecialMethodCall(RecordDecl, getInitMethodName(), BodyStmts); removeFieldMemberExpr(FD, Ty); @@ -2377,7 +2411,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { bool handleSpecialType(const CXXBaseSpecifier &BS, QualType Ty) { const auto *RecordDecl = Ty->getAsCXXRecordDecl(); addBaseInit(BS, Ty, InitializationKind::CreateDefault(KernelCallerSrcLoc)); - createSpecialMethodCall(RecordDecl, InitMethodName, BodyStmts); + createSpecialMethodCall(RecordDecl, getInitMethodName(), BodyStmts); return true; } @@ -2501,7 +2535,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { // calls, so add them here instead. const auto *StreamDecl = Ty->getAsCXXRecordDecl(); - createSpecialMethodCall(StreamDecl, InitMethodName, BodyStmts); + createSpecialMethodCall(StreamDecl, getInitMethodName(), BodyStmts); createSpecialMethodCall(StreamDecl, FinalizeMethodName, FinalizeStmts); removeFieldMemberExpr(FD, Ty); @@ -2659,7 +2693,9 @@ class SyclKernelIntHeaderCreator : public SyclKernelFieldHandler { const CXXRecordDecl *KernelObj, QualType NameType, StringRef Name, StringRef StableName) : SyclKernelFieldHandler(S), Header(H) { - Header.startKernel(Name, NameType, StableName, KernelObj->getLocation()); + bool IsSIMDKernel = isESIMDKernelType(KernelObj); + Header.startKernel(Name, NameType, StableName, KernelObj->getLocation(), + IsSIMDKernel); } bool handleSyclAccessorType(const CXXRecordDecl *RD, @@ -3026,7 +3062,10 @@ void Sema::CheckSYCLKernelCall(FunctionDecl *KernelFunc, SourceRange CallLoc, SyclKernelDecompMarker DecompMarker(*this); SyclKernelFieldChecker FieldChecker(*this); SyclKernelUnionChecker UnionChecker(*this); - SyclKernelArgsSizeChecker ArgsSizeChecker(*this, Args[0]->getExprLoc()); + + bool IsSIMDKernel = isESIMDKernelType(KernelObj); + SyclKernelArgsSizeChecker ArgsSizeChecker(*this, Args[0]->getExprLoc(), + IsSIMDKernel); KernelObjVisitor Visitor{*this}; SYCLKernelNameTypeVisitor KernelNameTypeVisitor(*this, Args[0]->getExprLoc(), @@ -3087,6 +3126,8 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, if (KernelObj->isInvalidDecl()) return; + bool IsSIMDKernel = isESIMDKernelType(KernelObj); + // Calculate both names, since Integration headers need both. std::string CalculatedName, StableName; std::tie(CalculatedName, StableName) = @@ -3095,7 +3136,7 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, : CalculatedName); SyclKernelDeclCreator kernel_decl(*this, KernelName, KernelObj->getLocation(), KernelCallerFunc->isInlined(), - KernelCallerFunc->hasAttr()); + IsSIMDKernel); SyclKernelBodyCreator kernel_body(*this, kernel_decl, KernelObj, KernelCallerFunc); SyclKernelIntHeaderCreator int_header( @@ -3810,6 +3851,9 @@ void SYCLIntegrationHeader::emit(raw_ostream &O) { O << "getParamDesc(unsigned i) {\n"; O << " return kernel_signatures[i+" << CurStart << "];\n"; O << " }\n"; + O << " __SYCL_DLL_LOCAL\n"; + O << " static constexpr bool isESIMD() { return " << K.IsESIMDKernel + << "; }\n"; O << "};\n"; CurStart += N; } @@ -3839,12 +3883,14 @@ bool SYCLIntegrationHeader::emit(const StringRef &IntHeaderName) { void SYCLIntegrationHeader::startKernel(StringRef KernelName, QualType KernelNameType, StringRef KernelStableName, - SourceLocation KernelLocation) { + SourceLocation KernelLocation, + bool IsESIMDKernel) { KernelDescs.resize(KernelDescs.size() + 1); KernelDescs.back().Name = std::string(KernelName); KernelDescs.back().NameType = KernelNameType; KernelDescs.back().StableName = std::string(KernelStableName); KernelDescs.back().KernelLocation = KernelLocation; + KernelDescs.back().IsESIMDKernel = IsESIMDKernel; } void SYCLIntegrationHeader::addParamDesc(kernel_param_kind_t Kind, int Info, diff --git a/clang/test/CodeGenSYCL/Inputs/sycl.hpp b/clang/test/CodeGenSYCL/Inputs/sycl.hpp index 402663eafb595..72d1c284b39f2 100644 --- a/clang/test/CodeGenSYCL/Inputs/sycl.hpp +++ b/clang/test/CodeGenSYCL/Inputs/sycl.hpp @@ -156,6 +156,7 @@ class accessor { private: void __init(__attribute__((opencl_global)) dataT *Ptr, range AccessRange, range MemRange, id Offset) {} + void __init_esimd(__attribute__((opencl_global)) dataT *Ptr) {} }; template diff --git a/clang/test/CodeGenSYCL/esimd-accessor-ptr-md.cpp b/clang/test/CodeGenSYCL/esimd-accessor-ptr-md.cpp new file mode 100644 index 0000000000000..aee8bea09fdaa --- /dev/null +++ b/clang/test/CodeGenSYCL/esimd-accessor-ptr-md.cpp @@ -0,0 +1,44 @@ +// RUN: %clang_cc1 -fsycl -fsycl-explicit-simd -fsycl-is-device \ +// RUN: -internal-isystem %S/Inputs -triple spir64-unknown-unknown-sycldevice \ +// RUN: -disable-llvm-passes -emit-llvm %s -o - | FileCheck %s + +// This test checks +// 1) proper metadata generation for accessors used in ESIMD +// kernels: +// - Proper 'kernel_arg_accessor_ptr' metadata is generated by the FE for +// ESIMD kernels +// - Pointers originating from accessors are marked with 'buffer_t' and proper +// argument kind. +// 2) __init_esimd function is used to initialize the accessor rather than +// __init. + +#include "sycl.hpp" + +using namespace cl::sycl; + +void test(int val) { + queue q; + q.submit([&](handler &h) { + cl::sycl::accessor accessorA; + cl::sycl::accessor accessorB; + + h.single_task( + [=]() __attribute__((sycl_explicit_simd)) { + accessorA.use(val); + accessorB.use(); + }); + }); + + // --- Name + // CHECK-LABEL: define spir_kernel void @"_ZTSZZ4testiENK3$_0clERN2cl4sycl7handlerEE12esimd_kernel"( + // --- Signature + // CHECK: i32 addrspace(1)* "VCArgumentDesc"="buffer_t" "VCArgumentIOKind"="0" "VCArgumentKind"="2" %_arg_, + // CHECK: i32 "VCArgumentDesc" "VCArgumentIOKind"="0" "VCArgumentKind"="0" %_arg_1, + // CHECK: i32 addrspace(1)* "VCArgumentDesc"="buffer_t" "VCArgumentIOKind"="0" "VCArgumentKind"="2" %_arg_3) + // --- Attributes + // CHECK: {{.*}} !kernel_arg_accessor_ptr ![[ACC_PTR_ATTR:[0-9]+]] !sycl_explicit_simd !{{[0-9]+}} {{.*}}{ + // --- init_esimd call is expected instead of __init: + // CHECK: call spir_func void @{{.*}}__init_esimd{{.*}}(%"{{.*}}sycl::accessor" addrspace(4)* %{{[0-9]+}}, i32 addrspace(1)* %{{[0-9]+}}) + // CHECK-LABEL: } + // CHECK: ![[ACC_PTR_ATTR]] = !{i1 true, i1 false, i1 true} +} diff --git a/clang/test/CodeGenSYCL/int_header_esimd.cpp b/clang/test/CodeGenSYCL/int_header_esimd.cpp new file mode 100644 index 0000000000000..9bb5a3e124106 --- /dev/null +++ b/clang/test/CodeGenSYCL/int_header_esimd.cpp @@ -0,0 +1,62 @@ +// RUN: %clang_cc1 -fsycl -fsycl-explicit-simd -fsycl-is-device -internal-isystem %S/Inputs -fsycl-int-header=%t.h %s +// RUN: FileCheck -input-file=%t.h %s + +// This test checks that +// 1) New isESIMD() member is generated into the integration header +// 2) It returns 1 for ESIMD kernels and 0 - for non-ESIMD. + +#include "sycl.hpp" + +using namespace cl::sycl; + +// -- ESIMD Lambda kernel. + +void testA() { + queue q; + q.submit([&](handler &h) { + h.single_task([=]() __attribute__((sycl_explicit_simd)){}); + }); +} +// CHECK-LABEL: template <> struct KernelInfo { +// CHECK: static constexpr bool isESIMD() { return 1; } + +// -- ESIMD Functor object kernel. + +struct KernelFunctor { + void operator()() const __attribute__((sycl_explicit_simd)) {} +}; + +void testB() { + queue q; + q.submit([&](handler &h) { + h.single_task(KernelFunctor{}); + }); +} +// CHECK-LABEL: template <> struct KernelInfo<::KernelFunctor> { +// CHECK: static constexpr bool isESIMD() { return 1; } + +// -- Non-ESIMD Lambda kernel. + +void testNA() { + queue q; + q.submit([&](handler &h) { + h.single_task([=]() {}); + }); +} +// CHECK-LABEL: template <> struct KernelInfo { +// CHECK: static constexpr bool isESIMD() { return 0; } + +// -- Non-ESIMD Functor object kernel. + +struct KernelNonESIMDFunctor { + void operator()() const {} +}; + +void testNB() { + queue q; + q.submit([&](handler &h) { + h.single_task(KernelNonESIMDFunctor{}); + }); +} +// CHECK-LABEL: template <> struct KernelInfo<::KernelNonESIMDFunctor> { +// CHECK: static constexpr bool isESIMD() { return 0; } diff --git a/clang/test/CodeGenSYCL/kernel-by-reference.cpp b/clang/test/CodeGenSYCL/kernel-by-reference.cpp index 6645c52e33de3..6502cddf602d8 100644 --- a/clang/test/CodeGenSYCL/kernel-by-reference.cpp +++ b/clang/test/CodeGenSYCL/kernel-by-reference.cpp @@ -15,7 +15,7 @@ int simple_add(int i) { int main() { queue q; #if defined(SYCL2020) - // expected-warning@Inputs/sycl.hpp:285 {{Passing kernel functions by value is deprecated in SYCL 2020}} + // expected-warning@Inputs/sycl.hpp:286 {{Passing kernel functions by value is deprecated in SYCL 2020}} // expected-note@+3 {{in instantiation of function template specialization}} #endif q.submit([&](handler &h) { @@ -23,7 +23,7 @@ int main() { }); #if defined(SYCL2017) - // expected-warning@Inputs/sycl.hpp:280 {{Passing of kernel functions by reference is a SYCL 2020 extension}} + // expected-warning@Inputs/sycl.hpp:281 {{Passing of kernel functions by reference is a SYCL 2020 extension}} // expected-note@+3 {{in instantiation of function template specialization}} #endif q.submit([&](handler &h) { diff --git a/llvm/lib/SYCLLowerIR/LowerESIMD.cpp b/llvm/lib/SYCLLowerIR/LowerESIMD.cpp index 498e26b9fd53c..26863dd308543 100644 --- a/llvm/lib/SYCLLowerIR/LowerESIMD.cpp +++ b/llvm/lib/SYCLLowerIR/LowerESIMD.cpp @@ -1136,6 +1136,7 @@ void SYCLLowerESIMDLegacyPass::generateKernelMetadata(Module &M) { SmallVector ArgTypeDescs; auto *KernelArgTypes = F.getMetadata("kernel_arg_type"); + auto *KernelArgAccPtrs = F.getMetadata("kernel_arg_accessor_ptr"); unsigned Idx = 0; // Iterate argument list to gather argument kinds and generate argument @@ -1148,14 +1149,29 @@ void SYCLLowerESIMDLegacyPass::generateKernelMetadata(Module &M) { if (ArgType.find("image1d_t") != std::string::npos || ArgType.find("image2d_t") != std::string::npos || - ArgType.find("image3d_t") != std::string::npos || - ArgType.find("image1d_buffer_t") != std::string::npos) { + ArgType.find("image3d_t") != std::string::npos) { Kind = AK_SURFACE; ArgTypeDescs.push_back(MDString::get(Ctx, ArgType)); } else { StringRef ArgDesc = ""; - if (Arg.getType()->isPointerTy()) - ArgDesc = "svmptr_t"; + + if (Arg.getType()->isPointerTy()) { + const auto *IsAccMD = + KernelArgAccPtrs + ? cast(KernelArgAccPtrs->getOperand(Idx)) + : nullptr; + unsigned IsAcc = + IsAccMD + ? static_cast(cast(IsAccMD->getValue()) + ->getValue() + .getZExtValue()) + : 0; + if (IsAcc) { + ArgDesc = "buffer_t"; + Kind = AK_SURFACE; + } else + ArgDesc = "svmptr_t"; + } ArgTypeDescs.push_back(MDString::get(Ctx, ArgDesc)); } diff --git a/llvm/test/SYCLLowerIR/esimd_acc_ptr.ll b/llvm/test/SYCLLowerIR/esimd_acc_ptr.ll new file mode 100644 index 0000000000000..0a7d4c5c55c7e --- /dev/null +++ b/llvm/test/SYCLLowerIR/esimd_acc_ptr.ll @@ -0,0 +1,37 @@ +; RUN: opt -LowerESIMD -S < %s | FileCheck %s + +; This test checks that LowerESIMD pass correctly interpretes the +; 'kernel_arg_accessor_ptr' metadata. Particularly, that it generates additional +; vector of per-argument metadata (accessible from "genx.kernels" top-level +; metadata node): +; - for those arguments having non-zero in the corresponding +; 'kernel_arg_accessor_ptr' position: +; * "argument kind" metadata element is set to '2' - 'surface' +; * "argument descriptor" metadata element is set to 'buffer_t' +; - for those pointer arguments having '0' in the corresponding +; 'kernel_arg_accessor_ptr' position, the kind/descriptor is set to +; '0'/'svmptr_t' + +define weak_odr dso_local spir_kernel void @ESIMDKernel(i32 %_arg_, float addrspace(1)* %_arg_1, float addrspace(1)* %_arg_3, i32 %_arg_5, float addrspace(1)* %_arg_7) !kernel_arg_accessor_ptr !0 !sycl_explicit_simd !1 !intel_reqd_sub_group_size !2 { +; CHECK: {{.*}} spir_kernel void @ESIMDKernel({{.*}}) #[[GENX_MAIN:[0-9]+]] + ret void +} + +; kernel_arg_accessor_ptr: +; arg0= +; arg1= +; arg2= +; arg3= +; arg4= +; buffer_t and argument kind 2 (surface) metadata must be added for args 1 and 2 +!0 = !{i32 0, i32 1, i32 1, i32 0, i32 0} +!1 = !{} +!2 = !{i32 1} + +; CHECK: attributes #[[GENX_MAIN]] = { "CMGenxMain" "oclrt"="1" } +; CHECK: !genx.kernels = !{![[GENX_KERNELS:[0-9]+]]} +; CHECK: ![[GENX_KERNELS]] = !{void (i32, float addrspace(1)*, float addrspace(1)*, i32, float addrspace(1)*)* @ESIMDKernel, !"ESIMDKernel", ![[ARG_KINDS:[0-9]+]], i32 0, i32 0, ![[ARG_IO_KINDS:[0-9]+]], ![[ARG_DESCS:[0-9]+]]} +; CHECK: ![[ARG_KINDS]] = !{i32 0, i32 2, i32 2, i32 0, i32 0} +; CHECK: ![[ARG_IO_KINDS]] = !{i32 0, i32 0, i32 0, i32 0, i32 0} +; CHECK: ![[ARG_DESCS]] = !{!"", !"buffer_t", !"buffer_t", !"", !"svmptr_t"} + diff --git a/sycl/include/CL/sycl/accessor.hpp b/sycl/include/CL/sycl/accessor.hpp index 7f8e62409b661..b4d4dd91cf2cb 100755 --- a/sycl/include/CL/sycl/accessor.hpp +++ b/sycl/include/CL/sycl/accessor.hpp @@ -859,6 +859,9 @@ class accessor : detail::AccessorImplDevice impl; #ifdef __SYCL_EXPLICIT_SIMD__ + // TODO all the Image1dBuffer* stuff, including the union with MData field + // below is not used anymore and is left temporarily to avoid ABI breaking + // changes. using OCLImage1dBufferTy = typename detail::opencl_image1d_buffer_type::type; #endif // __SYCL_EXPLICIT_SIMD__ @@ -870,15 +873,9 @@ class accessor : #endif // __SYCL_EXPLICIT_SIMD__ }; -#ifdef __SYCL_EXPLICIT_SIMD__ - // TODO In ESIMD accessors usage is limited for now - access range, mem - // range and offset are not supported. The cl_mem object allocated for - // a global accessor is always wrapped into a 1d image buffer to enable - // surface index-based addressing. - void __init(OCLImage1dBufferTy ImgBuf) { ImageBuffer = ImgBuf; } + // TODO replace usages with getQualifiedPtr + const ConcreteASPtrType getNativeImageObj() const { return MData; } - const OCLImage1dBufferTy getNativeImageObj() const { return ImageBuffer; } -#else void __init(ConcreteASPtrType Ptr, range AccessRange, range MemRange, id Offset) { MData = Ptr; @@ -893,7 +890,12 @@ class accessor : if (1 == AdjustedDim) MData += Offset[0]; } -#endif // __SYCL_EXPLICIT_SIMD__ + + // __init variant used by the device compiler for ESIMD kernels. + // TODO In ESIMD accessors usage is limited for now - access range, mem + // range and offset are not supported. + void __init_esimd(ConcreteASPtrType Ptr) { MData = Ptr; } + ConcreteASPtrType getQualifiedPtr() const { return MData; } template imageAccessorInit(Image); } + // __init variant used by the device compiler for ESIMD kernels. + void __init_esimd(OCLImageTy Image) { this->imageAccessorInit(Image); } + public: // Default constructor for objects later initialized with __init member. accessor() = default; @@ -2014,6 +2019,9 @@ class accessorimageAccessorInit(Image); } + // __init variant used by the device compiler for ESIMD kernels. + void __init_esimd(OCLImageTy Image) { this->imageAccessorInit(Image); } + public: // Default constructor for objects later initialized with __init member. accessor() = default; diff --git a/sycl/include/CL/sycl/detail/accessor_impl.hpp b/sycl/include/CL/sycl/detail/accessor_impl.hpp index 792e848e636b5..51649bfcf6d6c 100644 --- a/sycl/include/CL/sycl/detail/accessor_impl.hpp +++ b/sycl/include/CL/sycl/detail/accessor_impl.hpp @@ -70,29 +70,16 @@ template class LocalAccessorBaseDevice { } }; -// TODO ESIMD Currently all accessors are treated as ESIMD under corresponding -// compiler option enabling the macro below. Eventually ESIMD kernels and usual -// kernels must co-exist and there must be a mechanism for distinguishing usual -// and ESIMD accessors. -#ifndef __SYCL_EXPLICIT_SIMD__ -constexpr bool IsESIMDAccInit = false; -#else -constexpr bool IsESIMDAccInit = true; -#endif // __SYCL_EXPLICIT_SIMD__ - class __SYCL_EXPORT AccessorImplHost { public: AccessorImplHost(id<3> Offset, range<3> AccessRange, range<3> MemoryRange, access::mode AccessMode, detail::SYCLMemObjI *SYCLMemObject, int Dims, int ElemSize, int OffsetInBytes = 0, - bool IsSubBuffer = false, bool IsESIMDAcc = IsESIMDAccInit) + bool IsSubBuffer = false) : MOffset(Offset), MAccessRange(AccessRange), MMemoryRange(MemoryRange), MAccessMode(AccessMode), MSYCLMemObj(SYCLMemObject), MDims(Dims), MElemSize(ElemSize), MOffsetInBytes(OffsetInBytes), - MIsSubBuffer(IsSubBuffer) { - MIsESIMDAcc = - IsESIMDAcc && (SYCLMemObject->getType() == SYCLMemObjI::BUFFER); - } + MIsSubBuffer(IsSubBuffer) {} ~AccessorImplHost(); @@ -101,7 +88,7 @@ class __SYCL_EXPORT AccessorImplHost { MMemoryRange(Other.MMemoryRange), MAccessMode(Other.MAccessMode), MSYCLMemObj(Other.MSYCLMemObj), MDims(Other.MDims), MElemSize(Other.MElemSize), MOffsetInBytes(Other.MOffsetInBytes), - MIsSubBuffer(Other.MIsSubBuffer), MIsESIMDAcc(Other.MIsESIMDAcc) {} + MIsSubBuffer(Other.MIsSubBuffer) {} // The resize method provides a way to change the size of the // allocated memory and corresponding properties for the accessor. @@ -133,9 +120,6 @@ class __SYCL_EXPORT AccessorImplHost { Command *MBlockedCmd = nullptr; bool PerWI = false; - - // Whether this accessor is ESIMD accessor with special memory allocation. - bool MIsESIMDAcc; }; using AccessorImplPtr = shared_ptr_class; @@ -148,8 +132,7 @@ class AccessorBaseHost { bool IsSubBuffer = false) { impl = shared_ptr_class(new AccessorImplHost( Offset, AccessRange, MemoryRange, AccessMode, SYCLMemObject, Dims, - ElemSize, OffsetInBytes, IsSubBuffer, - IsESIMDAccInit && (SYCLMemObject->getType() == SYCLMemObjI::BUFFER))); + ElemSize, OffsetInBytes, IsSubBuffer)); } protected: diff --git a/sycl/include/CL/sycl/detail/image_ocl_types.hpp b/sycl/include/CL/sycl/detail/image_ocl_types.hpp index 4727876b76169..7e641a992bf30 100644 --- a/sycl/include/CL/sycl/detail/image_ocl_types.hpp +++ b/sycl/include/CL/sycl/detail/image_ocl_types.hpp @@ -193,6 +193,9 @@ inline int getSPIRVElementSize(int ImageChannelType, int ImageChannelOrder) { } #ifdef __SYCL_EXPLICIT_SIMD__ +// TODO all the opencl_image1d_buffer* stuff below is not used anymore and is +// left temporarily to avoid ABI breaking changes - field of this type is +// temporarily present in the accessor class. template struct opencl_image1d_buffer_type; // OpenCL types used only when compiling DPCPP ESIMD kernels diff --git a/sycl/include/CL/sycl/detail/kernel_desc.hpp b/sycl/include/CL/sycl/detail/kernel_desc.hpp index a7f73d9d19f70..5c53b67f46b93 100644 --- a/sycl/include/CL/sycl/detail/kernel_desc.hpp +++ b/sycl/include/CL/sycl/detail/kernel_desc.hpp @@ -56,6 +56,7 @@ template struct KernelInfo { return Dummy; } static constexpr const char *getName() { return ""; } + static constexpr bool isESIMD() { return 0; } }; #else template struct KernelInfoData { @@ -65,6 +66,7 @@ template struct KernelInfoData { return Dummy; } static constexpr const char *getName() { return ""; } + static constexpr bool isESIMD() { return 0; } }; // C++14 like index_sequence and make_index_sequence diff --git a/sycl/include/CL/sycl/detail/memory_manager.hpp b/sycl/include/CL/sycl/detail/memory_manager.hpp index 8f17f0f7d95f1..75640e2e24b84 100644 --- a/sycl/include/CL/sycl/detail/memory_manager.hpp +++ b/sycl/include/CL/sycl/detail/memory_manager.hpp @@ -49,11 +49,13 @@ class __SYCL_EXPORT MemoryManager { RT::PiEvent &OutEvent); // Allocates memory buffer wrapped into an image. MemObj must be a buffer, - // not an image. Used in ESIMD extension to enable surface index-based access. + // not an image. + // TODO not used - remove. static void *wrapIntoImageBuffer(ContextImplPtr TargetContext, void *MemBuf, SYCLMemObjI *MemObj); // Releases the image buffer created by wrapIntoImageBuffer. + // TODO not used - remove. static void releaseImageBuffer(ContextImplPtr TargetContext, void *ImageBuf); // The following method creates OpenCL sub buffer for specified diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 890fa1041293d..da3deeead5621 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -318,17 +318,30 @@ class __SYCL_EXPORT handler { /// Extracts and prepares kernel arguments from the lambda using integration /// header. + /// TODO replace with the version below once ABI breaking changes are allowed. void extractArgsAndReqsFromLambda(char *LambdaPtr, size_t KernelArgsNum, const detail::kernel_param_desc_t *KernelArgs); + /// Extracts and prepares kernel arguments from the lambda using integration + /// header. + void + extractArgsAndReqsFromLambda(char *LambdaPtr, size_t KernelArgsNum, + const detail::kernel_param_desc_t *KernelArgs, + bool IsESIMD); + /// Extracts and prepares kernel arguments set via set_arg(s). void extractArgsAndReqs(); + /// TODO replace with the version below once ABI breaking changes are allowed. void processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, const int Size, const size_t Index, size_t &IndexShift, bool IsKernelCreatedFromSource); + void processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, + const int Size, const size_t Index, size_t &IndexShift, + bool IsKernelCreatedFromSource, bool IsESIMD); + /// \return a string containing name of SYCL kernel. string_class getKernelName(); @@ -490,9 +503,10 @@ class __SYCL_EXPORT handler { // Empty name indicates that the compilation happens without integration // header, so don't perform things that require it. if (KI::getName() != nullptr && KI::getName()[0] != '\0') { + // TODO support ESIMD in no-integration-header case too. MArgs.clear(); extractArgsAndReqsFromLambda(MHostKernel->getPtr(), KI::getNumParams(), - &KI::getParamDesc(0)); + &KI::getParamDesc(0), KI::isESIMD()); MKernelName = KI::getName(); MOSModuleHandle = detail::OSUtil::getOSModuleHandle(KI::getName()); } else { diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index 2d72bb52f319b..c981316849a56 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -47,8 +47,8 @@ void MemoryManager::release(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, void MemoryManager::releaseImageBuffer(ContextImplPtr TargetContext, void *ImageBuf) { - auto PIObj = reinterpret_cast(ImageBuf); - TargetContext->getPlugin().call(PIObj); + // TODO remove when ABI breaking changes are allowed. + throw runtime_error("Deprecated release operation", PI_INVALID_OPERATION); } void MemoryManager::releaseMemObj(ContextImplPtr TargetContext, @@ -81,28 +81,10 @@ void *MemoryManager::allocate(ContextImplPtr TargetContext, SYCLMemObjI *MemObj, OutEvent); } -// Creates an image1d buffer wrapper object around given memory object. void *MemoryManager::wrapIntoImageBuffer(ContextImplPtr TargetContext, void *MemBuf, SYCLMemObjI *MemObj) { - // Image format: 1 channel per pixel, each pixel 8 bit, Size pixels occupies - // Size bytes. - pi_image_format Format = {PI_IMAGE_CHANNEL_ORDER_R, - PI_IMAGE_CHANNEL_TYPE_UNSIGNED_INT8}; - - // Image descriptor - request wrapper image1d creation. - pi_image_desc Desc = {}; - Desc.image_type = PI_MEM_TYPE_IMAGE1D_BUFFER; - Desc.image_width = MemObj->getSize(); - Desc.buffer = reinterpret_cast(MemBuf); - - // Create the image object. - const detail::plugin &Plugin = TargetContext->getPlugin(); - pi_mem Res = nullptr; - pi_mem_flags Flags = 0; - // Do not ref count the context handle, as it is not captured by the call. - Plugin.call(TargetContext->getHandleRef(), Flags, - &Format, &Desc, nullptr, &Res); - return Res; + // TODO remove when ABI breaking changes are allowed. + throw runtime_error("Deprecated allocation operation", PI_INVALID_OPERATION); } void *MemoryManager::allocateHostMemory(SYCLMemObjI *MemObj, void *UserPtr, diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 6b310c3aa8b5a..21513a035b732 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -761,14 +761,6 @@ cl_int AllocaCommand::enqueueImp() { detail::getSyclObjImpl(MQueue->get_context()), getSYCLMemObj(), MInitFromUserData, HostPtr, std::move(EventImpls), Event); - // if this is ESIMD accessor, wrap the allocated device memory buffer into - // an image buffer object. - // TODO Address copying SYCL/ESIMD memory between contexts. - if (getRequirement()->MIsESIMDAcc) - ESIMDExt.MWrapperImage = MemoryManager::wrapIntoImageBuffer( - detail::getSyclObjImpl(MQueue->get_context()), MMemAllocation, - getSYCLMemObj()); - return CL_SUCCESS; } @@ -960,10 +952,6 @@ cl_int ReleaseCommand::enqueueImp() { MAllocaCmd->getSYCLMemObj(), MAllocaCmd->getMemAllocation(), std::move(EventImpls), Event); - // Release the wrapper object if present. - if (void *WrapperImage = MAllocaCmd->ESIMDExt.MWrapperImage) - MemoryManager::releaseImageBuffer( - detail::getSyclObjImpl(MQueue->get_context()), WrapperImage); } return CL_SUCCESS; } @@ -1670,9 +1658,7 @@ pi_result ExecCGCommand::SetKernelParamsAndLaunch( case kernel_param_kind_t::kind_accessor: { Requirement *Req = (Requirement *)(Arg.MPtr); AllocaCommandBase *AllocaCmd = getAllocaForReq(Req); - RT::PiMem MemArg = Req->MIsESIMDAcc - ? (RT::PiMem)AllocaCmd->ESIMDExt.MWrapperImage - : (RT::PiMem)AllocaCmd->getMemAllocation(); + RT::PiMem MemArg = (RT::PiMem)AllocaCmd->getMemAllocation(); if (Plugin.getBackend() == backend::opencl) { Plugin.call(Kernel, NextTrueIndex, sizeof(RT::PiMem), &MemArg); diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index c3e3c037c5481..ef5eb059d05d9 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -339,13 +339,6 @@ class AllocaCommandBase : public Command { void *MMemAllocation = nullptr; - // ESIMD-extension-specific fields. - struct { - // If this alloca corresponds to an ESIMD accessor, then this field holds - // an image buffer wrapping the memory allocation above. - void *MWrapperImage = nullptr; - } ESIMDExt; - /// Alloca command linked with current command. /// Device and host alloca commands can be linked, so they may share the same /// memory. Only one allocation from a pair can be accessed at a time. Alloca diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index d6e4beb869934..16a2a0499290d 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -602,8 +602,7 @@ AllocaCommandBase *Scheduler::GraphBuilder::getOrCreateAllocaForReq( const Requirement FullReq(/*Offset*/ {0, 0, 0}, Req->MMemoryRange, Req->MMemoryRange, access::mode::read_write, Req->MSYCLMemObj, Req->MDims, Req->MElemSize, - 0 /*ReMOffsetInBytes*/, false /*MIsSubBuffer*/, - Req->MIsESIMDAcc); + 0 /*ReMOffsetInBytes*/, false /*MIsSubBuffer*/); // Can reuse user data for the first allocation const bool InitFromUserData = Record->MAllocaCommands.empty(); diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 5a54760e813e7..b3dc32070f51e 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -130,9 +130,17 @@ void handler::associateWithHandler(detail::AccessorBaseHost *AccBase, /*index*/ 0); } +// TODO remove this one once ABI breaking changes are allowed. void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, const int Size, const size_t Index, size_t &IndexShift, bool IsKernelCreatedFromSource) { + processArg(Ptr, Kind, Size, Index, IndexShift, IsKernelCreatedFromSource, + false); +} + +void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, + const int Size, const size_t Index, size_t &IndexShift, + bool IsKernelCreatedFromSource, bool IsESIMD) { using detail::kernel_param_kind_t; switch (Kind) { @@ -162,7 +170,7 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, // TODO ESIMD currently does not suport offset, memory and access ranges - // accessor::init for ESIMD-mode accessor has a single field, translated // to a single kernel argument set above. - if (!AccImpl->MIsESIMDAcc && !IsKernelCreatedFromSource) { + if (!IsKernelCreatedFromSource && !IsESIMD) { // Dimensionality of the buffer is 1 when dimensionality of the // accessor is 0. const size_t SizeAccField = @@ -253,13 +261,21 @@ void handler::extractArgsAndReqs() { const detail::kernel_param_kind_t &Kind = UnPreparedArgs[I].MType; const int &Size = UnPreparedArgs[I].MSize; const int Index = UnPreparedArgs[I].MIndex; - processArg(Ptr, Kind, Size, Index, IndexShift, IsKernelCreatedFromSource); + processArg(Ptr, Kind, Size, Index, IndexShift, IsKernelCreatedFromSource, + false); } } +// TODO remove once ABI breaking changes are allowed void handler::extractArgsAndReqsFromLambda( char *LambdaPtr, size_t KernelArgsNum, const detail::kernel_param_desc_t *KernelArgs) { + extractArgsAndReqsFromLambda(LambdaPtr, KernelArgsNum, KernelArgs, false); +} + +void handler::extractArgsAndReqsFromLambda( + char *LambdaPtr, size_t KernelArgsNum, + const detail::kernel_param_desc_t *KernelArgs, bool IsESIMD) { const bool IsKernelCreatedFromSource = false; size_t IndexShift = 0; for (size_t I = 0; I < KernelArgsNum; ++I) { @@ -284,7 +300,8 @@ void handler::extractArgsAndReqsFromLambda( Ptr = detail::getSyclObjImpl(*LocalAccBase).get(); } } - processArg(Ptr, Kind, Size, I, IndexShift, IsKernelCreatedFromSource); + processArg(Ptr, Kind, Size, I, IndexShift, IsKernelCreatedFromSource, + IsESIMD); } } diff --git a/sycl/test/abi/sycl_symbols_linux.dump b/sycl/test/abi/sycl_symbols_linux.dump index 0c9f6da10cdcc..e562c71d1d833 100644 --- a/sycl/test/abi/sycl_symbols_linux.dump +++ b/sycl/test/abi/sycl_symbols_linux.dump @@ -3830,10 +3830,12 @@ _ZN2cl4sycl7contextC2ERKSt6vectorINS0_6deviceESaIS3_EESt8functionIFvNS0_14except _ZN2cl4sycl7contextC2ERKSt8functionIFvNS0_14exception_listEEERKNS0_13property_listE _ZN2cl4sycl7contextC2ESt10shared_ptrINS0_6detail12context_implEE _ZN2cl4sycl7handler10processArgEPvRKNS0_6detail19kernel_param_kind_tEimRmb +_ZN2cl4sycl7handler10processArgEPvRKNS0_6detail19kernel_param_kind_tEimRmbb _ZN2cl4sycl7handler13getKernelNameB5cxx11Ev _ZN2cl4sycl7handler18extractArgsAndReqsEv _ZN2cl4sycl7handler20associateWithHandlerEPNS0_6detail16AccessorBaseHostENS0_6access6targetE _ZN2cl4sycl7handler28extractArgsAndReqsFromLambdaEPcmPKNS0_6detail19kernel_param_desc_tE +_ZN2cl4sycl7handler28extractArgsAndReqsFromLambdaEPcmPKNS0_6detail19kernel_param_desc_tEb _ZN2cl4sycl7handler6memcpyEPvPKvm _ZN2cl4sycl7handler6memsetEPvim _ZN2cl4sycl7handler7barrierERKSt6vectorINS0_5eventESaIS3_EE diff --git a/sycl/test/esimd/intrins_trans.cpp b/sycl/test/esimd/intrins_trans.cpp index 3cfeeb7f57fbf..99dabc3733e22 100644 --- a/sycl/test/esimd/intrins_trans.cpp +++ b/sycl/test/esimd/intrins_trans.cpp @@ -122,22 +122,22 @@ SYCL_ESIMD_FUNCTION SYCL_EXTERNAL simd foo() { // 4-byte element gather simd v = gather(acc, offsets, 100); - // CHECK: %[[SI3:[0-9a-zA-Z_.]+]] = ptrtoint %opencl.image1d_buffer_rw_t addrspace(1)* %{{[0-9a-zA-Z_.]+}} to i32 + // CHECK: %[[SI3:[0-9a-zA-Z_.]+]] = ptrtoint i32 addrspace(1)* %{{[0-9a-zA-Z_.]+}} to i32 // CHECK: %{{[0-9a-zA-Z_.]+}} = call <8 x i32> @llvm.genx.gather.scaled2.v8i32.v8i32(i32 2, i16 0, i32 %[[SI3]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}) // 4-byte element scatter scatter(acc, v, offsets, 100, pred); - // CHECK: %[[SI4:[0-9a-zA-Z_.]+]] = ptrtoint %opencl.image1d_buffer_rw_t addrspace(1)* %{{[0-9a-zA-Z_.]+}} to i32 + // CHECK: %[[SI4:[0-9a-zA-Z_.]+]] = ptrtoint i32 addrspace(1)* %{{[0-9a-zA-Z_.]+}} to i32 // CHECK: call void @llvm.genx.scatter.scaled.v8i1.v8i32.v8i32(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 2, i16 0, i32 %[[SI4]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}) // 1-byte element gather simd v1 = gather(acc, offsets, 100); - // CHECK: %[[SI5:[0-9a-zA-Z_.]+]] = ptrtoint %opencl.image1d_buffer_rw_t addrspace(1)* %{{[0-9a-zA-Z_.]+}} to i32 + // CHECK: %[[SI5:[0-9a-zA-Z_.]+]] = ptrtoint i32 addrspace(1)* %{{[0-9a-zA-Z_.]+}} to i32 // CHECK: %{{[0-9a-zA-Z_.]+}} = call <8 x i32> @llvm.genx.gather.scaled2.v8i32.v8i32(i32 0, i16 0, i32 %[[SI5]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}) // 1-byte element scatter scatter(acc, v1, offsets, 100, pred); - // CHECK: %[[SI6:[0-9a-zA-Z_.]+]] = ptrtoint %opencl.image1d_buffer_rw_t addrspace(1)* %{{[0-9a-zA-Z_.]+}} to i32 + // CHECK: %[[SI6:[0-9a-zA-Z_.]+]] = ptrtoint i32 addrspace(1)* %{{[0-9a-zA-Z_.]+}} to i32 // CHECK: call void @llvm.genx.scatter.scaled.v8i1.v8i32.v8i32(<8 x i1> %{{[0-9a-zA-Z_.]+}}, i32 0, i16 0, i32 %[[SI6]], i32 %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}, <8 x i32> %{{[0-9a-zA-Z_.]+}}) } return d;