From 72be466e325b698508a446618226484f8700f127 Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Thu, 22 Feb 2024 00:12:28 -0600 Subject: [PATCH 01/13] Support local_accessor kernel arguments. - Adds support in libsyclinterface:: dpctl_sycl_queue_interface for sycl::local_accessor as kernel arguments. - Refactoring to get rid of compiler warnings. --- dpctl/enum_types.py | 1 + .../helper/include/dpctl_error_handlers.h | 2 +- .../include/dpctl_sycl_enum_types.h | 1 + .../dpctl_sycl_kernel_bundle_interface.cpp | 6 +- .../source/dpctl_sycl_queue_interface.cpp | 116 +++++++++++++++++- 5 files changed, 118 insertions(+), 8 deletions(-) diff --git a/dpctl/enum_types.py b/dpctl/enum_types.py index 102ae09015..0a643db65f 100644 --- a/dpctl/enum_types.py +++ b/dpctl/enum_types.py @@ -26,6 +26,7 @@ "device_type", "backend_type", "event_status_type", + "kernel_arg_type", ] diff --git a/libsyclinterface/helper/include/dpctl_error_handlers.h b/libsyclinterface/helper/include/dpctl_error_handlers.h index 42e8efd77e..34fa517cfb 100644 --- a/libsyclinterface/helper/include/dpctl_error_handlers.h +++ b/libsyclinterface/helper/include/dpctl_error_handlers.h @@ -20,7 +20,7 @@ /// /// \file /// A functor to use for passing an error handler callback function to sycl -/// context and queue contructors. +/// context and queue constructors. //===----------------------------------------------------------------------===// #pragma once diff --git a/libsyclinterface/include/dpctl_sycl_enum_types.h b/libsyclinterface/include/dpctl_sycl_enum_types.h index 4edcce45df..c8bc0b1b20 100644 --- a/libsyclinterface/include/dpctl_sycl_enum_types.h +++ b/libsyclinterface/include/dpctl_sycl_enum_types.h @@ -98,6 +98,7 @@ typedef enum DPCTL_FLOAT32_T, DPCTL_FLOAT64_T, DPCTL_VOID_PTR, + DPCTL_LOCAL_ACCESSOR, DPCTL_UNSUPPORTED_KERNEL_ARG } DPCTLKernelArgType; diff --git a/libsyclinterface/source/dpctl_sycl_kernel_bundle_interface.cpp b/libsyclinterface/source/dpctl_sycl_kernel_bundle_interface.cpp index 4aeb9b2bb3..66aa215808 100644 --- a/libsyclinterface/source/dpctl_sycl_kernel_bundle_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_kernel_bundle_interface.cpp @@ -530,7 +530,7 @@ _GetKernel_ze_impl(const kernel_bundle &kb, else { error_handler("Kernel named " + std::string(kernel_name) + " could not be found.", - __FILE__, __func__, __LINE__); + __FILE__, __func__, __LINE__, error_level::error); return nullptr; } } @@ -541,7 +541,7 @@ bool _HasKernel_ze_impl(const kernel_bundle &kb, auto zeKernelCreateFn = get_zeKernelCreate(); if (zeKernelCreateFn == nullptr) { error_handler("Could not load zeKernelCreate function.", __FILE__, - __func__, __LINE__); + __func__, __LINE__, error_level::error); return false; } @@ -564,7 +564,7 @@ bool _HasKernel_ze_impl(const kernel_bundle &kb, if (ze_status != ZE_RESULT_ERROR_INVALID_KERNEL_NAME) { error_handler("zeKernelCreate failed: " + _GetErrorCode_ze_impl(ze_status), - __FILE__, __func__, __LINE__); + __FILE__, __func__, __LINE__, error_level::error); return false; } } diff --git a/libsyclinterface/source/dpctl_sycl_queue_interface.cpp b/libsyclinterface/source/dpctl_sycl_queue_interface.cpp index dc512fd126..04ab441332 100644 --- a/libsyclinterface/source/dpctl_sycl_queue_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_queue_interface.cpp @@ -38,6 +38,76 @@ using namespace sycl; +#define SET_LOCAL_ACCESSOR_ARG(CGH, NDIM, ARGTY, R, IDX) \ + do { \ + switch ((ARGTY)) { \ + case DPCTL_INT8_T: \ + { \ + auto la = local_accessor(R, CGH); \ + CGH.set_arg(IDX, la); \ + return true; \ + } \ + case DPCTL_UINT8_T: \ + { \ + auto la = local_accessor(R, CGH); \ + CGH.set_arg(IDX, la); \ + return true; \ + } \ + case DPCTL_INT16_T: \ + { \ + auto la = local_accessor(R, CGH); \ + CGH.set_arg(IDX, la); \ + return true; \ + } \ + case DPCTL_UINT16_T: \ + { \ + auto la = local_accessor(R, CGH); \ + CGH.set_arg(IDX, la); \ + return true; \ + } \ + case DPCTL_INT32_T: \ + { \ + auto la = local_accessor(R, CGH); \ + CGH.set_arg(IDX, la); \ + return true; \ + } \ + case DPCTL_UINT32_T: \ + { \ + auto la = local_accessor(R, CGH); \ + CGH.set_arg(IDX, la); \ + return true; \ + } \ + case DPCTL_INT64_T: \ + { \ + auto la = local_accessor(R, CGH); \ + CGH.set_arg(IDX, la); \ + return true; \ + } \ + case DPCTL_UINT64_T: \ + { \ + auto la = local_accessor(R, CGH); \ + CGH.set_arg(IDX, la); \ + return true; \ + } \ + case DPCTL_FLOAT32_T: \ + { \ + auto la = local_accessor(R, CGH); \ + CGH.set_arg(IDX, la); \ + return true; \ + } \ + case DPCTL_FLOAT64_T: \ + { \ + auto la = local_accessor(R, CGH); \ + CGH.set_arg(IDX, la); \ + return true; \ + } \ + default: \ + error_handler("Kernel argument could not be created.", __FILE__, \ + __func__, __LINE__, error_level::error); \ + return false; \ + } \ + } while (0); + namespace { static_assert(__SYCL_COMPILER_VERSION >= __SYCL_COMPILER_VERSION_REQUIRED, @@ -51,6 +121,15 @@ typedef struct complex uint64_t imag; } complexNumber; +typedef struct MDLocalAccessorTy +{ + size_t ndim; + DPCTLKernelArgType dpctl_type_id; + size_t dim0; + size_t dim1; + size_t dim2; +} MDLocalAccessor; + void set_dependent_events(handler &cgh, __dpctl_keep const DPCTLSyclEventRef *DepEvents, size_t NDepEvents) @@ -62,11 +141,39 @@ void set_dependent_events(handler &cgh, } } +bool set_local_accessor_arg(handler &cgh, + size_t idx, + const MDLocalAccessor *mdstruct) +{ + switch (mdstruct->ndim) { + case 1: + { + auto r = range<1>(mdstruct->dim0); + SET_LOCAL_ACCESSOR_ARG(cgh, 1, mdstruct->dpctl_type_id, r, idx); + } + case 2: + { + auto r = range<2>(mdstruct->dim0, mdstruct->dim1); + SET_LOCAL_ACCESSOR_ARG(cgh, 2, mdstruct->dpctl_type_id, r, idx); + } + case 3: + { + auto r = range<3>(mdstruct->dim0, mdstruct->dim1, mdstruct->dim2); + SET_LOCAL_ACCESSOR_ARG(cgh, 3, mdstruct->dpctl_type_id, r, idx); + } + default: + return false; + } +} /*! * @brief Set the kernel arg object * - * @param cgh My Param doc - * @param Arg My Param doc + * @param cgh SYCL command group handler using which a kernel is going to + * be submitted. + * @param idx The position of the argument in the list of arguments passed + * to a kernel. + * @param Arg A void* representing a kernel argument. + * @param Argty A typeid specifying the C++ type of the Arg parameter. */ bool set_kernel_arg(handler &cgh, size_t idx, @@ -109,10 +216,11 @@ bool set_kernel_arg(handler &cgh, case DPCTL_VOID_PTR: cgh.set_arg(idx, Arg); break; + case DPCTL_LOCAL_ACCESSOR: + arg_set = set_local_accessor_arg(cgh, idx, (MDLocalAccessor *)Arg); + break; default: arg_set = false; - error_handler("Kernel argument could not be created.", __FILE__, - __func__, __LINE__); break; } return arg_set; From f288a2abdbcd9bce2999274722e022ded7bc981c Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Thu, 7 Mar 2024 22:59:55 -0600 Subject: [PATCH 02/13] Add unit tests for local accessor kernel arg. --- libsyclinterface/tests/CMakeLists.txt | 3 + .../tests/local_accessor_kernel_fp64.spv | Bin 0 -> 5696 bytes .../local_accessor_kernel_inttys_fp32.spv | Bin 0 -> 12908 bytes ...t_sycl_queue_submit_local_accessor_arg.cpp | 364 ++++++++++++++++++ 4 files changed, 367 insertions(+) create mode 100644 libsyclinterface/tests/local_accessor_kernel_fp64.spv create mode 100644 libsyclinterface/tests/local_accessor_kernel_inttys_fp32.spv create mode 100644 libsyclinterface/tests/test_sycl_queue_submit_local_accessor_arg.cpp diff --git a/libsyclinterface/tests/CMakeLists.txt b/libsyclinterface/tests/CMakeLists.txt index a0be739b2e..19eed6c06f 100644 --- a/libsyclinterface/tests/CMakeLists.txt +++ b/libsyclinterface/tests/CMakeLists.txt @@ -21,6 +21,8 @@ set(spirv-test-files multi_kernel.spv oneD_range_kernel_inttys_fp32.spv oneD_range_kernel_fp64.spv + local_accessor_kernel_inttys_fp32.spv + local_accessor_kernel_fp64.spv ) foreach(tf ${spirv-test-files}) @@ -55,6 +57,7 @@ add_sycl_to_target( ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_platform_invalid_filters.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_queue_manager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_queue_submit.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_queue_submit_local_accessor_arg.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_queue_interface.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_usm_interface.cpp ) diff --git a/libsyclinterface/tests/local_accessor_kernel_fp64.spv b/libsyclinterface/tests/local_accessor_kernel_fp64.spv new file mode 100644 index 0000000000000000000000000000000000000000..ffc220268a8f6da9ad169350ad2820ea3e66e859 GIT binary patch literal 5696 zcmai$33n4!6vrntfZaf4b3tuE0WG2+f>nW9gi zp)87m3WEAE{0x2x&r#1o{r_I(Hq+^FoO5pPe&5~RODo3A9PcF)-UM%)_p4X>Zt})> zNs-{O0Uj6NssL99cvgUC2l(2QlHYIBJg-VI+c#|STi0z~(dOs!#cVE{mR*%M-pSXj z+T^GA=F^=;Kh@XQzYATpPD}`|&!@A?+vfEZyFBGqp-emn6TM1B^}HJe{+D^VE@acaetX;6)~*$@u5kGe9Ze`25oPQ5^$!7X?gzPH4o;bTwj(x@c4p+y)IGZ4rGm@S&2S?kL>j(_7 zJ2~2JxCV>Y;?*jq=iTdU|9>CY!xrrlK2sf?bTqp^J;cuyUFY~(x0^BXV|U2nvvU?7 zn)tW7cu5zp9KYJ_Tul5CduMUkPm2pp+}nb<@a4EQZck(4j@ng=T`7Ny4NdGBf)?F8 zcw*n+_!_s{F|mhpw*9u)+y#pbP3&7;tfY%o&O50$DtuPbyLkj(<8fa~XOFlGqwb8% zlzXkmy@|0e@7(Y$!vmihuR%D*XL$d@F6`3+*$*gjdi99tIRf6Zk-7?Rsbu}mu0)r7 zD!gUG@)~z1W6zGLtnu)^r zVTZ0>{`&XE$tEjiz3A!kSS8Fk^np07!s7+V;nxQ|eSqH>@c6*D3zIK=yt3@4hVvCjlNG2qV$bC&7P2R!=6q_;lO7vuSD3iHG_o>-M3 z9)I3$^y~+{yhjjxO^YO>2HrhBdc0VDrN?^m98Irz&;L~QlY|?U6+ZkH@UAaWkC-#R zX`EH6-BVvncY)tfbbMyZ?>0F_ZSRl_9iQ3WDVmz2ws%Q}j_}K`ox*^)M}nBdY0gK^?>^faF0ztS9F8G&b(hV_16i= z2mUWboh(d0!GCB&nD>K%c>;R;fI$DkI~_SZB;Y+j$L|(8J7b|}^qljz@`6XtZ#QQ^ zTb&P!rcUhXFZ-}afZS@$DF*Vz0&+U6e!%k^i7oQ1Y{;2Bksp!`{^Uk%-l=e%P zd0aFz!iO0Az9QotI}*s2iRNr{mQRyt&e&1AV`&4O!QLb)bX zLuu#P-GqQQgg!kf%pK#sSS{dQU65_P@H#=8V6A{2!><&q5a0_wAUSW#1_AzFqQcRf zvr@h0o!KNn$1H(;@@W^|EI?;F|CDIXrqyr3$O*9lwP7eu4uvmW@O z<7~G}j*c4G7tTlth_PK@{vD#@{5vH_hoAX(i6#a*+tajYi5)iJ=$X7l z;WOV7Z##ZWba=<#6@Oe{Il#Xocw0bxu(fG6D(BU0_FVIDI{C&yM*?c|_9k=5j zN{)_?9m5A1^WrVG9sfu)IdD#R$3GU2KXv1e9kQtZCz6?ec*j4L9DnTb#}1kK+m3%G zS$M}AwfoHHOTkG2^M2F|KTYO$%^McO`b_s|N zws{pqoHZoz1IXblkiKBuB?bzwkk3{jz!Ok&GNTBRsFY;_
yVfH+?XGt*PTLjrmVzft&fh(9CxY``;Q_p)eYoC}T5 z_0HfgoHH+0&3X_ozSGy-E?;X!H5A7 zX8@Jp3T~(fiZd+A>WYdBtD=CiDLUgmGtTmx5f{Mkd-YD$t(Vu)`^(AwpXHu=&b|Nl z>aopxJ6guEBmF~% zFPS?wetJf2H7dz+*(RGx)+~D-(meIVrTgr+blvj7#r>m0{e#V=OO9UDH$RJ*!zYS4 zqZ6}gqL{Hx%w-eBjCW#gnQ2m26UD5U&uP~3 z`@QO-9!}d6Ctq`HWMFh{^RP7ogX4>a77h+C>l<7&w03xT-}u1r&?2rOkM-8n*vma{+BB}yM-gb; zU7NsBdxIOfJ|lm2EAm%zDDum> z$S*ekOY(g6JYOgOw48Gl`MY=*dF5y16`S|PMPBtz-sw3{EAno_)yO@S^^sd_?p+Yi zNZs69=KA!U+ZDOnH3z>Vx6eZ47MuG;d9He%t5bKKcX(^B*4ldZf-Ze}=JV3pyX#p9 zKDM73!l->l=5tdK-{D->3fD}}=2Wj~^DkW@pY|nwErplyA=uXxc5y3adj8y1)HKj6 zJ$o$4&;JN!&nV7hewSX$PDYP?#8JyB3I2OxmL++_k!N|^Ue5->*5dD*+*sYE*Rl)H zV_k|b!5%e=TQT))W7qn6c6pax%dSq=i6i$l3I2OxZbS9;>i7U+g{I}1)IC| z+}5+_y7XGM9fXQ?Dc%Wt^eK**o!j<$wktTzy<3-F%MMJ|rMS19qn3#y_hD^&JzES; z&tyrLUd#I0>+0D`@SgPL_m$6I$5+c%;fp@RvDaYRUeDHo(>~5d=TFAl<++8tsr>D$ zd{$e((eYE1^W}*Smh>k|`jd%1ljqTQ&Y>O-Lw!24+;*tT zgN6!2eI>KJaLnPC-t`TYEZg8;bBY|hgY~JK!!t?sm!V%)>-guQJ0j;kEw#%w4~_B* zTPyFuh0T4eZ9UT3r&Zh5EKqku?lEj^osWHsdgZ7i>OZH@!!Pd~q06IGmj{j=$Y*Z% zbZ($~cYYIv?)j=4vn_v5V>WI^VfPNyw`O*&`Ycv#!)&Z^`eu=Fhr+fUd1irKcLuZh zJrDH(X7i|<=S*hv7^lyeo0(nXU8&!~>>72~+{)}4Xej(UBTDE$2B)aR>UB8Ig^~UMj5$`y&`MopuT*EvCUAy|Zg&t?NzR+Ex|2%Z- zY1KG6yA0huv}=q0t|{4PvG&Ho7P;<5cW>94`@TXqhx!BP?iKzA3tj&~`0dsHmC*f` zO?C5!Zmy|`?s~uV^vhGv?KFgO=>AykE1pfcascNqSNL4Yv7GVS?`b;UmU$jIT;q46 z-++hkB`rt&{&k;L-&uA%xy62O>vKe`-}u&3j@=2HK1amv47T5L>`SrfbKFR_S>{Ti^j^|{WvUW;hAcKLWHA}7t|9jO}=b;lhXhOHNQhMPMO369HE z@6val=Q4ug^W?k1Zx5fL>&QEcc^$g%cHifosro2#7UEjx_1JxnyajQ6)>F%|`z+vW zeW$#&@OgL2I@b7{zpd~E$2Xw6zr8e&cy`XmuFql5=F?`J&w1=~0XA!J5APz@za5+F z`IE0VPFs2Xh1g=fzkRX(9bnfR@BYSVi}ltoz6hJ|oYd|+;hos4-UL5QG(-sgQgt61KsbBs6qW!g+j{_EIceai7~V0XRo?r)s7SRWk!CbpF0 z^WeVs^BwGdzuT`dawN{k`|?aZPv`P|^|Bsw`@TMa68CilIK8hAqhE={eW`y4`5iPQV~XyGg0*T=Byi~IUGxaz*H!miKZdAUYgoLAh}C$L#V`My4h&Gr1r z*Bhs;y#7y8`|?aZPv`P|9iDfc z<=@otopb~?vELE%5!WtYj^CrBz@C4c>tb-#duj=GeU6Ad8eH|AbPRTVj)*-LT=kuF z9Cm$<_+~gBY>o!vcZquVP5`^6f#_3@?}w#ed(`jUt5^T`YasFMu@q4-8_mSi3`p-h#LmYj+18kqhd%i1~FG93OufffC z7QW!-26SuG9vrt8a|OqvFZYf-7lXZ%$a4u;TgvfE(ao*Tb=LJRM7y=i$9E%g(p=t= zx-n6A+|hfm^&(}CUxv+*a(pAYe3t{4Bc9iLnIqQ+!70Z-gkHt*E3oTx1aGedS8@Eq z*!4Lg_9Ngbj(-%pK1ba5$H3-jAaY7Qd>;q9rh({FkGsDLY>)aqKlLh(e**jFIQ~ie zDaSvBZvPF$J=Nv-`UvSf!tWkS#+*-^P9FM--JMvr)_D&+t4Pb34$8SV8 zx4z)`O^H$x9KQ>@K1ba5x4`CTAaY7QeBTDU zrh({F_x$YdJBU5%_x#lNLaeWjM9<#^&qgMCkKc_i<@i15cOnhMJ=Nv-mzX_I=ivB# zU^(s?--~=1364JimgD;FNAyKs-vjHv6OrTU`oDs>hdBEDKG;5u_k8zceh|?fy#^Pp z!J703r`_AN!RhG3y@JyZfxUys^DtQ3!ASf&oJYXs)@N<vP0?{t9f41|nzF!}n{jYZ{0?^|;&LfbCJg=ciu9;ooAPjZDPh-{DI+ z{O{=Y-$2|`T@F9V?0Gr|hyN2i7x9e$1NjLO9R4q`9M<=HL|^pv-(dajBZt-X{}gc# zarF5=VEZ&aIQ+k0?VfKP3GP{gHR%ryySHnD!_kL(1&9A1>>Wg&KY+ER9DW+z-1>sU ze+0`p*Lnx)#zkGhyFX#;Mamq02AktAh`imB`Pl;hnfW=y^T>KS9Ik^?4o^a_;_zhb z`W(T_DcaDjuYm+Fx5Tc`5wTOzC!?2Rx5BQ^5%)O_-5d>Uaz;IT)4{F@pStH~e>1@L zsNeHb_n+t8)3b>4+Xma_IQ%^PDTkkrZvPE(SdY3K?qT*korA;Mg5_|=`~qZ)!e}E>Qx+n752?>d_Vjt$6u|E#GdMMd^WS^ z=^PwC04&En=%|-L-on?jepo4+7h#@xk$f!P z!OcUk=?iXJlQn7&j$4blg5%Mbdq=;`Ier+rx%CCd4+m=xjvoP*ljib{ z)QyR{gO~HM^>S8ajvs~Hv4~k-FJxX^;3JurAfDIJ%#rJOaLVx$(5pDU6uUl0aQsB{ zDvrMnyS_B`B=jndpNw6fBkue4=;ml(ljG{)I|b~T@Tq%#_ID~`kNQ17_0tf~uZ~2| zZ@{)Wj=vFq%JDa$+kfn-F2|2y_B@?~<8KDbanHDk99#ITVHsGC>+36g(bsZx{q7^j z)m?iW;vVAY^K`I%8t+{!VD3k>N3X-=(7pn(rt>)u^_7UR+J}hQlX|REdvM;`%pIJM zKCLA%sRX`hDR03w<5_2f*J$T%+z@ z-$z`d{vh)Y5ZBm?vDRW8{)TBY-kMUIYpgALF86qC?(4~}VU8X@Q()J87HnTXWRAWb z0;hdFT +#include +#include +#include +#include + +namespace +{ +constexpr size_t SIZE = 100; + +using namespace dpctl::syclinterface; + +typedef struct MDLocalAccessorTy +{ + size_t ndim; + DPCTLKernelArgType dpctl_type_id; + size_t dim0; + size_t dim1; + size_t dim2; +} MDLocalAccessor; + +template +void submit_kernel(DPCTLSyclQueueRef QRef, + DPCTLSyclKernelBundleRef KBRef, + std::vector spirvBuffer, + size_t spirvFileSize, + DPCTLKernelArgType kernelArgTy, + std::string kernelName) +{ + constexpr size_t NARGS = 2; + constexpr size_t RANGE_NDIMS = 1; + + ASSERT_TRUE(DPCTLKernelBundle_HasKernel(KBRef, kernelName.c_str())); + auto kernel = DPCTLKernelBundle_GetKernel(KBRef, kernelName.c_str()); + + // Create the input args + auto a = DPCTLmalloc_shared(SIZE * sizeof(T), QRef); + ASSERT_TRUE(a != nullptr); + auto a_ptr = static_cast(unwrap(a)); + for (auto i = 0ul; i < SIZE; ++i) { + a_ptr[i] = 0; + } + + auto la = MDLocalAccessor{1, kernelArgTy, SIZE / 10, 1, 1}; + + // Create kernel args for vector_add + size_t gRange[] = {SIZE}; + size_t lRange[] = {SIZE / 10}; + void *args[NARGS] = {unwrap(a), (void *)&la}; + DPCTLKernelArgType addKernelArgTypes[] = {DPCTL_VOID_PTR, + DPCTL_LOCAL_ACCESSOR}; + + auto ERef = + DPCTLQueue_SubmitNDRange(kernel, QRef, args, addKernelArgTypes, NARGS, + gRange, lRange, RANGE_NDIMS, nullptr, 0); + ASSERT_TRUE(ERef != nullptr); + DPCTLQueue_Wait(QRef); + + if (kernelArgTy != DPCTL_FLOAT32_T && kernelArgTy != DPCTL_FLOAT64_T) + ASSERT_TRUE(a_ptr[0] == 20); + else + ASSERT_TRUE(a_ptr[0] == 20.0); + + // clean ups + DPCTLEvent_Delete(ERef); + DPCTLKernel_Delete(kernel); + DPCTLfree_with_queue((DPCTLSyclUSMRef)a, QRef); +} + +} /* end of anonymous namespace */ + +/* +// The local_accessor_kernel spv files were generated from the SYCL program +// included in this comment. The program can be compiled using +// `icpx -fsycl local_accessor_kernel.cpp`. After that if the generated +// executable is run with the environment variable `SYCL_DUMP_IMAGES=1`, icpx +// runtime will dump all offload sections of fat binary to the current working +// directory. When tested with DPC++ 2024.0 the kernels are split across two +// separate SPV files. One contains all kernels for integers and FP32 +// data type, and another contains the kernel for FP64. +// +// Note that, `SYCL_DUMP_IMAGES=1` will also generate extra SPV files that +// contain the code for built in functions such as indexing and barriers. To +// figure which SPV file contains the kernels, use `spirv-dis` from the +// spirv-tools package to translate the SPV binary format to a human-readable +// textual format. +#include +#include +#include + +template +class SyclKernel_SLM +{ +private: + T N_; + T *a_ = nullptr; + sycl::local_accessor slm_; + +public: + SyclKernel_SLM(T *a, sycl::local_accessor slm) + : a_(a), slm_(slm) + { + } + + void operator()(sycl::nd_item<1> it) const + { + int i = it.get_global_id(); + int j = it.get_local_id(); + slm_[j] = 2; + auto g = it.get_group(); + group_barrier(g); + auto temp = 0; + for (auto idx = 0ul; idx < it.get_local_range(0); ++idx) + temp += slm_[idx]; + a_[i] = temp * (i + 1); + } +}; + +template +void submit_kernel(sycl::queue q, const unsigned long N, T *a) +{ + q.submit([&](auto &h) + { + sycl::local_accessor slm(sycl::range(N/10), h); + h.parallel_for(sycl::nd_range(sycl::range{N}, sycl::range{N/10}), + SyclKernel_SLM(a, slm)); }); +} + +template +void driver(size_t N) +{ + sycl::queue q; + auto *a = sycl::malloc_shared(N, q); + submit_kernel(q, N, a); + q.wait(); + sycl::free(a, q); +} + +int main(int argc, const char **argv) +{ + size_t N = 0; + std::cout << "Enter problem size in N:\n"; + std::cin >> N; + std::cout << "Executing with N = " << N << std::endl; + + driver(N); + driver(N); + driver(N); + driver(N); + driver(N); + driver(N); + driver(N); + driver(N); + driver(N); + driver(N); + + return 0; +} + +*/ + +struct TestQueueSubmitWithLocalAccessor : public ::testing::Test +{ + std::ifstream spirvFile; + size_t spirvFileSize_; + std::vector spirvBuffer_; + DPCTLSyclQueueRef QRef = nullptr; + DPCTLSyclKernelBundleRef KBRef = nullptr; + + TestQueueSubmitWithLocalAccessor() + { + DPCTLSyclDeviceSelectorRef DSRef = nullptr; + DPCTLSyclDeviceRef DRef = nullptr; + + spirvFile.open("./local_accessor_kernel_inttys_fp32.spv", + std::ios::binary | std::ios::ate); + spirvFileSize_ = std::filesystem::file_size( + "./local_accessor_kernel_inttys_fp32.spv"); + spirvBuffer_.reserve(spirvFileSize_); + spirvFile.seekg(0, std::ios::beg); + spirvFile.read(spirvBuffer_.data(), spirvFileSize_); + + DSRef = DPCTLDefaultSelector_Create(); + DRef = DPCTLDevice_CreateFromSelector(DSRef); + QRef = + DPCTLQueue_CreateForDevice(DRef, nullptr, DPCTL_DEFAULT_PROPERTY); + auto CRef = DPCTLQueue_GetContext(QRef); + + KBRef = DPCTLKernelBundle_CreateFromSpirv( + CRef, DRef, spirvBuffer_.data(), spirvFileSize_, nullptr); + DPCTLDevice_Delete(DRef); + DPCTLDeviceSelector_Delete(DSRef); + } + + ~TestQueueSubmitWithLocalAccessor() + { + spirvFile.close(); + DPCTLQueue_Delete(QRef); + DPCTLKernelBundle_Delete(KBRef); + } +}; + +struct TestQueueSubmitWithLocalAccessorFP64 : public ::testing::Test +{ + std::ifstream spirvFile; + size_t spirvFileSize_; + std::vector spirvBuffer_; + DPCTLSyclQueueRef QRef = nullptr; + DPCTLSyclKernelBundleRef KBRef = nullptr; + + TestQueueSubmitWithLocalAccessorFP64() + { + DPCTLSyclDeviceSelectorRef DSRef = nullptr; + DPCTLSyclDeviceRef DRef = nullptr; + + spirvFile.open("./local_accessor_kernel_fp64.spv", + std::ios::binary | std::ios::ate); + spirvFileSize_ = + std::filesystem::file_size("./local_accessor_kernel_fp64.spv"); + spirvBuffer_.reserve(spirvFileSize_); + spirvFile.seekg(0, std::ios::beg); + spirvFile.read(spirvBuffer_.data(), spirvFileSize_); + DSRef = DPCTLDefaultSelector_Create(); + DRef = DPCTLDevice_CreateFromSelector(DSRef); + QRef = + DPCTLQueue_CreateForDevice(DRef, nullptr, DPCTL_DEFAULT_PROPERTY); + auto CRef = DPCTLQueue_GetContext(QRef); + + KBRef = DPCTLKernelBundle_CreateFromSpirv( + CRef, DRef, spirvBuffer_.data(), spirvFileSize_, nullptr); + DPCTLDevice_Delete(DRef); + DPCTLDeviceSelector_Delete(DSRef); + } + + ~TestQueueSubmitWithLocalAccessorFP64() + { + spirvFile.close(); + DPCTLQueue_Delete(QRef); + DPCTLKernelBundle_Delete(KBRef); + } +}; + +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForInt8) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_INT8_T, + "_ZTS14SyclKernel_SLMIaE"); +} + +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForUInt8) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_UINT8_T, + "_ZTS14SyclKernel_SLMIhE"); +} + +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForInt16) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_INT16_T, + "_ZTS14SyclKernel_SLMIsE"); +} + +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForUInt16) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_UINT16_T, + "_ZTS14SyclKernel_SLMItE"); +} + +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForInt32) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_INT32_T, + "_ZTS14SyclKernel_SLMIiE"); +} + +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForUInt32) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_UINT32_T, + "_ZTS14SyclKernel_SLMIjE"); +} + +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForInt64) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_INT64_T, + "_ZTS14SyclKernel_SLMIlE"); +} + +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForUInt64) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_UINT64_T, + "_ZTS14SyclKernel_SLMImE"); +} + +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForFloat) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_FLOAT32_T, + "_ZTS14SyclKernel_SLMIfE"); +} + +TEST_F(TestQueueSubmitWithLocalAccessorFP64, CheckForDouble) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_FLOAT64_T, + "_ZTS14SyclKernel_SLMIdE"); +} + +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForUnsupportedArgTy) +{ + size_t gRange[] = {SIZE}; + size_t lRange[] = {SIZE / 10}; + size_t RANGE_NDIMS = 1; + constexpr size_t NARGS = 2; + + auto la = MDLocalAccessor{1, DPCTL_UNSUPPORTED_KERNEL_ARG, SIZE / 10, 1, 1}; + auto kernel = DPCTLKernelBundle_GetKernel(KBRef, "_ZTS14SyclKernel_SLMImE"); + void *args[NARGS] = {unwrap(nullptr), (void *)&la}; + DPCTLKernelArgType addKernelArgTypes[] = {DPCTL_VOID_PTR, + DPCTL_LOCAL_ACCESSOR}; + auto ERef = + DPCTLQueue_SubmitNDRange(kernel, QRef, args, addKernelArgTypes, NARGS, + gRange, lRange, RANGE_NDIMS, nullptr, 0); + + ASSERT_TRUE(ERef == nullptr); +} From 4736de1704e5ed74b56ac8383cea27985ce6c93a Mon Sep 17 00:00:00 2001 From: Diptorup Deb Date: Fri, 8 Mar 2024 14:02:00 -0600 Subject: [PATCH 03/13] Remove leftover class from enum_types.__all__ --- dpctl/enum_types.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/dpctl/enum_types.py b/dpctl/enum_types.py index 0a643db65f..017497eb37 100644 --- a/dpctl/enum_types.py +++ b/dpctl/enum_types.py @@ -22,12 +22,7 @@ """ from enum import Enum, auto -__all__ = [ - "device_type", - "backend_type", - "event_status_type", - "kernel_arg_type", -] +__all__ = ["device_type", "backend_type", "event_status_type"] class device_type(Enum): From a3d87a52e780830576b6169a910e0116209e15c6 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Fri, 8 Mar 2024 12:42:08 -0800 Subject: [PATCH 04/13] Add local_accessor kernel_arg_type --- dpctl/_backend.pxd | 3 ++- dpctl/_sycl_queue.pyx | 9 +++++++++ dpctl/tests/test_sycl_kernel_submit.py | 1 + 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/dpctl/_backend.pxd b/dpctl/_backend.pxd index e8627e8241..6cbf1500ee 100644 --- a/dpctl/_backend.pxd +++ b/dpctl/_backend.pxd @@ -67,7 +67,8 @@ cdef extern from "syclinterface/dpctl_sycl_enum_types.h": _UINT64_T 'DPCTL_UINT64_T', _FLOAT 'DPCTL_FLOAT32_T', _DOUBLE 'DPCTL_FLOAT64_T', - _VOID_PTR 'DPCTL_VOID_PTR' + _VOID_PTR 'DPCTL_VOID_PTR', + _LOCAL_ACCESSOR 'DPCTL_LOCAL_ACCESSOR' ctypedef enum _queue_property_type 'DPCTLQueuePropertyType': _DEFAULT_PROPERTY 'DPCTL_DEFAULT_PROPERTY' diff --git a/dpctl/_sycl_queue.pyx b/dpctl/_sycl_queue.pyx index 0dec0990c3..75135c6fc6 100644 --- a/dpctl/_sycl_queue.pyx +++ b/dpctl/_sycl_queue.pyx @@ -233,6 +233,15 @@ cdef class _kernel_arg_type: _arg_data_type._VOID_PTR ) + @property + def dpctl_local_accessor(self): + cdef str p_name = "dpctl_local_accessor" + return kernel_arg_type_attribute( + self._name, + p_name, + _arg_data_type._LOCAL_ACCESSOR + ) + kernel_arg_type = _kernel_arg_type() diff --git a/dpctl/tests/test_sycl_kernel_submit.py b/dpctl/tests/test_sycl_kernel_submit.py index 01558dd4df..04a25335ac 100644 --- a/dpctl/tests/test_sycl_kernel_submit.py +++ b/dpctl/tests/test_sycl_kernel_submit.py @@ -274,3 +274,4 @@ def test_kernel_arg_type(): _check_kernel_arg_type_instance(kernel_arg_type.dpctl_float32) _check_kernel_arg_type_instance(kernel_arg_type.dpctl_float64) _check_kernel_arg_type_instance(kernel_arg_type.dpctl_void_ptr) + _check_kernel_arg_type_instance(kernel_arg_type.dpctl_local_accessor) From ae1884be423ef1a630569f84eae66cdaa1b478d5 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sat, 9 Mar 2024 19:05:37 -0600 Subject: [PATCH 05/13] Improve coverage of dpctl_sycl_queue_interface Test submit for 1D range, 2D range, and 3D range, and also specify dependent events. --- .../tests/test_sycl_queue_submit.cpp | 34 +++++++++++++++---- 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/libsyclinterface/tests/test_sycl_queue_submit.cpp b/libsyclinterface/tests/test_sycl_queue_submit.cpp index cc9bc836ce..8330d6da1e 100644 --- a/libsyclinterface/tests/test_sycl_queue_submit.cpp +++ b/libsyclinterface/tests/test_sycl_queue_submit.cpp @@ -56,7 +56,9 @@ void submit_kernel(DPCTLSyclQueueRef QRef, { T scalarVal = 3; constexpr size_t NARGS = 4; - constexpr size_t RANGE_NDIMS = 1; + constexpr size_t RANGE_NDIMS_1 = 1; + constexpr size_t RANGE_NDIMS_2 = 2; + constexpr size_t RANGE_NDIMS_3 = 3; ASSERT_TRUE(DPCTLKernelBundle_HasKernel(KBRef, kernelName.c_str())); auto kernel = DPCTLKernelBundle_GetKernel(KBRef, kernelName.c_str()); @@ -75,13 +77,33 @@ void submit_kernel(DPCTLSyclQueueRef QRef, (void *)&scalarVal}; DPCTLKernelArgType addKernelArgTypes[] = {DPCTL_VOID_PTR, DPCTL_VOID_PTR, DPCTL_VOID_PTR, kernelArgTy}; - auto ERef = DPCTLQueue_SubmitRange(kernel, QRef, args, addKernelArgTypes, - NARGS, Range, RANGE_NDIMS, nullptr, 0); - ASSERT_TRUE(ERef != nullptr); - DPCTLQueue_Wait(QRef); + auto E1Ref = + DPCTLQueue_SubmitRange(kernel, QRef, args, addKernelArgTypes, NARGS, + Range, RANGE_NDIMS_1, nullptr, 0); + ASSERT_TRUE(E1Ref != nullptr); + + // Create kernel args for vector_add + size_t Range2D[] = {SIZE, 1}; + DPCTLSyclEventRef DepEvs[] = {E1Ref}; + auto E2Ref = + DPCTLQueue_SubmitRange(kernel, QRef, args, addKernelArgTypes, NARGS, + Range2D, RANGE_NDIMS_2, DepEvs, 1); + ASSERT_TRUE(E2Ref != nullptr); + + // Create kernel args for vector_add + size_t Range3D[] = {SIZE, 1, 1}; + DPCTLSyclEventRef DepEvs2[] = {E1Ref, E2Ref}; + auto E3Ref = + DPCTLQueue_SubmitRange(kernel, QRef, args, addKernelArgTypes, NARGS, + Range3D, RANGE_NDIMS_3, DepEvs2, 2); + ASSERT_TRUE(E3Ref != nullptr); + + DPCTLEvent_Wait(E3Ref); // clean ups - DPCTLEvent_Delete(ERef); + DPCTLEvent_Delete(E1Ref); + DPCTLEvent_Delete(E2Ref); + DPCTLEvent_Delete(E3Ref); DPCTLKernel_Delete(kernel); DPCTLfree_with_queue((DPCTLSyclUSMRef)a, QRef); DPCTLfree_with_queue((DPCTLSyclUSMRef)b, QRef); From 78d55c87405a55fa836231c31c9acce4e1610372 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Sat, 9 Mar 2024 19:06:44 -0600 Subject: [PATCH 06/13] host is no longer a valid string selector --- libsyclinterface/tests/test_sycl_device_aspects.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libsyclinterface/tests/test_sycl_device_aspects.cpp b/libsyclinterface/tests/test_sycl_device_aspects.cpp index 61f0eaba8b..093c6730df 100644 --- a/libsyclinterface/tests/test_sycl_device_aspects.cpp +++ b/libsyclinterface/tests/test_sycl_device_aspects.cpp @@ -97,7 +97,7 @@ auto build_gtest_values(const std::array, N> ¶ms) auto build_params() { constexpr auto param_1 = get_param_list( - "opencl:gpu", "opencl:cpu", "level_zero:gpu", "host"); + "opencl:gpu", "opencl:cpu", "level_zero:gpu"); constexpr auto param_2 = get_param_list>( From 24574220e092510112b7de2a93390033f5dad08e Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 11 Mar 2024 03:58:29 -0500 Subject: [PATCH 07/13] Add test for DPCTLQueue_Create for some nullptr args --- .../tests/test_sycl_queue_interface.cpp | 28 +++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/libsyclinterface/tests/test_sycl_queue_interface.cpp b/libsyclinterface/tests/test_sycl_queue_interface.cpp index cddc007f4d..1c7fe55561 100644 --- a/libsyclinterface/tests/test_sycl_queue_interface.cpp +++ b/libsyclinterface/tests/test_sycl_queue_interface.cpp @@ -90,6 +90,34 @@ struct TestDPCTLQueueMemberFunctions } /* End of anonymous namespace */ +TEST(TestDPCTLSyclQueueInterface, CheckCreate) +{ + /* We are testing that we do not crash even when input is NULL. */ + DPCTLSyclQueueRef QRef = nullptr; + + EXPECT_NO_FATAL_FAILURE( + QRef = DPCTLQueue_Create(nullptr, nullptr, nullptr, 0)); + ASSERT_TRUE(QRef == nullptr); +} + +TEST(TestDPCTLSyclQueueInterface, CheckCreate2) +{ + /* We are testing that we do not crash even when input is NULL. */ + DPCTLSyclQueueRef QRef = nullptr; + DPCTLSyclDeviceSelectorRef DSRef = nullptr; + DPCTLSyclDeviceRef DRef = nullptr; + + EXPECT_NO_FATAL_FAILURE(DSRef = DPCTLDefaultSelector_Create()); + EXPECT_NO_FATAL_FAILURE(DRef = DPCTLDevice_CreateFromSelector(DSRef)); + EXPECT_NO_FATAL_FAILURE(DPCTLDeviceSelector_Delete(DSRef)); + + EXPECT_NO_FATAL_FAILURE(QRef = + DPCTLQueue_Create(nullptr, DRef, nullptr, 0)); + ASSERT_TRUE(QRef == nullptr); + + EXPECT_NO_FATAL_FAILURE(DPCTLDevice_Delete(DRef)); +} + TEST(TestDPCTLSyclQueueInterface, CheckCreateForDevice) { /* We are testing that we do not crash even when input is NULL. */ From 0a12c3156b7ba1070b88f8c62d33de86eb3a456a Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 11 Mar 2024 03:59:17 -0500 Subject: [PATCH 08/13] Only run test submit if device has aspect fp64 --- libsyclinterface/tests/test_sycl_queue_submit.cpp | 12 +++++++----- .../test_sycl_queue_submit_local_accessor_arg.cpp | 12 +++++++----- 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/libsyclinterface/tests/test_sycl_queue_submit.cpp b/libsyclinterface/tests/test_sycl_queue_submit.cpp index 8330d6da1e..d89ec3d3ce 100644 --- a/libsyclinterface/tests/test_sycl_queue_submit.cpp +++ b/libsyclinterface/tests/test_sycl_queue_submit.cpp @@ -256,13 +256,13 @@ struct TestQueueSubmitFP64 : public ::testing::Test std::ifstream spirvFile; size_t spirvFileSize_; std::vector spirvBuffer_; + DPCTLSyclDeviceRef DRef = nullptr; DPCTLSyclQueueRef QRef = nullptr; DPCTLSyclKernelBundleRef KBRef = nullptr; TestQueueSubmitFP64() { DPCTLSyclDeviceSelectorRef DSRef = nullptr; - DPCTLSyclDeviceRef DRef = nullptr; spirvFile.open("./oneD_range_kernel_fp64.spv", std::ios::binary | std::ios::ate); @@ -279,13 +279,13 @@ struct TestQueueSubmitFP64 : public ::testing::Test KBRef = DPCTLKernelBundle_CreateFromSpirv( CRef, DRef, spirvBuffer_.data(), spirvFileSize_, nullptr); - DPCTLDevice_Delete(DRef); DPCTLDeviceSelector_Delete(DSRef); } ~TestQueueSubmitFP64() { spirvFile.close(); + DPCTLDevice_Delete(DRef); DPCTLQueue_Delete(QRef); DPCTLKernelBundle_Delete(KBRef); } @@ -356,9 +356,11 @@ TEST_F(TestQueueSubmit, CheckForFloat) TEST_F(TestQueueSubmitFP64, CheckForDouble) { - submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, - DPCTLKernelArgType::DPCTL_FLOAT64_T, - "_ZTS11RangeKernelIdE"); + if (DPCTLDevice_HasAspect(DRef, DPCTLSyclAspectType::fp64)) { + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_FLOAT64_T, + "_ZTS11RangeKernelIdE"); + } } TEST_F(TestQueueSubmit, CheckForUnsupportedArgTy) diff --git a/libsyclinterface/tests/test_sycl_queue_submit_local_accessor_arg.cpp b/libsyclinterface/tests/test_sycl_queue_submit_local_accessor_arg.cpp index b14bef9e7b..0d39c4b281 100644 --- a/libsyclinterface/tests/test_sycl_queue_submit_local_accessor_arg.cpp +++ b/libsyclinterface/tests/test_sycl_queue_submit_local_accessor_arg.cpp @@ -239,13 +239,13 @@ struct TestQueueSubmitWithLocalAccessorFP64 : public ::testing::Test std::ifstream spirvFile; size_t spirvFileSize_; std::vector spirvBuffer_; + DPCTLSyclDeviceRef DRef = nullptr; DPCTLSyclQueueRef QRef = nullptr; DPCTLSyclKernelBundleRef KBRef = nullptr; TestQueueSubmitWithLocalAccessorFP64() { DPCTLSyclDeviceSelectorRef DSRef = nullptr; - DPCTLSyclDeviceRef DRef = nullptr; spirvFile.open("./local_accessor_kernel_fp64.spv", std::ios::binary | std::ios::ate); @@ -262,13 +262,13 @@ struct TestQueueSubmitWithLocalAccessorFP64 : public ::testing::Test KBRef = DPCTLKernelBundle_CreateFromSpirv( CRef, DRef, spirvBuffer_.data(), spirvFileSize_, nullptr); - DPCTLDevice_Delete(DRef); DPCTLDeviceSelector_Delete(DSRef); } ~TestQueueSubmitWithLocalAccessorFP64() { spirvFile.close(); + DPCTLDevice_Delete(DRef); DPCTLQueue_Delete(QRef); DPCTLKernelBundle_Delete(KBRef); } @@ -339,9 +339,11 @@ TEST_F(TestQueueSubmitWithLocalAccessor, CheckForFloat) TEST_F(TestQueueSubmitWithLocalAccessorFP64, CheckForDouble) { - submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, - DPCTLKernelArgType::DPCTL_FLOAT64_T, - "_ZTS14SyclKernel_SLMIdE"); + if (DPCTLDevice_HasAspect(DRef, DPCTLSyclAspectType::fp64)) { + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_FLOAT64_T, + "_ZTS14SyclKernel_SLMIdE"); + } } TEST_F(TestQueueSubmitWithLocalAccessor, CheckForUnsupportedArgTy) From 2c85fa77ff17c717c987604e6f9cc53734e7bb30 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 11 Mar 2024 04:02:49 -0500 Subject: [PATCH 09/13] Test Local accessor for ndim=1, 2, 3 --- ...t_sycl_queue_submit_local_accessor_arg.cpp | 37 +++++++++++++++---- 1 file changed, 29 insertions(+), 8 deletions(-) diff --git a/libsyclinterface/tests/test_sycl_queue_submit_local_accessor_arg.cpp b/libsyclinterface/tests/test_sycl_queue_submit_local_accessor_arg.cpp index 0d39c4b281..8fa8e828d0 100644 --- a/libsyclinterface/tests/test_sycl_queue_submit_local_accessor_arg.cpp +++ b/libsyclinterface/tests/test_sycl_queue_submit_local_accessor_arg.cpp @@ -75,20 +75,39 @@ void submit_kernel(DPCTLSyclQueueRef QRef, a_ptr[i] = 0; } - auto la = MDLocalAccessor{1, kernelArgTy, SIZE / 10, 1, 1}; + auto la1 = MDLocalAccessor{1, kernelArgTy, SIZE / 10, 1, 1}; // Create kernel args for vector_add size_t gRange[] = {SIZE}; size_t lRange[] = {SIZE / 10}; - void *args[NARGS] = {unwrap(a), (void *)&la}; + void *args_1d[NARGS] = {unwrap(a), (void *)&la1}; DPCTLKernelArgType addKernelArgTypes[] = {DPCTL_VOID_PTR, DPCTL_LOCAL_ACCESSOR}; - auto ERef = - DPCTLQueue_SubmitNDRange(kernel, QRef, args, addKernelArgTypes, NARGS, - gRange, lRange, RANGE_NDIMS, nullptr, 0); - ASSERT_TRUE(ERef != nullptr); - DPCTLQueue_Wait(QRef); + DPCTLSyclEventRef E1Ref = DPCTLQueue_SubmitNDRange( + kernel, QRef, args_1d, addKernelArgTypes, NARGS, gRange, lRange, + RANGE_NDIMS, nullptr, 0); + ASSERT_TRUE(E1Ref != nullptr); + + DPCTLSyclEventRef DepEv1[] = {E1Ref}; + auto la2 = MDLocalAccessor{2, kernelArgTy, SIZE / 10, 1, 1}; + void *args_2d[NARGS] = {unwrap(a), (void *)&la2}; + + DPCTLSyclEventRef E2Ref = + DPCTLQueue_SubmitNDRange(kernel, QRef, args_2d, addKernelArgTypes, + NARGS, gRange, lRange, RANGE_NDIMS, DepEv1, 1); + ASSERT_TRUE(E2Ref != nullptr); + + DPCTLSyclEventRef DepEv2[] = {E1Ref, E2Ref}; + auto la3 = MDLocalAccessor{3, kernelArgTy, SIZE / 10, 1, 1}; + void *args_3d[NARGS] = {unwrap(a), (void *)&la3}; + + DPCTLSyclEventRef E3Ref = + DPCTLQueue_SubmitNDRange(kernel, QRef, args_3d, addKernelArgTypes, + NARGS, gRange, lRange, RANGE_NDIMS, DepEv2, 2); + ASSERT_TRUE(E3Ref != nullptr); + + DPCTLEvent_Wait(E3Ref); if (kernelArgTy != DPCTL_FLOAT32_T && kernelArgTy != DPCTL_FLOAT64_T) ASSERT_TRUE(a_ptr[0] == 20); @@ -96,7 +115,9 @@ void submit_kernel(DPCTLSyclQueueRef QRef, ASSERT_TRUE(a_ptr[0] == 20.0); // clean ups - DPCTLEvent_Delete(ERef); + DPCTLEvent_Delete(E1Ref); + DPCTLEvent_Delete(E2Ref); + DPCTLEvent_Delete(E3Ref); DPCTLKernel_Delete(kernel); DPCTLfree_with_queue((DPCTLSyclUSMRef)a, QRef); } From 05677e6ffdbc1e8f9dc72150102e7de684c6545a Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 11 Mar 2024 04:05:03 -0500 Subject: [PATCH 10/13] Renamed target llvm-cov to llvm-cov-report, added target llvm-cov-show The llvm-cov-report outputs the coverage summary, while llvm-cov-show displays per-line coverage statistics to identify missed lines. --- libsyclinterface/tests/CMakeLists.txt | 34 +++++++++++++++++++++++---- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/libsyclinterface/tests/CMakeLists.txt b/libsyclinterface/tests/CMakeLists.txt index 19eed6c06f..4a991340e7 100644 --- a/libsyclinterface/tests/CMakeLists.txt +++ b/libsyclinterface/tests/CMakeLists.txt @@ -89,8 +89,35 @@ if(DPCTL_GENERATE_COVERAGE) ${CMAKE_DL_LIBS} ) set(object_arg "-object;") - add_custom_target(llvm-cov + add_custom_target(run-c-api-tests COMMAND ${CMAKE_COMMAND} -E env DPCTL_VERBOSITY=warning ${CMAKE_CURRENT_BINARY_DIR}/dpctl_c_api_tests + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMAND_EXPAND_LISTS + DEPENDS dpctl_c_api_tests + ) + add_custom_target(llvm-cov-show + COMMAND ${LLVMProfdata_EXE} + merge + -sparse default.profraw + -o + dpctl.profdata + COMMAND ${LLVMCov_EXE} + export + -format=lcov + -ignore-filename-regex=/tmp/icpx* + -instr-profile=dpctl.profdata + "${object_arg}$,;${object_arg}>" + > dpctl.lcov + COMMAND ${LLVMCov_EXE} + show + -instr-profile=dpctl.profdata + "${object_arg}$,;${object_arg}>" + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMAND_EXPAND_LISTS + DEPENDS run-c-api-tests + ) + + add_custom_target(llvm-cov-report COMMAND ${LLVMProfdata_EXE} merge -sparse default.profraw @@ -109,11 +136,10 @@ if(DPCTL_GENERATE_COVERAGE) "${object_arg}$,;${object_arg}>" WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} COMMAND_EXPAND_LISTS - DEPENDS dpctl_c_api_tests + DEPENDS run-c-api-tests ) add_custom_target(lcov-genhtml - COMMAND ${CMAKE_COMMAND} -E env DPCTL_VERBOSITY=warning ${CMAKE_CURRENT_BINARY_DIR}/dpctl_c_api_tests COMMAND ${LLVMProfdata_EXE} merge -sparse default.profraw @@ -132,7 +158,7 @@ if(DPCTL_GENERATE_COVERAGE) ${COVERAGE_OUTPUT_DIR}/dpctl-c-api-coverage WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} COMMAND_EXPAND_LISTS - DEPENDS dpctl_c_api_tests + DEPENDS run-c-api-tests ) else() target_link_libraries(dpctl_c_api_tests From 30661fdaa54f5eac2df68920cfa82257dcefbaa9 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 11 Mar 2024 04:09:53 -0500 Subject: [PATCH 11/13] Update to dbg_build Let LLVM_TOOLS_HOME and PATH to make llvm-cov, and llvm-profdata discoverable by cmake. --- libsyclinterface/dbg_build.sh | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/libsyclinterface/dbg_build.sh b/libsyclinterface/dbg_build.sh index f036796a2e..cba4e71d71 100755 --- a/libsyclinterface/dbg_build.sh +++ b/libsyclinterface/dbg_build.sh @@ -7,6 +7,11 @@ pushd build || exit 1 INSTALL_PREFIX=$(pwd)/../install rm -rf ${INSTALL_PREFIX} +# With DPC++ 2024.0 adn newer set these to ensure that +# cmake can find llvm-cov and other utilities +LLVM_TOOLS_HOME=${CMPLR_ROOT}/bin/compiler +PATH=$PATH:${CMPLR_ROOT}/bin/compiler + cmake \ -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_C_COMPILER=icx \ @@ -16,13 +21,19 @@ cmake \ -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} \ -DDPCTL_ENABLE_L0_PROGRAM_CREATION=ON \ -DDPCTL_BUILD_CAPI_TESTS=ON \ + -DDPCTL_GENERATE_COVERAGE=OFF \ .. -make V=1 -n -j 4 && make check && make install +# build +make V=1 -n -j 4 +# run ctest +make check +# install +make install # Turn on to generate coverage report html files reconfigure with # -DDPCTL_GENERATE_COVERAGE=ON and then -# make lcov-genhtml +# make llvm-cov-report # For more verbose tests use: # cd tests From 278fec209421d21365bb26e11ab8498316e90899 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 11 Mar 2024 04:51:36 -0500 Subject: [PATCH 12/13] Change script to use llvm-cov-report instead of llvm-cov --- scripts/gen_coverage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/gen_coverage.py b/scripts/gen_coverage.py index fece9228fd..e81f7b301b 100644 --- a/scripts/gen_coverage.py +++ b/scripts/gen_coverage.py @@ -82,7 +82,7 @@ def run( .strip("\n") ) subprocess.check_call( - ["cmake", "--build", ".", "--target", "llvm-cov"], + ["cmake", "--build", ".", "--target", "llvm-cov-report"], cwd=cmake_build_dir, ) env["LLVM_PROFILE_FILE"] = "dpctl_pytest.profraw" From bbb2d5401cf8dc452fcad76880cf3ed26d7d54b0 Mon Sep 17 00:00:00 2001 From: Oleksandr Pavlyk Date: Mon, 11 Mar 2024 08:35:31 -0500 Subject: [PATCH 13/13] Moved typedef for MDLocalAccessor to dpctl_sycl_queue_interface.hpp --- .../include/dpctl_sycl_queue_interface.h | 12 ++++++++++++ .../source/dpctl_sycl_queue_interface.cpp | 9 --------- .../test_sycl_queue_submit_local_accessor_arg.cpp | 9 --------- 3 files changed, 12 insertions(+), 18 deletions(-) diff --git a/libsyclinterface/include/dpctl_sycl_queue_interface.h b/libsyclinterface/include/dpctl_sycl_queue_interface.h index 18d55808ea..1763e1d2d5 100644 --- a/libsyclinterface/include/dpctl_sycl_queue_interface.h +++ b/libsyclinterface/include/dpctl_sycl_queue_interface.h @@ -171,6 +171,18 @@ DPCTL_API __dpctl_give DPCTLSyclDeviceRef DPCTLQueue_GetDevice(__dpctl_keep const DPCTLSyclQueueRef QRef); +/*! @brief Structure to be used to specify dimensionality and type of + * local_accessor kernel type argument. + */ +typedef struct MDLocalAccessorTy +{ + size_t ndim; + DPCTLKernelArgType dpctl_type_id; + size_t dim0; + size_t dim1; + size_t dim2; +} MDLocalAccessor; + /*! * @brief Submits the kernel to the specified queue with the provided range * argument. diff --git a/libsyclinterface/source/dpctl_sycl_queue_interface.cpp b/libsyclinterface/source/dpctl_sycl_queue_interface.cpp index 04ab441332..dce5a06a99 100644 --- a/libsyclinterface/source/dpctl_sycl_queue_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_queue_interface.cpp @@ -121,15 +121,6 @@ typedef struct complex uint64_t imag; } complexNumber; -typedef struct MDLocalAccessorTy -{ - size_t ndim; - DPCTLKernelArgType dpctl_type_id; - size_t dim0; - size_t dim1; - size_t dim2; -} MDLocalAccessor; - void set_dependent_events(handler &cgh, __dpctl_keep const DPCTLSyclEventRef *DepEvents, size_t NDepEvents) diff --git a/libsyclinterface/tests/test_sycl_queue_submit_local_accessor_arg.cpp b/libsyclinterface/tests/test_sycl_queue_submit_local_accessor_arg.cpp index 8fa8e828d0..7f28fc0041 100644 --- a/libsyclinterface/tests/test_sycl_queue_submit_local_accessor_arg.cpp +++ b/libsyclinterface/tests/test_sycl_queue_submit_local_accessor_arg.cpp @@ -44,15 +44,6 @@ constexpr size_t SIZE = 100; using namespace dpctl::syclinterface; -typedef struct MDLocalAccessorTy -{ - size_t ndim; - DPCTLKernelArgType dpctl_type_id; - size_t dim0; - size_t dim1; - size_t dim2; -} MDLocalAccessor; - template void submit_kernel(DPCTLSyclQueueRef QRef, DPCTLSyclKernelBundleRef KBRef,