diff --git a/dpctl/_backend.pxd b/dpctl/_backend.pxd index e8627e8241..6cbf1500ee 100644 --- a/dpctl/_backend.pxd +++ b/dpctl/_backend.pxd @@ -67,7 +67,8 @@ cdef extern from "syclinterface/dpctl_sycl_enum_types.h": _UINT64_T 'DPCTL_UINT64_T', _FLOAT 'DPCTL_FLOAT32_T', _DOUBLE 'DPCTL_FLOAT64_T', - _VOID_PTR 'DPCTL_VOID_PTR' + _VOID_PTR 'DPCTL_VOID_PTR', + _LOCAL_ACCESSOR 'DPCTL_LOCAL_ACCESSOR' ctypedef enum _queue_property_type 'DPCTLQueuePropertyType': _DEFAULT_PROPERTY 'DPCTL_DEFAULT_PROPERTY' diff --git a/dpctl/_sycl_queue.pyx b/dpctl/_sycl_queue.pyx index 0dec0990c3..75135c6fc6 100644 --- a/dpctl/_sycl_queue.pyx +++ b/dpctl/_sycl_queue.pyx @@ -233,6 +233,15 @@ cdef class _kernel_arg_type: _arg_data_type._VOID_PTR ) + @property + def dpctl_local_accessor(self): + cdef str p_name = "dpctl_local_accessor" + return kernel_arg_type_attribute( + self._name, + p_name, + _arg_data_type._LOCAL_ACCESSOR + ) + kernel_arg_type = _kernel_arg_type() diff --git a/dpctl/enum_types.py b/dpctl/enum_types.py index 102ae09015..017497eb37 100644 --- a/dpctl/enum_types.py +++ b/dpctl/enum_types.py @@ -22,11 +22,7 @@ """ from enum import Enum, auto -__all__ = [ - "device_type", - "backend_type", - "event_status_type", -] +__all__ = ["device_type", "backend_type", "event_status_type"] class device_type(Enum): diff --git a/dpctl/tests/test_sycl_kernel_submit.py b/dpctl/tests/test_sycl_kernel_submit.py index 01558dd4df..04a25335ac 100644 --- a/dpctl/tests/test_sycl_kernel_submit.py +++ b/dpctl/tests/test_sycl_kernel_submit.py @@ -274,3 +274,4 @@ def test_kernel_arg_type(): _check_kernel_arg_type_instance(kernel_arg_type.dpctl_float32) _check_kernel_arg_type_instance(kernel_arg_type.dpctl_float64) _check_kernel_arg_type_instance(kernel_arg_type.dpctl_void_ptr) + _check_kernel_arg_type_instance(kernel_arg_type.dpctl_local_accessor) diff --git a/libsyclinterface/dbg_build.sh b/libsyclinterface/dbg_build.sh index f036796a2e..cba4e71d71 100755 --- a/libsyclinterface/dbg_build.sh +++ b/libsyclinterface/dbg_build.sh @@ -7,6 +7,11 @@ pushd build || exit 1 INSTALL_PREFIX=$(pwd)/../install rm -rf ${INSTALL_PREFIX} +# With DPC++ 2024.0 adn newer set these to ensure that +# cmake can find llvm-cov and other utilities +LLVM_TOOLS_HOME=${CMPLR_ROOT}/bin/compiler +PATH=$PATH:${CMPLR_ROOT}/bin/compiler + cmake \ -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_C_COMPILER=icx \ @@ -16,13 +21,19 @@ cmake \ -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} \ -DDPCTL_ENABLE_L0_PROGRAM_CREATION=ON \ -DDPCTL_BUILD_CAPI_TESTS=ON \ + -DDPCTL_GENERATE_COVERAGE=OFF \ .. -make V=1 -n -j 4 && make check && make install +# build +make V=1 -n -j 4 +# run ctest +make check +# install +make install # Turn on to generate coverage report html files reconfigure with # -DDPCTL_GENERATE_COVERAGE=ON and then -# make lcov-genhtml +# make llvm-cov-report # For more verbose tests use: # cd tests diff --git a/libsyclinterface/helper/include/dpctl_error_handlers.h b/libsyclinterface/helper/include/dpctl_error_handlers.h index 42e8efd77e..34fa517cfb 100644 --- a/libsyclinterface/helper/include/dpctl_error_handlers.h +++ b/libsyclinterface/helper/include/dpctl_error_handlers.h @@ -20,7 +20,7 @@ /// /// \file /// A functor to use for passing an error handler callback function to sycl -/// context and queue contructors. +/// context and queue constructors. //===----------------------------------------------------------------------===// #pragma once diff --git a/libsyclinterface/include/dpctl_sycl_enum_types.h b/libsyclinterface/include/dpctl_sycl_enum_types.h index 4edcce45df..c8bc0b1b20 100644 --- a/libsyclinterface/include/dpctl_sycl_enum_types.h +++ b/libsyclinterface/include/dpctl_sycl_enum_types.h @@ -98,6 +98,7 @@ typedef enum DPCTL_FLOAT32_T, DPCTL_FLOAT64_T, DPCTL_VOID_PTR, + DPCTL_LOCAL_ACCESSOR, DPCTL_UNSUPPORTED_KERNEL_ARG } DPCTLKernelArgType; diff --git a/libsyclinterface/include/dpctl_sycl_queue_interface.h b/libsyclinterface/include/dpctl_sycl_queue_interface.h index 18d55808ea..1763e1d2d5 100644 --- a/libsyclinterface/include/dpctl_sycl_queue_interface.h +++ b/libsyclinterface/include/dpctl_sycl_queue_interface.h @@ -171,6 +171,18 @@ DPCTL_API __dpctl_give DPCTLSyclDeviceRef DPCTLQueue_GetDevice(__dpctl_keep const DPCTLSyclQueueRef QRef); +/*! @brief Structure to be used to specify dimensionality and type of + * local_accessor kernel type argument. + */ +typedef struct MDLocalAccessorTy +{ + size_t ndim; + DPCTLKernelArgType dpctl_type_id; + size_t dim0; + size_t dim1; + size_t dim2; +} MDLocalAccessor; + /*! * @brief Submits the kernel to the specified queue with the provided range * argument. diff --git a/libsyclinterface/source/dpctl_sycl_kernel_bundle_interface.cpp b/libsyclinterface/source/dpctl_sycl_kernel_bundle_interface.cpp index 4aeb9b2bb3..66aa215808 100644 --- a/libsyclinterface/source/dpctl_sycl_kernel_bundle_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_kernel_bundle_interface.cpp @@ -530,7 +530,7 @@ _GetKernel_ze_impl(const kernel_bundle &kb, else { error_handler("Kernel named " + std::string(kernel_name) + " could not be found.", - __FILE__, __func__, __LINE__); + __FILE__, __func__, __LINE__, error_level::error); return nullptr; } } @@ -541,7 +541,7 @@ bool _HasKernel_ze_impl(const kernel_bundle &kb, auto zeKernelCreateFn = get_zeKernelCreate(); if (zeKernelCreateFn == nullptr) { error_handler("Could not load zeKernelCreate function.", __FILE__, - __func__, __LINE__); + __func__, __LINE__, error_level::error); return false; } @@ -564,7 +564,7 @@ bool _HasKernel_ze_impl(const kernel_bundle &kb, if (ze_status != ZE_RESULT_ERROR_INVALID_KERNEL_NAME) { error_handler("zeKernelCreate failed: " + _GetErrorCode_ze_impl(ze_status), - __FILE__, __func__, __LINE__); + __FILE__, __func__, __LINE__, error_level::error); return false; } } diff --git a/libsyclinterface/source/dpctl_sycl_queue_interface.cpp b/libsyclinterface/source/dpctl_sycl_queue_interface.cpp index dc512fd126..dce5a06a99 100644 --- a/libsyclinterface/source/dpctl_sycl_queue_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_queue_interface.cpp @@ -38,6 +38,76 @@ using namespace sycl; +#define SET_LOCAL_ACCESSOR_ARG(CGH, NDIM, ARGTY, R, IDX) \ + do { \ + switch ((ARGTY)) { \ + case DPCTL_INT8_T: \ + { \ + auto la = local_accessor(R, CGH); \ + CGH.set_arg(IDX, la); \ + return true; \ + } \ + case DPCTL_UINT8_T: \ + { \ + auto la = local_accessor(R, CGH); \ + CGH.set_arg(IDX, la); \ + return true; \ + } \ + case DPCTL_INT16_T: \ + { \ + auto la = local_accessor(R, CGH); \ + CGH.set_arg(IDX, la); \ + return true; \ + } \ + case DPCTL_UINT16_T: \ + { \ + auto la = local_accessor(R, CGH); \ + CGH.set_arg(IDX, la); \ + return true; \ + } \ + case DPCTL_INT32_T: \ + { \ + auto la = local_accessor(R, CGH); \ + CGH.set_arg(IDX, la); \ + return true; \ + } \ + case DPCTL_UINT32_T: \ + { \ + auto la = local_accessor(R, CGH); \ + CGH.set_arg(IDX, la); \ + return true; \ + } \ + case DPCTL_INT64_T: \ + { \ + auto la = local_accessor(R, CGH); \ + CGH.set_arg(IDX, la); \ + return true; \ + } \ + case DPCTL_UINT64_T: \ + { \ + auto la = local_accessor(R, CGH); \ + CGH.set_arg(IDX, la); \ + return true; \ + } \ + case DPCTL_FLOAT32_T: \ + { \ + auto la = local_accessor(R, CGH); \ + CGH.set_arg(IDX, la); \ + return true; \ + } \ + case DPCTL_FLOAT64_T: \ + { \ + auto la = local_accessor(R, CGH); \ + CGH.set_arg(IDX, la); \ + return true; \ + } \ + default: \ + error_handler("Kernel argument could not be created.", __FILE__, \ + __func__, __LINE__, error_level::error); \ + return false; \ + } \ + } while (0); + namespace { static_assert(__SYCL_COMPILER_VERSION >= __SYCL_COMPILER_VERSION_REQUIRED, @@ -62,11 +132,39 @@ void set_dependent_events(handler &cgh, } } +bool set_local_accessor_arg(handler &cgh, + size_t idx, + const MDLocalAccessor *mdstruct) +{ + switch (mdstruct->ndim) { + case 1: + { + auto r = range<1>(mdstruct->dim0); + SET_LOCAL_ACCESSOR_ARG(cgh, 1, mdstruct->dpctl_type_id, r, idx); + } + case 2: + { + auto r = range<2>(mdstruct->dim0, mdstruct->dim1); + SET_LOCAL_ACCESSOR_ARG(cgh, 2, mdstruct->dpctl_type_id, r, idx); + } + case 3: + { + auto r = range<3>(mdstruct->dim0, mdstruct->dim1, mdstruct->dim2); + SET_LOCAL_ACCESSOR_ARG(cgh, 3, mdstruct->dpctl_type_id, r, idx); + } + default: + return false; + } +} /*! * @brief Set the kernel arg object * - * @param cgh My Param doc - * @param Arg My Param doc + * @param cgh SYCL command group handler using which a kernel is going to + * be submitted. + * @param idx The position of the argument in the list of arguments passed + * to a kernel. + * @param Arg A void* representing a kernel argument. + * @param Argty A typeid specifying the C++ type of the Arg parameter. */ bool set_kernel_arg(handler &cgh, size_t idx, @@ -109,10 +207,11 @@ bool set_kernel_arg(handler &cgh, case DPCTL_VOID_PTR: cgh.set_arg(idx, Arg); break; + case DPCTL_LOCAL_ACCESSOR: + arg_set = set_local_accessor_arg(cgh, idx, (MDLocalAccessor *)Arg); + break; default: arg_set = false; - error_handler("Kernel argument could not be created.", __FILE__, - __func__, __LINE__); break; } return arg_set; diff --git a/libsyclinterface/tests/CMakeLists.txt b/libsyclinterface/tests/CMakeLists.txt index a0be739b2e..4a991340e7 100644 --- a/libsyclinterface/tests/CMakeLists.txt +++ b/libsyclinterface/tests/CMakeLists.txt @@ -21,6 +21,8 @@ set(spirv-test-files multi_kernel.spv oneD_range_kernel_inttys_fp32.spv oneD_range_kernel_fp64.spv + local_accessor_kernel_inttys_fp32.spv + local_accessor_kernel_fp64.spv ) foreach(tf ${spirv-test-files}) @@ -55,6 +57,7 @@ add_sycl_to_target( ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_platform_invalid_filters.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_queue_manager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_queue_submit.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_queue_submit_local_accessor_arg.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_queue_interface.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_usm_interface.cpp ) @@ -86,8 +89,35 @@ if(DPCTL_GENERATE_COVERAGE) ${CMAKE_DL_LIBS} ) set(object_arg "-object;") - add_custom_target(llvm-cov + add_custom_target(run-c-api-tests COMMAND ${CMAKE_COMMAND} -E env DPCTL_VERBOSITY=warning ${CMAKE_CURRENT_BINARY_DIR}/dpctl_c_api_tests + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMAND_EXPAND_LISTS + DEPENDS dpctl_c_api_tests + ) + add_custom_target(llvm-cov-show + COMMAND ${LLVMProfdata_EXE} + merge + -sparse default.profraw + -o + dpctl.profdata + COMMAND ${LLVMCov_EXE} + export + -format=lcov + -ignore-filename-regex=/tmp/icpx* + -instr-profile=dpctl.profdata + "${object_arg}$,;${object_arg}>" + > dpctl.lcov + COMMAND ${LLVMCov_EXE} + show + -instr-profile=dpctl.profdata + "${object_arg}$,;${object_arg}>" + WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} + COMMAND_EXPAND_LISTS + DEPENDS run-c-api-tests + ) + + add_custom_target(llvm-cov-report COMMAND ${LLVMProfdata_EXE} merge -sparse default.profraw @@ -106,11 +136,10 @@ if(DPCTL_GENERATE_COVERAGE) "${object_arg}$,;${object_arg}>" WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} COMMAND_EXPAND_LISTS - DEPENDS dpctl_c_api_tests + DEPENDS run-c-api-tests ) add_custom_target(lcov-genhtml - COMMAND ${CMAKE_COMMAND} -E env DPCTL_VERBOSITY=warning ${CMAKE_CURRENT_BINARY_DIR}/dpctl_c_api_tests COMMAND ${LLVMProfdata_EXE} merge -sparse default.profraw @@ -129,7 +158,7 @@ if(DPCTL_GENERATE_COVERAGE) ${COVERAGE_OUTPUT_DIR}/dpctl-c-api-coverage WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} COMMAND_EXPAND_LISTS - DEPENDS dpctl_c_api_tests + DEPENDS run-c-api-tests ) else() target_link_libraries(dpctl_c_api_tests diff --git a/libsyclinterface/tests/local_accessor_kernel_fp64.spv b/libsyclinterface/tests/local_accessor_kernel_fp64.spv new file mode 100644 index 0000000000..ffc220268a Binary files /dev/null and b/libsyclinterface/tests/local_accessor_kernel_fp64.spv differ diff --git a/libsyclinterface/tests/local_accessor_kernel_inttys_fp32.spv b/libsyclinterface/tests/local_accessor_kernel_inttys_fp32.spv new file mode 100644 index 0000000000..3e2d145ad8 Binary files /dev/null and b/libsyclinterface/tests/local_accessor_kernel_inttys_fp32.spv differ diff --git a/libsyclinterface/tests/test_sycl_device_aspects.cpp b/libsyclinterface/tests/test_sycl_device_aspects.cpp index 61f0eaba8b..093c6730df 100644 --- a/libsyclinterface/tests/test_sycl_device_aspects.cpp +++ b/libsyclinterface/tests/test_sycl_device_aspects.cpp @@ -97,7 +97,7 @@ auto build_gtest_values(const std::array, N> ¶ms) auto build_params() { constexpr auto param_1 = get_param_list( - "opencl:gpu", "opencl:cpu", "level_zero:gpu", "host"); + "opencl:gpu", "opencl:cpu", "level_zero:gpu"); constexpr auto param_2 = get_param_list>( diff --git a/libsyclinterface/tests/test_sycl_queue_interface.cpp b/libsyclinterface/tests/test_sycl_queue_interface.cpp index cddc007f4d..1c7fe55561 100644 --- a/libsyclinterface/tests/test_sycl_queue_interface.cpp +++ b/libsyclinterface/tests/test_sycl_queue_interface.cpp @@ -90,6 +90,34 @@ struct TestDPCTLQueueMemberFunctions } /* End of anonymous namespace */ +TEST(TestDPCTLSyclQueueInterface, CheckCreate) +{ + /* We are testing that we do not crash even when input is NULL. */ + DPCTLSyclQueueRef QRef = nullptr; + + EXPECT_NO_FATAL_FAILURE( + QRef = DPCTLQueue_Create(nullptr, nullptr, nullptr, 0)); + ASSERT_TRUE(QRef == nullptr); +} + +TEST(TestDPCTLSyclQueueInterface, CheckCreate2) +{ + /* We are testing that we do not crash even when input is NULL. */ + DPCTLSyclQueueRef QRef = nullptr; + DPCTLSyclDeviceSelectorRef DSRef = nullptr; + DPCTLSyclDeviceRef DRef = nullptr; + + EXPECT_NO_FATAL_FAILURE(DSRef = DPCTLDefaultSelector_Create()); + EXPECT_NO_FATAL_FAILURE(DRef = DPCTLDevice_CreateFromSelector(DSRef)); + EXPECT_NO_FATAL_FAILURE(DPCTLDeviceSelector_Delete(DSRef)); + + EXPECT_NO_FATAL_FAILURE(QRef = + DPCTLQueue_Create(nullptr, DRef, nullptr, 0)); + ASSERT_TRUE(QRef == nullptr); + + EXPECT_NO_FATAL_FAILURE(DPCTLDevice_Delete(DRef)); +} + TEST(TestDPCTLSyclQueueInterface, CheckCreateForDevice) { /* We are testing that we do not crash even when input is NULL. */ diff --git a/libsyclinterface/tests/test_sycl_queue_submit.cpp b/libsyclinterface/tests/test_sycl_queue_submit.cpp index cc9bc836ce..d89ec3d3ce 100644 --- a/libsyclinterface/tests/test_sycl_queue_submit.cpp +++ b/libsyclinterface/tests/test_sycl_queue_submit.cpp @@ -56,7 +56,9 @@ void submit_kernel(DPCTLSyclQueueRef QRef, { T scalarVal = 3; constexpr size_t NARGS = 4; - constexpr size_t RANGE_NDIMS = 1; + constexpr size_t RANGE_NDIMS_1 = 1; + constexpr size_t RANGE_NDIMS_2 = 2; + constexpr size_t RANGE_NDIMS_3 = 3; ASSERT_TRUE(DPCTLKernelBundle_HasKernel(KBRef, kernelName.c_str())); auto kernel = DPCTLKernelBundle_GetKernel(KBRef, kernelName.c_str()); @@ -75,13 +77,33 @@ void submit_kernel(DPCTLSyclQueueRef QRef, (void *)&scalarVal}; DPCTLKernelArgType addKernelArgTypes[] = {DPCTL_VOID_PTR, DPCTL_VOID_PTR, DPCTL_VOID_PTR, kernelArgTy}; - auto ERef = DPCTLQueue_SubmitRange(kernel, QRef, args, addKernelArgTypes, - NARGS, Range, RANGE_NDIMS, nullptr, 0); - ASSERT_TRUE(ERef != nullptr); - DPCTLQueue_Wait(QRef); + auto E1Ref = + DPCTLQueue_SubmitRange(kernel, QRef, args, addKernelArgTypes, NARGS, + Range, RANGE_NDIMS_1, nullptr, 0); + ASSERT_TRUE(E1Ref != nullptr); + + // Create kernel args for vector_add + size_t Range2D[] = {SIZE, 1}; + DPCTLSyclEventRef DepEvs[] = {E1Ref}; + auto E2Ref = + DPCTLQueue_SubmitRange(kernel, QRef, args, addKernelArgTypes, NARGS, + Range2D, RANGE_NDIMS_2, DepEvs, 1); + ASSERT_TRUE(E2Ref != nullptr); + + // Create kernel args for vector_add + size_t Range3D[] = {SIZE, 1, 1}; + DPCTLSyclEventRef DepEvs2[] = {E1Ref, E2Ref}; + auto E3Ref = + DPCTLQueue_SubmitRange(kernel, QRef, args, addKernelArgTypes, NARGS, + Range3D, RANGE_NDIMS_3, DepEvs2, 2); + ASSERT_TRUE(E3Ref != nullptr); + + DPCTLEvent_Wait(E3Ref); // clean ups - DPCTLEvent_Delete(ERef); + DPCTLEvent_Delete(E1Ref); + DPCTLEvent_Delete(E2Ref); + DPCTLEvent_Delete(E3Ref); DPCTLKernel_Delete(kernel); DPCTLfree_with_queue((DPCTLSyclUSMRef)a, QRef); DPCTLfree_with_queue((DPCTLSyclUSMRef)b, QRef); @@ -234,13 +256,13 @@ struct TestQueueSubmitFP64 : public ::testing::Test std::ifstream spirvFile; size_t spirvFileSize_; std::vector spirvBuffer_; + DPCTLSyclDeviceRef DRef = nullptr; DPCTLSyclQueueRef QRef = nullptr; DPCTLSyclKernelBundleRef KBRef = nullptr; TestQueueSubmitFP64() { DPCTLSyclDeviceSelectorRef DSRef = nullptr; - DPCTLSyclDeviceRef DRef = nullptr; spirvFile.open("./oneD_range_kernel_fp64.spv", std::ios::binary | std::ios::ate); @@ -257,13 +279,13 @@ struct TestQueueSubmitFP64 : public ::testing::Test KBRef = DPCTLKernelBundle_CreateFromSpirv( CRef, DRef, spirvBuffer_.data(), spirvFileSize_, nullptr); - DPCTLDevice_Delete(DRef); DPCTLDeviceSelector_Delete(DSRef); } ~TestQueueSubmitFP64() { spirvFile.close(); + DPCTLDevice_Delete(DRef); DPCTLQueue_Delete(QRef); DPCTLKernelBundle_Delete(KBRef); } @@ -334,9 +356,11 @@ TEST_F(TestQueueSubmit, CheckForFloat) TEST_F(TestQueueSubmitFP64, CheckForDouble) { - submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, - DPCTLKernelArgType::DPCTL_FLOAT64_T, - "_ZTS11RangeKernelIdE"); + if (DPCTLDevice_HasAspect(DRef, DPCTLSyclAspectType::fp64)) { + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_FLOAT64_T, + "_ZTS11RangeKernelIdE"); + } } TEST_F(TestQueueSubmit, CheckForUnsupportedArgTy) diff --git a/libsyclinterface/tests/test_sycl_queue_submit_local_accessor_arg.cpp b/libsyclinterface/tests/test_sycl_queue_submit_local_accessor_arg.cpp new file mode 100644 index 0000000000..7f28fc0041 --- /dev/null +++ b/libsyclinterface/tests/test_sycl_queue_submit_local_accessor_arg.cpp @@ -0,0 +1,378 @@ +//===-- test_sycl_queue_submit.cpp - Test cases for kernel submission fns. ===// +// +// Data Parallel Control (dpctl) +// +// Copyright 2020-2024 Intel Corporation +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file has unit test cases for the various submit functions defined +/// inside dpctl_sycl_queue_interface.cpp. +//===----------------------------------------------------------------------===// + +#include "dpctl_sycl_context_interface.h" +#include "dpctl_sycl_device_interface.h" +#include "dpctl_sycl_device_selector_interface.h" +#include "dpctl_sycl_event_interface.h" +#include "dpctl_sycl_kernel_bundle_interface.h" +#include "dpctl_sycl_kernel_interface.h" +#include "dpctl_sycl_queue_interface.h" +#include "dpctl_sycl_type_casters.hpp" +#include "dpctl_sycl_usm_interface.h" +#include +#include +#include +#include +#include + +namespace +{ +constexpr size_t SIZE = 100; + +using namespace dpctl::syclinterface; + +template +void submit_kernel(DPCTLSyclQueueRef QRef, + DPCTLSyclKernelBundleRef KBRef, + std::vector spirvBuffer, + size_t spirvFileSize, + DPCTLKernelArgType kernelArgTy, + std::string kernelName) +{ + constexpr size_t NARGS = 2; + constexpr size_t RANGE_NDIMS = 1; + + ASSERT_TRUE(DPCTLKernelBundle_HasKernel(KBRef, kernelName.c_str())); + auto kernel = DPCTLKernelBundle_GetKernel(KBRef, kernelName.c_str()); + + // Create the input args + auto a = DPCTLmalloc_shared(SIZE * sizeof(T), QRef); + ASSERT_TRUE(a != nullptr); + auto a_ptr = static_cast(unwrap(a)); + for (auto i = 0ul; i < SIZE; ++i) { + a_ptr[i] = 0; + } + + auto la1 = MDLocalAccessor{1, kernelArgTy, SIZE / 10, 1, 1}; + + // Create kernel args for vector_add + size_t gRange[] = {SIZE}; + size_t lRange[] = {SIZE / 10}; + void *args_1d[NARGS] = {unwrap(a), (void *)&la1}; + DPCTLKernelArgType addKernelArgTypes[] = {DPCTL_VOID_PTR, + DPCTL_LOCAL_ACCESSOR}; + + DPCTLSyclEventRef E1Ref = DPCTLQueue_SubmitNDRange( + kernel, QRef, args_1d, addKernelArgTypes, NARGS, gRange, lRange, + RANGE_NDIMS, nullptr, 0); + ASSERT_TRUE(E1Ref != nullptr); + + DPCTLSyclEventRef DepEv1[] = {E1Ref}; + auto la2 = MDLocalAccessor{2, kernelArgTy, SIZE / 10, 1, 1}; + void *args_2d[NARGS] = {unwrap(a), (void *)&la2}; + + DPCTLSyclEventRef E2Ref = + DPCTLQueue_SubmitNDRange(kernel, QRef, args_2d, addKernelArgTypes, + NARGS, gRange, lRange, RANGE_NDIMS, DepEv1, 1); + ASSERT_TRUE(E2Ref != nullptr); + + DPCTLSyclEventRef DepEv2[] = {E1Ref, E2Ref}; + auto la3 = MDLocalAccessor{3, kernelArgTy, SIZE / 10, 1, 1}; + void *args_3d[NARGS] = {unwrap(a), (void *)&la3}; + + DPCTLSyclEventRef E3Ref = + DPCTLQueue_SubmitNDRange(kernel, QRef, args_3d, addKernelArgTypes, + NARGS, gRange, lRange, RANGE_NDIMS, DepEv2, 2); + ASSERT_TRUE(E3Ref != nullptr); + + DPCTLEvent_Wait(E3Ref); + + if (kernelArgTy != DPCTL_FLOAT32_T && kernelArgTy != DPCTL_FLOAT64_T) + ASSERT_TRUE(a_ptr[0] == 20); + else + ASSERT_TRUE(a_ptr[0] == 20.0); + + // clean ups + DPCTLEvent_Delete(E1Ref); + DPCTLEvent_Delete(E2Ref); + DPCTLEvent_Delete(E3Ref); + DPCTLKernel_Delete(kernel); + DPCTLfree_with_queue((DPCTLSyclUSMRef)a, QRef); +} + +} /* end of anonymous namespace */ + +/* +// The local_accessor_kernel spv files were generated from the SYCL program +// included in this comment. The program can be compiled using +// `icpx -fsycl local_accessor_kernel.cpp`. After that if the generated +// executable is run with the environment variable `SYCL_DUMP_IMAGES=1`, icpx +// runtime will dump all offload sections of fat binary to the current working +// directory. When tested with DPC++ 2024.0 the kernels are split across two +// separate SPV files. One contains all kernels for integers and FP32 +// data type, and another contains the kernel for FP64. +// +// Note that, `SYCL_DUMP_IMAGES=1` will also generate extra SPV files that +// contain the code for built in functions such as indexing and barriers. To +// figure which SPV file contains the kernels, use `spirv-dis` from the +// spirv-tools package to translate the SPV binary format to a human-readable +// textual format. +#include +#include +#include + +template +class SyclKernel_SLM +{ +private: + T N_; + T *a_ = nullptr; + sycl::local_accessor slm_; + +public: + SyclKernel_SLM(T *a, sycl::local_accessor slm) + : a_(a), slm_(slm) + { + } + + void operator()(sycl::nd_item<1> it) const + { + int i = it.get_global_id(); + int j = it.get_local_id(); + slm_[j] = 2; + auto g = it.get_group(); + group_barrier(g); + auto temp = 0; + for (auto idx = 0ul; idx < it.get_local_range(0); ++idx) + temp += slm_[idx]; + a_[i] = temp * (i + 1); + } +}; + +template +void submit_kernel(sycl::queue q, const unsigned long N, T *a) +{ + q.submit([&](auto &h) + { + sycl::local_accessor slm(sycl::range(N/10), h); + h.parallel_for(sycl::nd_range(sycl::range{N}, sycl::range{N/10}), + SyclKernel_SLM(a, slm)); }); +} + +template +void driver(size_t N) +{ + sycl::queue q; + auto *a = sycl::malloc_shared(N, q); + submit_kernel(q, N, a); + q.wait(); + sycl::free(a, q); +} + +int main(int argc, const char **argv) +{ + size_t N = 0; + std::cout << "Enter problem size in N:\n"; + std::cin >> N; + std::cout << "Executing with N = " << N << std::endl; + + driver(N); + driver(N); + driver(N); + driver(N); + driver(N); + driver(N); + driver(N); + driver(N); + driver(N); + driver(N); + + return 0; +} + +*/ + +struct TestQueueSubmitWithLocalAccessor : public ::testing::Test +{ + std::ifstream spirvFile; + size_t spirvFileSize_; + std::vector spirvBuffer_; + DPCTLSyclQueueRef QRef = nullptr; + DPCTLSyclKernelBundleRef KBRef = nullptr; + + TestQueueSubmitWithLocalAccessor() + { + DPCTLSyclDeviceSelectorRef DSRef = nullptr; + DPCTLSyclDeviceRef DRef = nullptr; + + spirvFile.open("./local_accessor_kernel_inttys_fp32.spv", + std::ios::binary | std::ios::ate); + spirvFileSize_ = std::filesystem::file_size( + "./local_accessor_kernel_inttys_fp32.spv"); + spirvBuffer_.reserve(spirvFileSize_); + spirvFile.seekg(0, std::ios::beg); + spirvFile.read(spirvBuffer_.data(), spirvFileSize_); + + DSRef = DPCTLDefaultSelector_Create(); + DRef = DPCTLDevice_CreateFromSelector(DSRef); + QRef = + DPCTLQueue_CreateForDevice(DRef, nullptr, DPCTL_DEFAULT_PROPERTY); + auto CRef = DPCTLQueue_GetContext(QRef); + + KBRef = DPCTLKernelBundle_CreateFromSpirv( + CRef, DRef, spirvBuffer_.data(), spirvFileSize_, nullptr); + DPCTLDevice_Delete(DRef); + DPCTLDeviceSelector_Delete(DSRef); + } + + ~TestQueueSubmitWithLocalAccessor() + { + spirvFile.close(); + DPCTLQueue_Delete(QRef); + DPCTLKernelBundle_Delete(KBRef); + } +}; + +struct TestQueueSubmitWithLocalAccessorFP64 : public ::testing::Test +{ + std::ifstream spirvFile; + size_t spirvFileSize_; + std::vector spirvBuffer_; + DPCTLSyclDeviceRef DRef = nullptr; + DPCTLSyclQueueRef QRef = nullptr; + DPCTLSyclKernelBundleRef KBRef = nullptr; + + TestQueueSubmitWithLocalAccessorFP64() + { + DPCTLSyclDeviceSelectorRef DSRef = nullptr; + + spirvFile.open("./local_accessor_kernel_fp64.spv", + std::ios::binary | std::ios::ate); + spirvFileSize_ = + std::filesystem::file_size("./local_accessor_kernel_fp64.spv"); + spirvBuffer_.reserve(spirvFileSize_); + spirvFile.seekg(0, std::ios::beg); + spirvFile.read(spirvBuffer_.data(), spirvFileSize_); + DSRef = DPCTLDefaultSelector_Create(); + DRef = DPCTLDevice_CreateFromSelector(DSRef); + QRef = + DPCTLQueue_CreateForDevice(DRef, nullptr, DPCTL_DEFAULT_PROPERTY); + auto CRef = DPCTLQueue_GetContext(QRef); + + KBRef = DPCTLKernelBundle_CreateFromSpirv( + CRef, DRef, spirvBuffer_.data(), spirvFileSize_, nullptr); + DPCTLDeviceSelector_Delete(DSRef); + } + + ~TestQueueSubmitWithLocalAccessorFP64() + { + spirvFile.close(); + DPCTLDevice_Delete(DRef); + DPCTLQueue_Delete(QRef); + DPCTLKernelBundle_Delete(KBRef); + } +}; + +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForInt8) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_INT8_T, + "_ZTS14SyclKernel_SLMIaE"); +} + +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForUInt8) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_UINT8_T, + "_ZTS14SyclKernel_SLMIhE"); +} + +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForInt16) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_INT16_T, + "_ZTS14SyclKernel_SLMIsE"); +} + +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForUInt16) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_UINT16_T, + "_ZTS14SyclKernel_SLMItE"); +} + +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForInt32) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_INT32_T, + "_ZTS14SyclKernel_SLMIiE"); +} + +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForUInt32) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_UINT32_T, + "_ZTS14SyclKernel_SLMIjE"); +} + +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForInt64) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_INT64_T, + "_ZTS14SyclKernel_SLMIlE"); +} + +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForUInt64) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_UINT64_T, + "_ZTS14SyclKernel_SLMImE"); +} + +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForFloat) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_FLOAT32_T, + "_ZTS14SyclKernel_SLMIfE"); +} + +TEST_F(TestQueueSubmitWithLocalAccessorFP64, CheckForDouble) +{ + if (DPCTLDevice_HasAspect(DRef, DPCTLSyclAspectType::fp64)) { + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_FLOAT64_T, + "_ZTS14SyclKernel_SLMIdE"); + } +} + +TEST_F(TestQueueSubmitWithLocalAccessor, CheckForUnsupportedArgTy) +{ + size_t gRange[] = {SIZE}; + size_t lRange[] = {SIZE / 10}; + size_t RANGE_NDIMS = 1; + constexpr size_t NARGS = 2; + + auto la = MDLocalAccessor{1, DPCTL_UNSUPPORTED_KERNEL_ARG, SIZE / 10, 1, 1}; + auto kernel = DPCTLKernelBundle_GetKernel(KBRef, "_ZTS14SyclKernel_SLMImE"); + void *args[NARGS] = {unwrap(nullptr), (void *)&la}; + DPCTLKernelArgType addKernelArgTypes[] = {DPCTL_VOID_PTR, + DPCTL_LOCAL_ACCESSOR}; + auto ERef = + DPCTLQueue_SubmitNDRange(kernel, QRef, args, addKernelArgTypes, NARGS, + gRange, lRange, RANGE_NDIMS, nullptr, 0); + + ASSERT_TRUE(ERef == nullptr); +} diff --git a/scripts/gen_coverage.py b/scripts/gen_coverage.py index fece9228fd..e81f7b301b 100644 --- a/scripts/gen_coverage.py +++ b/scripts/gen_coverage.py @@ -82,7 +82,7 @@ def run( .strip("\n") ) subprocess.check_call( - ["cmake", "--build", ".", "--target", "llvm-cov"], + ["cmake", "--build", ".", "--target", "llvm-cov-report"], cwd=cmake_build_dir, ) env["LLVM_PROFILE_FILE"] = "dpctl_pytest.profraw"