diff --git a/dpctl/_backend.pxd b/dpctl/_backend.pxd index 2c0605fd33..e8627e8241 100644 --- a/dpctl/_backend.pxd +++ b/dpctl/_backend.pxd @@ -57,21 +57,16 @@ cdef extern from "syclinterface/dpctl_sycl_enum_types.h": _UNKNOWN_DEVICE 'DPCTL_UNKNOWN_DEVICE' ctypedef enum _arg_data_type 'DPCTLKernelArgType': - _CHAR 'DPCTL_CHAR', - _SIGNED_CHAR 'DPCTL_SIGNED_CHAR', - _UNSIGNED_CHAR 'DPCTL_UNSIGNED_CHAR', - _SHORT 'DPCTL_SHORT', - _INT 'DPCTL_INT', - _UNSIGNED_INT 'DPCTL_UNSIGNED_INT', - _UNSIGNED_INT8 'DPCTL_UNSIGNED_INT8', - _LONG 'DPCTL_LONG', - _UNSIGNED_LONG 'DPCTL_UNSIGNED_LONG', - _LONG_LONG 'DPCTL_LONG_LONG', - _UNSIGNED_LONG_LONG 'DPCTL_UNSIGNED_LONG_LONG', - _SIZE_T 'DPCTL_SIZE_T', - _FLOAT 'DPCTL_FLOAT', - _DOUBLE 'DPCTL_DOUBLE', - _LONG_DOUBLE 'DPCTL_DOUBLE', + _INT8_T 'DPCTL_INT8_T', + _UINT8_T 'DPCTL_UINT8_T', + _INT16_T 'DPCTL_INT16_T', + _UINT16_T 'DPCTL_UINT16_T', + _INT32_T 'DPCTL_INT32_T', + _UINT32_T 'DPCTL_UINT32_T', + _INT64_T 'DPCTL_INT64_T', + _UINT64_T 'DPCTL_UINT64_T', + _FLOAT 'DPCTL_FLOAT32_T', + _DOUBLE 'DPCTL_FLOAT64_T', _VOID_PTR 'DPCTL_VOID_PTR' ctypedef enum _queue_property_type 'DPCTLQueuePropertyType': diff --git a/dpctl/_sycl_queue.pyx b/dpctl/_sycl_queue.pyx index 5add749403..542b7b5a47 100644 --- a/dpctl/_sycl_queue.pyx +++ b/dpctl/_sycl_queue.pyx @@ -631,34 +631,28 @@ cdef class SyclQueue(_SyclQueue): for idx, arg in enumerate(args): if isinstance(arg, ctypes.c_char): kargs[idx] = (ctypes.addressof(arg)) - kargty[idx] = _arg_data_type._CHAR - elif isinstance(arg, ctypes.c_int): + kargty[idx] = _arg_data_type._INT8_T + elif isinstance(arg, ctypes.c_uint8): kargs[idx] = (ctypes.addressof(arg)) - kargty[idx] = _arg_data_type._INT - elif isinstance(arg, ctypes.c_uint): + kargty[idx] = _arg_data_type._UINT8_T + elif isinstance(arg, ctypes.c_short): kargs[idx] = (ctypes.addressof(arg)) - kargty[idx] = _arg_data_type._UNSIGNED_INT - elif isinstance(arg, ctypes.c_uint8): + kargty[idx] = _arg_data_type._INT16_T + elif isinstance(arg, ctypes.c_ushort): kargs[idx] = (ctypes.addressof(arg)) - kargty[idx] = _arg_data_type._UNSIGNED_INT8 - elif isinstance(arg, ctypes.c_long): + kargty[idx] = _arg_data_type._UINT16_T + elif isinstance(arg, ctypes.c_int): kargs[idx] = (ctypes.addressof(arg)) - kargty[idx] = _arg_data_type._LONG - elif isinstance(arg, ctypes.c_ulong): + kargty[idx] = _arg_data_type._INT32_T + elif isinstance(arg, ctypes.c_uint): kargs[idx] = (ctypes.addressof(arg)) - kargty[idx] = _arg_data_type._UNSIGNED_LONG + kargty[idx] = _arg_data_type._UINT32_T elif isinstance(arg, ctypes.c_longlong): kargs[idx] = (ctypes.addressof(arg)) - kargty[idx] = _arg_data_type._LONG_LONG + kargty[idx] = _arg_data_type._INT64_T elif isinstance(arg, ctypes.c_ulonglong): kargs[idx] = (ctypes.addressof(arg)) - kargty[idx] = _arg_data_type._UNSIGNED_LONG_LONG - elif isinstance(arg, ctypes.c_short): - kargs[idx] = (ctypes.addressof(arg)) - kargty[idx] = _arg_data_type._SHORT - elif isinstance(arg, ctypes.c_size_t): - kargs[idx] = (ctypes.addressof(arg)) - kargty[idx] = _arg_data_type._SIZE_T + kargty[idx] = _arg_data_type._UINT64_T elif isinstance(arg, ctypes.c_float): kargs[idx] = (ctypes.addressof(arg)) kargty[idx] = _arg_data_type._FLOAT diff --git a/dpctl/enum_types.py b/dpctl/enum_types.py index 102ae09015..bb8c54b7be 100644 --- a/dpctl/enum_types.py +++ b/dpctl/enum_types.py @@ -113,3 +113,22 @@ class global_mem_cache_type(Enum): none = auto() read_only = auto() read_write = auto() + + +class kernel_arg_type(Enum): + """ + An enumeration of supported kernel argument types in + :func:`dpctl.SyclQueue.submit` + """ + + dpctl_int8 = auto() + dpctl_uint8 = auto() + dpctl_int16 = auto() + dpctl_uint16 = auto() + dpctl_int32 = auto() + dpctl_uint32 = auto() + dpctl_int64 = auto() + dpctl_uint64 = auto() + dpctl_float32 = auto() + dpctl_float64 = auto() + dpctl_void_ptr = auto() diff --git a/libsyclinterface/dbg_build.sh b/libsyclinterface/dbg_build.sh index eaa1a02785..f036796a2e 100755 --- a/libsyclinterface/dbg_build.sh +++ b/libsyclinterface/dbg_build.sh @@ -2,30 +2,31 @@ set +xe rm -rf build mkdir build -pushd build +pushd build || exit 1 -INSTALL_PREFIX=`pwd`/../install +INSTALL_PREFIX=$(pwd)/../install rm -rf ${INSTALL_PREFIX} cmake \ -DCMAKE_BUILD_TYPE=Debug \ -DCMAKE_C_COMPILER=icx \ - -DCMAKE_CXX_COMPILER=dpcpp \ + -DCMAKE_CXX_COMPILER=icpx \ + -DCMAKE_CXX_FLAGS=-fsycl \ -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ -DCMAKE_PREFIX_PATH=${INSTALL_PREFIX} \ -DDPCTL_ENABLE_L0_PROGRAM_CREATION=ON \ -DDPCTL_BUILD_CAPI_TESTS=ON \ - -DDPCTL_GENERATE_COVERAGE=ON \ .. make V=1 -n -j 4 && make check && make install -# Turn on to generate coverage report html files -make lcov-genhtml +# Turn on to generate coverage report html files reconfigure with +# -DDPCTL_GENERATE_COVERAGE=ON and then +# make lcov-genhtml # For more verbose tests use: # cd tests # ctest -V --progress --output-on-failure -j 4 # cd .. -popd +popd || exit 1 diff --git a/libsyclinterface/include/dpctl_sycl_enum_types.h b/libsyclinterface/include/dpctl_sycl_enum_types.h index 2a1da04ee1..4edcce45df 100644 --- a/libsyclinterface/include/dpctl_sycl_enum_types.h +++ b/libsyclinterface/include/dpctl_sycl_enum_types.h @@ -87,22 +87,18 @@ typedef enum */ typedef enum { - DPCTL_CHAR, - DPCTL_SIGNED_CHAR, - DPCTL_UNSIGNED_CHAR, - DPCTL_SHORT, - DPCTL_INT, - DPCTL_UNSIGNED_INT, - DPCTL_UNSIGNED_INT8, - DPCTL_LONG, - DPCTL_UNSIGNED_LONG, - DPCTL_LONG_LONG, - DPCTL_UNSIGNED_LONG_LONG, - DPCTL_SIZE_T, - DPCTL_FLOAT, - DPCTL_DOUBLE, - DPCTL_LONG_DOUBLE, - DPCTL_VOID_PTR + DPCTL_INT8_T, + DPCTL_UINT8_T, + DPCTL_INT16_T, + DPCTL_UINT16_T, + DPCTL_INT32_T, + DPCTL_UINT32_T, + DPCTL_INT64_T, + DPCTL_UINT64_T, + DPCTL_FLOAT32_T, + DPCTL_FLOAT64_T, + DPCTL_VOID_PTR, + DPCTL_UNSUPPORTED_KERNEL_ARG } DPCTLKernelArgType; /*! diff --git a/libsyclinterface/source/dpctl_sycl_queue_interface.cpp b/libsyclinterface/source/dpctl_sycl_queue_interface.cpp index 63c4720ff3..dc512fd126 100644 --- a/libsyclinterface/source/dpctl_sycl_queue_interface.cpp +++ b/libsyclinterface/source/dpctl_sycl_queue_interface.cpp @@ -51,6 +51,17 @@ typedef struct complex uint64_t imag; } complexNumber; +void set_dependent_events(handler &cgh, + __dpctl_keep const DPCTLSyclEventRef *DepEvents, + size_t NDepEvents) +{ + for (auto i = 0ul; i < NDepEvents; ++i) { + auto ei = unwrap(DepEvents[i]); + if (ei) + cgh.depends_on(*ei); + } +} + /*! * @brief Set the kernel arg object * @@ -65,51 +76,36 @@ bool set_kernel_arg(handler &cgh, bool arg_set = true; switch (ArgTy) { - case DPCTL_CHAR: - cgh.set_arg(idx, *(char *)Arg); - break; - case DPCTL_SIGNED_CHAR: - cgh.set_arg(idx, *(signed char *)Arg); - break; - case DPCTL_UNSIGNED_CHAR: - cgh.set_arg(idx, *(unsigned char *)Arg); + case DPCTL_INT8_T: + cgh.set_arg(idx, *(int8_t *)Arg); break; - case DPCTL_SHORT: - cgh.set_arg(idx, *(short *)Arg); - break; - case DPCTL_INT: - cgh.set_arg(idx, *(int *)Arg); - break; - case DPCTL_UNSIGNED_INT: - cgh.set_arg(idx, *(unsigned int *)Arg); - break; - case DPCTL_UNSIGNED_INT8: + case DPCTL_UINT8_T: cgh.set_arg(idx, *(uint8_t *)Arg); break; - case DPCTL_LONG: - cgh.set_arg(idx, *(long *)Arg); + case DPCTL_INT16_T: + cgh.set_arg(idx, *(int16_t *)Arg); break; - case DPCTL_UNSIGNED_LONG: - cgh.set_arg(idx, *(unsigned long *)Arg); + case DPCTL_UINT16_T: + cgh.set_arg(idx, *(uint16_t *)Arg); break; - case DPCTL_LONG_LONG: - cgh.set_arg(idx, *(long long *)Arg); + case DPCTL_INT32_T: + cgh.set_arg(idx, *(int32_t *)Arg); break; - case DPCTL_UNSIGNED_LONG_LONG: - cgh.set_arg(idx, *(unsigned long long *)Arg); + case DPCTL_UINT32_T: + cgh.set_arg(idx, *(uint32_t *)Arg); break; - case DPCTL_SIZE_T: - cgh.set_arg(idx, *(size_t *)Arg); + case DPCTL_INT64_T: + cgh.set_arg(idx, *(int64_t *)Arg); break; - case DPCTL_FLOAT: + case DPCTL_UINT64_T: + cgh.set_arg(idx, *(uint64_t *)Arg); + break; + case DPCTL_FLOAT32_T: cgh.set_arg(idx, *(float *)Arg); break; - case DPCTL_DOUBLE: + case DPCTL_FLOAT64_T: cgh.set_arg(idx, *(double *)Arg); break; - case DPCTL_LONG_DOUBLE: - cgh.set_arg(idx, *(long double *)Arg); - break; case DPCTL_VOID_PTR: cgh.set_arg(idx, Arg); break; @@ -122,6 +118,21 @@ bool set_kernel_arg(handler &cgh, return arg_set; } +void set_kernel_args(handler &cgh, + __dpctl_keep void **Args, + __dpctl_keep const DPCTLKernelArgType *ArgTypes, + size_t NArgs) +{ + for (auto i = 0ul; i < NArgs; ++i) { + if (!set_kernel_arg(cgh, i, Args[i], ArgTypes[i])) { + error_handler("Kernel argument could not be created.", __FILE__, + __func__, __LINE__); + throw std::invalid_argument( + "Kernel argument could not be created."); + } + } +} + std::unique_ptr create_property_list(int properties) { std::unique_ptr propList; @@ -356,39 +367,52 @@ DPCTLQueue_SubmitRange(__dpctl_keep const DPCTLSyclKernelRef KRef, event e; try { - e = Queue->submit([&](handler &cgh) { - // Depend on any event that was specified by the caller. - if (NDepEvents) - for (auto i = 0ul; i < NDepEvents; ++i) - cgh.depends_on(*unwrap(DepEvents[i])); - - for (auto i = 0ul; i < NArgs; ++i) { - // \todo add support for Sycl buffers - if (!set_kernel_arg(cgh, i, Args[i], ArgTypes[i])) - exit(1); - } - switch (NDims) { - case 1: + switch (NDims) { + case 1: + { + e = Queue->submit([&](handler &cgh) { + // Depend on any event that was specified by the caller. + set_dependent_events(cgh, DepEvents, NDepEvents); + set_kernel_args(cgh, Args, ArgTypes, NArgs); cgh.parallel_for(range<1>{Range[0]}, *Kernel); - break; - case 2: + }); + return wrap(new event(std::move(e))); + } + case 2: + { + e = Queue->submit([&](handler &cgh) { + // Depend on any event that was specified by the caller. + set_dependent_events(cgh, DepEvents, NDepEvents); + set_kernel_args(cgh, Args, ArgTypes, NArgs); cgh.parallel_for(range<2>{Range[0], Range[1]}, *Kernel); - break; - case 3: + }); + return wrap(new event(std::move(e))); + } + case 3: + { + e = Queue->submit([&](handler &cgh) { + // Depend on any event that was specified by the caller. + set_dependent_events(cgh, DepEvents, NDepEvents); + set_kernel_args(cgh, Args, ArgTypes, NArgs); cgh.parallel_for(range<3>{Range[0], Range[1], Range[2]}, *Kernel); - break; - default: - throw std::runtime_error("Range cannot be greater than three " - "dimensions."); - } - }); + }); + return wrap(new event(std::move(e))); + } + default: + error_handler("Range cannot be greater than three " + "dimensions.", + __FILE__, __func__, __LINE__, error_level::error); + return nullptr; + } } catch (std::exception const &e) { - error_handler(e, __FILE__, __func__, __LINE__); + error_handler(e, __FILE__, __func__, __LINE__, error_level::error); + return nullptr; + } catch (...) { + error_handler("Unknown exception encountered", __FILE__, __func__, + __LINE__, error_level::error); return nullptr; } - - return wrap(new event(std::move(e))); } __dpctl_give DPCTLSyclEventRef @@ -408,46 +432,56 @@ DPCTLQueue_SubmitNDRange(__dpctl_keep const DPCTLSyclKernelRef KRef, event e; try { - e = Queue->submit([&](handler &cgh) { - // Depend on any event that was specified by the caller. - if (DepEvents) - for (auto i = 0ul; i < NDepEvents; ++i) { - auto ei = unwrap(DepEvents[i]); - if (ei) - cgh.depends_on(*ei); - } - - for (auto i = 0ul; i < NArgs; ++i) { - // \todo add support for Sycl buffers - if (!set_kernel_arg(cgh, i, Args[i], ArgTypes[i])) - exit(1); - } - switch (NDims) { - case 1: + switch (NDims) { + case 1: + { + e = Queue->submit([&](handler &cgh) { + // Depend on any event that was specified by the caller. + set_dependent_events(cgh, DepEvents, NDepEvents); + set_kernel_args(cgh, Args, ArgTypes, NArgs); cgh.parallel_for(nd_range<1>{{gRange[0]}, {lRange[0]}}, *Kernel); - break; - case 2: + }); + return wrap(new event(std::move(e))); + } + case 2: + { + e = Queue->submit([&](handler &cgh) { + // Depend on any event that was specified by the caller. + set_dependent_events(cgh, DepEvents, NDepEvents); + set_kernel_args(cgh, Args, ArgTypes, NArgs); cgh.parallel_for( nd_range<2>{{gRange[0], gRange[1]}, {lRange[0], lRange[1]}}, *Kernel); - break; - case 3: + }); + return wrap(new event(std::move(e))); + } + case 3: + { + e = Queue->submit([&](handler &cgh) { + // Depend on any event that was specified by the caller. + set_dependent_events(cgh, DepEvents, NDepEvents); + set_kernel_args(cgh, Args, ArgTypes, NArgs); cgh.parallel_for(nd_range<3>{{gRange[0], gRange[1], gRange[2]}, {lRange[0], lRange[1], lRange[2]}}, *Kernel); - break; - default: - throw std::runtime_error("Range cannot be greater than three " - "dimensions."); - } - }); + }); + return wrap(new event(std::move(e))); + } + default: + error_handler("Range cannot be greater than three " + "dimensions.", + __FILE__, __func__, __LINE__, error_level::error); + return nullptr; + } } catch (std::exception const &e) { - error_handler(e, __FILE__, __func__, __LINE__); + error_handler(e, __FILE__, __func__, __LINE__, error_level::error); + return nullptr; + } catch (...) { + error_handler("Unknown exception encountered", __FILE__, __func__, + __LINE__, error_level::error); return nullptr; } - - return wrap(new event(std::move(e))); } void DPCTLQueue_Wait(__dpctl_keep DPCTLSyclQueueRef QRef) diff --git a/libsyclinterface/tests/CMakeLists.txt b/libsyclinterface/tests/CMakeLists.txt index 13f36c39d7..a0be739b2e 100644 --- a/libsyclinterface/tests/CMakeLists.txt +++ b/libsyclinterface/tests/CMakeLists.txt @@ -1,4 +1,5 @@ find_package(GTest REQUIRED) + # We need thread support for gtest find_package(Threads REQUIRED) @@ -16,7 +17,12 @@ include_directories( link_directories(${GTEST_LIB_DIR}) # Copy the spir-v input files to test build directory -set(spirv-test-files multi_kernel.spv) +set(spirv-test-files + multi_kernel.spv + oneD_range_kernel_inttys_fp32.spv + oneD_range_kernel_fp64.spv +) + foreach(tf ${spirv-test-files}) file(COPY ${tf} DESTINATION ${CMAKE_CURRENT_BINARY_DIR}) endforeach() @@ -32,38 +38,39 @@ add_executable(dpctl_c_api_tests ${sources} ) add_sycl_to_target( - TARGET dpctl_c_api_tests - SOURCES - ${CMAKE_CURRENT_SOURCE_DIR}/test_helper.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_context_interface.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_device_invalid_filters.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_device_subdevices.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_device_interface.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_device_manager.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_device_selector_interface.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_device_aspects.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_event_interface.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_platform_interface.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_kernel_interface.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_kernel_bundle_interface.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_platform_invalid_filters.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_queue_manager.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_queue_submit.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_queue_interface.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_usm_interface.cpp -) -if (_dpctl_sycl_targets) -# make fat binary -target_compile_options( - dpctl_c_api_tests - PRIVATE - -fsycl-targets=${_dpctl_sycl_targets} -) -target_link_options( - dpctl_c_api_tests - PRIVATE - -fsycl-targets=${_dpctl_sycl_targets} + TARGET dpctl_c_api_tests + SOURCES + ${CMAKE_CURRENT_SOURCE_DIR}/test_helper.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_context_interface.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_device_invalid_filters.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_device_subdevices.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_device_interface.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_device_manager.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_device_selector_interface.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_device_aspects.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_event_interface.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_platform_interface.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_kernel_interface.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_kernel_bundle_interface.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_platform_invalid_filters.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_queue_manager.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_queue_submit.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_queue_interface.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_sycl_usm_interface.cpp ) + +if(_dpctl_sycl_targets) + # make fat binary + target_compile_options( + dpctl_c_api_tests + PRIVATE + -fsycl-targets=${_dpctl_sycl_targets} + ) + target_link_options( + dpctl_c_api_tests + PRIVATE + -fsycl-targets=${_dpctl_sycl_targets} + ) endif() if(DPCTL_GENERATE_COVERAGE) @@ -82,21 +89,21 @@ if(DPCTL_GENERATE_COVERAGE) add_custom_target(llvm-cov COMMAND ${CMAKE_COMMAND} -E env DPCTL_VERBOSITY=warning ${CMAKE_CURRENT_BINARY_DIR}/dpctl_c_api_tests COMMAND ${LLVMProfdata_EXE} - merge - -sparse default.profraw - -o - dpctl.profdata + merge + -sparse default.profraw + -o + dpctl.profdata COMMAND ${LLVMCov_EXE} - export - -format=lcov - -ignore-filename-regex=/tmp/icpx* - -instr-profile=dpctl.profdata - "${object_arg}$,;${object_arg}>" - > dpctl.lcov + export + -format=lcov + -ignore-filename-regex=/tmp/icpx* + -instr-profile=dpctl.profdata + "${object_arg}$,;${object_arg}>" + > dpctl.lcov COMMAND ${LLVMCov_EXE} - report - -instr-profile=dpctl.profdata - "${object_arg}$,;${object_arg}>" + report + -instr-profile=dpctl.profdata + "${object_arg}$,;${object_arg}>" WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} COMMAND_EXPAND_LISTS DEPENDS dpctl_c_api_tests @@ -105,21 +112,21 @@ if(DPCTL_GENERATE_COVERAGE) add_custom_target(lcov-genhtml COMMAND ${CMAKE_COMMAND} -E env DPCTL_VERBOSITY=warning ${CMAKE_CURRENT_BINARY_DIR}/dpctl_c_api_tests COMMAND ${LLVMProfdata_EXE} - merge - -sparse default.profraw - -o - dpctl.profdata + merge + -sparse default.profraw + -o + dpctl.profdata COMMAND ${LLVMCov_EXE} - export - -format=lcov - -instr-profile=dpctl.profdata - "${object_arg}$,;${object_arg}>" - > dpctl.lcov + export + -format=lcov + -instr-profile=dpctl.profdata + "${object_arg}$,;${object_arg}>" + > dpctl.lcov COMMAND ${GENHTML_EXE} - ${CMAKE_CURRENT_BINARY_DIR}/dpctl.lcov - --no-source - --output-directory - ${COVERAGE_OUTPUT_DIR}/dpctl-c-api-coverage + ${CMAKE_CURRENT_BINARY_DIR}/dpctl.lcov + --no-source + --output-directory + ${COVERAGE_OUTPUT_DIR}/dpctl-c-api-coverage WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR} COMMAND_EXPAND_LISTS DEPENDS dpctl_c_api_tests diff --git a/libsyclinterface/tests/oneD_range_kernel_fp64.spv b/libsyclinterface/tests/oneD_range_kernel_fp64.spv new file mode 100644 index 0000000000..32f5438ff1 Binary files /dev/null and b/libsyclinterface/tests/oneD_range_kernel_fp64.spv differ diff --git a/libsyclinterface/tests/oneD_range_kernel_inttys_fp32.spv b/libsyclinterface/tests/oneD_range_kernel_inttys_fp32.spv new file mode 100644 index 0000000000..08cef17e04 Binary files /dev/null and b/libsyclinterface/tests/oneD_range_kernel_inttys_fp32.spv differ diff --git a/libsyclinterface/tests/test_sycl_queue_submit.cpp b/libsyclinterface/tests/test_sycl_queue_submit.cpp index 34f6f71099..cc9bc836ce 100644 --- a/libsyclinterface/tests/test_sycl_queue_submit.cpp +++ b/libsyclinterface/tests/test_sycl_queue_submit.cpp @@ -35,7 +35,9 @@ #include #include #include +#include #include +#include namespace { @@ -44,154 +46,317 @@ static_assert(SIZE % 8 == 0); using namespace dpctl::syclinterface; +template +void submit_kernel(DPCTLSyclQueueRef QRef, + DPCTLSyclKernelBundleRef KBRef, + std::vector spirvBuffer, + size_t spirvFileSize, + DPCTLKernelArgType kernelArgTy, + std::string kernelName) +{ + T scalarVal = 3; + constexpr size_t NARGS = 4; + constexpr size_t RANGE_NDIMS = 1; + + ASSERT_TRUE(DPCTLKernelBundle_HasKernel(KBRef, kernelName.c_str())); + auto kernel = DPCTLKernelBundle_GetKernel(KBRef, kernelName.c_str()); + + // Create the input args + auto a = DPCTLmalloc_shared(SIZE * sizeof(T), QRef); + ASSERT_TRUE(a != nullptr); + auto b = DPCTLmalloc_shared(SIZE * sizeof(T), QRef); + ASSERT_TRUE(b != nullptr); + auto c = DPCTLmalloc_shared(SIZE * sizeof(T), QRef); + ASSERT_TRUE(c != nullptr); + + // Create kernel args for vector_add + size_t Range[] = {SIZE}; + void *args[NARGS] = {unwrap(a), unwrap(b), unwrap(c), + (void *)&scalarVal}; + DPCTLKernelArgType addKernelArgTypes[] = {DPCTL_VOID_PTR, DPCTL_VOID_PTR, + DPCTL_VOID_PTR, kernelArgTy}; + auto ERef = DPCTLQueue_SubmitRange(kernel, QRef, args, addKernelArgTypes, + NARGS, Range, RANGE_NDIMS, nullptr, 0); + ASSERT_TRUE(ERef != nullptr); + DPCTLQueue_Wait(QRef); + + // clean ups + DPCTLEvent_Delete(ERef); + DPCTLKernel_Delete(kernel); + DPCTLfree_with_queue((DPCTLSyclUSMRef)a, QRef); + DPCTLfree_with_queue((DPCTLSyclUSMRef)b, QRef); + DPCTLfree_with_queue((DPCTLSyclUSMRef)c, QRef); +} + } /* end of anonymous namespace */ +/* +// The oneD_range_kernel spv files were generated from the SYCL program included +// in this comment. The program can be compiled using +// `icpx -fsycl oneD_range_kernel.cpp`. After that if the generated executable +// is run with the environment variable `SYCL_DUMP_IMAGES=1`, icpx runtime +// will dump all offload sections of fat binary to the current working +// directory. When tested with DPC++ 2024.0 the kernels are split across two +// separate SPV files. One contains all kernels for integers and FP32 +// data type, and another contains the kernel for FP64. +// +// Note that, `SYCL_DUMP_IMAGES=1` will also generate extra SPV files that +// contain the code for built in functions such as indexing and barriers. To +// figure which SPV file contains the kernels, use `spirv-dis` from the +// spirv-tools package to translate the SPV binary format to a human-readable +// textual format. + +#include +#include +#include + +template +class Range1DKernel +{ +private: + T *a_ = nullptr; + T *b_ = nullptr; + T *c_ = nullptr; + T scalarVal_; + +public: + RangeKernel(T *a, T *b, T *c, T scalarVal) + : a_(a), b_(b), c_(c), scalarVal_(scalarVal) + { + } + + void operator()(sycl::item<1> it) const + { + auto i = it.get_id(); + a_[i] = i + 1; + b_[i] = i + 2; + c_[i] = scalarVal_ * (a_[i] + b_[i]); + } +}; + +template +void submit_kernel( + sycl::queue q, + const unsigned long N, + T *a, + T *b, + T *c, + T scalarVal) +{ + // clang-format off + q.submit([&](auto &h) { + h.parallel_for(sycl::range(N), RangeKernel(a, b, c, scalarVal)); + }); + // clang-format on +} + +template +void driver(size_t N) +{ + sycl::queue q; + auto *a = sycl::malloc_shared(N, q); + auto *b = sycl::malloc_shared(N, q); + auto *c = sycl::malloc_shared(N, q); + T scalarVal = 3; + + submit_kernel(q, N, a, b, c, scalarVal); + q.wait(); + + std::cout << "C[0] : " << (size_t)c[0] << " " << std::endl; + sycl::free(a, q); +} + +int main(int argc, const char **argv) +{ + size_t N = 0; + std::cout << "Enter problem size in N:\n"; + std::cin >> N; + std::cout << "Executing with N = " << N << std::endl; + + driver(N); + driver(N); + driver(N); + driver(N); + driver(N); + driver(N); + driver(N); + driver(N); + driver(N); + driver(N); + + return 0; +} +*/ + struct TestQueueSubmit : public ::testing::Test { std::ifstream spirvFile; - size_t spirvFileSize; - std::vector spirvBuffer; + size_t spirvFileSize_; + std::vector spirvBuffer_; + DPCTLSyclQueueRef QRef = nullptr; + DPCTLSyclKernelBundleRef KBRef = nullptr; TestQueueSubmit() { - spirvFile.open("./multi_kernel.spv", std::ios::binary | std::ios::ate); - spirvFileSize = std::filesystem::file_size("./multi_kernel.spv"); - spirvBuffer.reserve(spirvFileSize); + DPCTLSyclDeviceSelectorRef DSRef = nullptr; + DPCTLSyclDeviceRef DRef = nullptr; + + spirvFile.open("./oneD_range_kernel_inttys_fp32.spv", + std::ios::binary | std::ios::ate); + spirvFileSize_ = + std::filesystem::file_size("./oneD_range_kernel_inttys_fp32.spv"); + spirvBuffer_.reserve(spirvFileSize_); spirvFile.seekg(0, std::ios::beg); - spirvFile.read(spirvBuffer.data(), spirvFileSize); + spirvFile.read(spirvBuffer_.data(), spirvFileSize_); + + DSRef = DPCTLDefaultSelector_Create(); + DRef = DPCTLDevice_CreateFromSelector(DSRef); + QRef = + DPCTLQueue_CreateForDevice(DRef, nullptr, DPCTL_DEFAULT_PROPERTY); + auto CRef = DPCTLQueue_GetContext(QRef); + + KBRef = DPCTLKernelBundle_CreateFromSpirv( + CRef, DRef, spirvBuffer_.data(), spirvFileSize_, nullptr); + DPCTLDevice_Delete(DRef); + DPCTLDeviceSelector_Delete(DSRef); } ~TestQueueSubmit() { spirvFile.close(); + DPCTLQueue_Delete(QRef); + DPCTLKernelBundle_Delete(KBRef); } }; -TEST_F(TestQueueSubmit, CheckSubmitRange_saxpy) -{ - DPCTLSyclDeviceSelectorRef DSRef = nullptr; - DPCTLSyclDeviceRef DRef = nullptr; - - EXPECT_NO_FATAL_FAILURE(DSRef = DPCTLDefaultSelector_Create()); - EXPECT_NO_FATAL_FAILURE(DRef = DPCTLDevice_CreateFromSelector(DSRef)); - DPCTLDeviceMgr_PrintDeviceInfo(DRef); - ASSERT_TRUE(DRef); - auto QRef = - DPCTLQueue_CreateForDevice(DRef, nullptr, DPCTL_DEFAULT_PROPERTY); - ASSERT_TRUE(QRef); - auto CRef = DPCTLQueue_GetContext(QRef); - ASSERT_TRUE(CRef); - auto KBRef = DPCTLKernelBundle_CreateFromSpirv( - CRef, DRef, spirvBuffer.data(), spirvFileSize, nullptr); - ASSERT_TRUE(KBRef != nullptr); - ASSERT_TRUE(DPCTLKernelBundle_HasKernel(KBRef, "axpy")); - auto AxpyKernel = DPCTLKernelBundle_GetKernel(KBRef, "axpy"); +struct TestQueueSubmitFP64 : public ::testing::Test +{ + std::ifstream spirvFile; + size_t spirvFileSize_; + std::vector spirvBuffer_; + DPCTLSyclQueueRef QRef = nullptr; + DPCTLSyclKernelBundleRef KBRef = nullptr; - // Create the input args - auto a = DPCTLmalloc_shared(SIZE * sizeof(float), QRef); - ASSERT_TRUE(a != nullptr); - auto b = DPCTLmalloc_shared(SIZE * sizeof(float), QRef); - ASSERT_TRUE(b != nullptr); - auto c = DPCTLmalloc_shared(SIZE * sizeof(float), QRef); - ASSERT_TRUE(c != nullptr); + TestQueueSubmitFP64() + { + DPCTLSyclDeviceSelectorRef DSRef = nullptr; + DPCTLSyclDeviceRef DRef = nullptr; + + spirvFile.open("./oneD_range_kernel_fp64.spv", + std::ios::binary | std::ios::ate); + spirvFileSize_ = + std::filesystem::file_size("./oneD_range_kernel_fp64.spv"); + spirvBuffer_.reserve(spirvFileSize_); + spirvFile.seekg(0, std::ios::beg); + spirvFile.read(spirvBuffer_.data(), spirvFileSize_); + DSRef = DPCTLDefaultSelector_Create(); + DRef = DPCTLDevice_CreateFromSelector(DSRef); + QRef = + DPCTLQueue_CreateForDevice(DRef, nullptr, DPCTL_DEFAULT_PROPERTY); + auto CRef = DPCTLQueue_GetContext(QRef); - auto a_ptr = reinterpret_cast(unwrap(a)); - auto b_ptr = reinterpret_cast(unwrap(b)); - // Initialize a,b - for (auto i = 0ul; i < SIZE; ++i) { - a_ptr[i] = i + 1.0; - b_ptr[i] = i + 2.0; + KBRef = DPCTLKernelBundle_CreateFromSpirv( + CRef, DRef, spirvBuffer_.data(), spirvFileSize_, nullptr); + DPCTLDevice_Delete(DRef); + DPCTLDeviceSelector_Delete(DSRef); } - // Create kernel args for axpy - float d = 10.0; - size_t Range[] = {SIZE}; - void *args2[4] = {unwrap(a), unwrap(b), unwrap(c), - (void *)&d}; - DPCTLKernelArgType addKernelArgTypes[] = {DPCTL_VOID_PTR, DPCTL_VOID_PTR, - DPCTL_VOID_PTR, DPCTL_FLOAT}; - auto ERef = DPCTLQueue_SubmitRange( - AxpyKernel, QRef, args2, addKernelArgTypes, 4, Range, 1, nullptr, 0); - ASSERT_TRUE(ERef != nullptr); - DPCTLQueue_Wait(QRef); + ~TestQueueSubmitFP64() + { + spirvFile.close(); + DPCTLQueue_Delete(QRef); + DPCTLKernelBundle_Delete(KBRef); + } +}; - // clean ups - DPCTLEvent_Delete(ERef); - DPCTLKernel_Delete(AxpyKernel); - DPCTLfree_with_queue((DPCTLSyclUSMRef)a, QRef); - DPCTLfree_with_queue((DPCTLSyclUSMRef)b, QRef); - DPCTLfree_with_queue((DPCTLSyclUSMRef)c, QRef); - DPCTLQueue_Delete(QRef); - DPCTLContext_Delete(CRef); - DPCTLKernelBundle_Delete(KBRef); - DPCTLDevice_Delete(DRef); - DPCTLDeviceSelector_Delete(DSRef); -} - -TEST_F(TestQueueSubmit, CheckSubmitNDRange_saxpy) -{ - DPCTLSyclDeviceSelectorRef DSRef = nullptr; - DPCTLSyclDeviceRef DRef = nullptr; - - EXPECT_NO_FATAL_FAILURE(DSRef = DPCTLDefaultSelector_Create()); - EXPECT_NO_FATAL_FAILURE(DRef = DPCTLDevice_CreateFromSelector(DSRef)); - DPCTLDeviceMgr_PrintDeviceInfo(DRef); - ASSERT_TRUE(DRef); - auto QRef = - DPCTLQueue_CreateForDevice(DRef, nullptr, DPCTL_DEFAULT_PROPERTY); - ASSERT_TRUE(QRef); - auto CRef = DPCTLQueue_GetContext(QRef); - ASSERT_TRUE(CRef); - auto KBRef = DPCTLKernelBundle_CreateFromSpirv( - CRef, DRef, spirvBuffer.data(), spirvFileSize, nullptr); - ASSERT_TRUE(KBRef != nullptr); - ASSERT_TRUE(DPCTLKernelBundle_HasKernel(KBRef, "axpy")); - auto AxpyKernel = DPCTLKernelBundle_GetKernel(KBRef, "axpy"); +TEST_F(TestQueueSubmit, CheckForInt8) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_INT8_T, + "_ZTS11RangeKernelIaE"); +} - // Create the input args - auto a = DPCTLmalloc_shared(SIZE * sizeof(float), QRef); - ASSERT_TRUE(a != nullptr); - auto b = DPCTLmalloc_shared(SIZE * sizeof(float), QRef); - ASSERT_TRUE(b != nullptr); - auto c = DPCTLmalloc_shared(SIZE * sizeof(float), QRef); - ASSERT_TRUE(c != nullptr); +TEST_F(TestQueueSubmit, CheckForUInt8) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_UINT8_T, + "_ZTS11RangeKernelIhE"); +} - auto a_ptr = reinterpret_cast(unwrap(a)); - auto b_ptr = reinterpret_cast(unwrap(b)); - // Initialize a,b - for (auto i = 0ul; i < SIZE; ++i) { - a_ptr[i] = i + 1.0; - b_ptr[i] = i + 2.0; - } +TEST_F(TestQueueSubmit, CheckForInt16) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_INT16_T, + "_ZTS11RangeKernelIsE"); +} - // Create kernel args for axpy - float d = 10.0; - size_t gRange[] = {1, 1, SIZE}; - size_t lRange[] = {1, 1, 8}; - void *args2[4] = {unwrap(a), unwrap(b), unwrap(c), - (void *)&d}; - DPCTLKernelArgType addKernelArgTypes[] = {DPCTL_VOID_PTR, DPCTL_VOID_PTR, - DPCTL_VOID_PTR, DPCTL_FLOAT}; - DPCTLSyclEventRef events[1]; - events[0] = DPCTLEvent_Create(); +TEST_F(TestQueueSubmit, CheckForUInt16) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_UINT16_T, + "_ZTS11RangeKernelItE"); +} - auto ERef = - DPCTLQueue_SubmitNDRange(AxpyKernel, QRef, args2, addKernelArgTypes, 4, - gRange, lRange, 3, events, 1); - ASSERT_TRUE(ERef != nullptr); - DPCTLQueue_Wait(QRef); +TEST_F(TestQueueSubmit, CheckForInt32) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_INT32_T, + "_ZTS11RangeKernelIiE"); +} - // clean ups - DPCTLEvent_Delete(ERef); - DPCTLKernel_Delete(AxpyKernel); - DPCTLfree_with_queue((DPCTLSyclUSMRef)a, QRef); - DPCTLfree_with_queue((DPCTLSyclUSMRef)b, QRef); - DPCTLfree_with_queue((DPCTLSyclUSMRef)c, QRef); - DPCTLQueue_Delete(QRef); - DPCTLContext_Delete(CRef); - DPCTLKernelBundle_Delete(KBRef); - DPCTLDevice_Delete(DRef); - DPCTLDeviceSelector_Delete(DSRef); +TEST_F(TestQueueSubmit, CheckForUInt32) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_UINT32_T, + "_ZTS11RangeKernelIjE"); +} + +TEST_F(TestQueueSubmit, CheckForInt64) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_INT64_T, + "_ZTS11RangeKernelIlE"); +} + +TEST_F(TestQueueSubmit, CheckForUInt64) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_UINT64_T, + "_ZTS11RangeKernelImE"); +} + +TEST_F(TestQueueSubmit, CheckForFloat) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_FLOAT32_T, + "_ZTS11RangeKernelIfE"); +} + +TEST_F(TestQueueSubmitFP64, CheckForDouble) +{ + submit_kernel(QRef, KBRef, spirvBuffer_, spirvFileSize_, + DPCTLKernelArgType::DPCTL_FLOAT64_T, + "_ZTS11RangeKernelIdE"); +} + +TEST_F(TestQueueSubmit, CheckForUnsupportedArgTy) +{ + + int scalarVal = 3; + size_t Range[] = {SIZE}; + size_t RANGE_NDIMS = 1; + constexpr size_t NARGS = 4; + + auto kernel = DPCTLKernelBundle_GetKernel(KBRef, "_ZTS11RangeKernelIdE"); + void *args[NARGS] = {unwrap(nullptr), unwrap(nullptr), + unwrap(nullptr), (void *)&scalarVal}; + DPCTLKernelArgType addKernelArgTypes[] = {DPCTL_VOID_PTR, DPCTL_VOID_PTR, + DPCTL_VOID_PTR, + DPCTL_UNSUPPORTED_KERNEL_ARG}; + auto ERef = DPCTLQueue_SubmitRange(kernel, QRef, args, addKernelArgTypes, + NARGS, Range, RANGE_NDIMS, nullptr, 0); + + ASSERT_TRUE(ERef == nullptr); } struct TestQueueSubmitBarrier : public ::testing::Test