From fc6914bfdd5d90f04784ad7799b81efd342fd269 Mon Sep 17 00:00:00 2001 From: Cory Levels Date: Mon, 29 Jun 2020 12:47:55 -0700 Subject: [PATCH 01/13] Adding a test case for reqd_work_size using L0 backend. Resolved issues test case discovered. Signed-off-by: Cory Levels --- sycl/plugins/level_zero/pi_level0.cpp | 4 +- .../detail/error_handling/enqueue_kernel.cpp | 38 +++++- .../test/basic_tests/reqd_work_group_size.cpp | 109 ++++++++++++++++++ 3 files changed, 147 insertions(+), 4 deletions(-) create mode 100644 sycl/test/basic_tests/reqd_work_group_size.cpp diff --git a/sycl/plugins/level_zero/pi_level0.cpp b/sycl/plugins/level_zero/pi_level0.cpp index dc4ccfd988ca9..d5157c7028af5 100644 --- a/sycl/plugins/level_zero/pi_level0.cpp +++ b/sycl/plugins/level_zero/pi_level0.cpp @@ -259,7 +259,9 @@ static pi_result mapError(ze_result_t ZeResult) { {ZE_RESULT_ERROR_INVALID_NATIVE_BINARY, PI_INVALID_BINARY}, {ZE_RESULT_ERROR_INVALID_KERNEL_NAME, PI_INVALID_KERNEL_NAME}, {ZE_RESULT_ERROR_INVALID_FUNCTION_NAME, PI_BUILD_PROGRAM_FAILURE}, - {ZE_RESULT_ERROR_OVERLAPPING_REGIONS, PI_INVALID_OPERATION}}; + {ZE_RESULT_ERROR_OVERLAPPING_REGIONS, PI_INVALID_OPERATION}, + {ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, + PI_INVALID_WORK_GROUP_SIZE}}; auto It = ErrorMapping.find(ZeResult); if (It == ErrorMapping.end()) { return PI_ERROR_UNKNOWN; diff --git a/sycl/source/detail/error_handling/enqueue_kernel.cpp b/sycl/source/detail/error_handling/enqueue_kernel.cpp index 1438e66a1e80b..499146574b35e 100644 --- a/sycl/source/detail/error_handling/enqueue_kernel.cpp +++ b/sycl/source/detail/error_handling/enqueue_kernel.cpp @@ -21,6 +21,35 @@ namespace detail { namespace enqueue_kernel_launch { +bool L0HandleInvalidWorkGroupSize(const device_impl &DeviceImpl, + pi_kernel Kernel, const NDRDescT &NDRDesc) { + + const plugin &Plugin = DeviceImpl.getPlugin(); + RT::PiDevice Device = DeviceImpl.getHandleRef(); + + size_t CompileWGSize[3] = {0}; + Plugin.call( + Kernel, Device, PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE, + sizeof(size_t) * 3, CompileWGSize, nullptr); + + if (CompileWGSize[0] != 0) { + // PI_INVALID_WORK_GROUP_SIZE if local_work_size is specified and does not + // match the required work-group size for kernel in the program source. + if (NDRDesc.LocalSize[0] != CompileWGSize[0] || + NDRDesc.LocalSize[1] != CompileWGSize[1] || + NDRDesc.LocalSize[2] != CompileWGSize[2]) + throw sycl::nd_range_error( + "Specified local size doesn't match the required work-group size " + "specified in the program source", + PI_INVALID_WORK_GROUP_SIZE); + } + + // Fallback + constexpr pi_result Error = PI_INVALID_WORK_GROUP_SIZE; + throw runtime_error( + "OpenCL API failed. OpenCL API returns: " + codeToString(Error), Error); +} + bool oclHandleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, const NDRDescT &NDRDesc) { const bool HasLocalSize = (NDRDesc.LocalSize[0] != 0); @@ -86,7 +115,7 @@ bool oclHandleInvalidWorkGroupSize(const device_impl &DeviceImpl, std::to_string(MaxWGSize), PI_INVALID_WORK_GROUP_SIZE); } else { - // OpenCL 2.x: + // RELEVENT // OpenCL 2.x: // PI_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the // total number of work-items in the work-group computed as // local_work_size[0] * ... * local_work_size[work_dim – 1] is greater @@ -228,6 +257,8 @@ bool handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, DeviceImpl.get_platform().get_info(); if (PlatformName.find("OpenCL") != std::string::npos) { return oclHandleInvalidWorkGroupSize(DeviceImpl, Kernel, NDRDesc); + } else if (PlatformName.find("Level-Zero") != std::string::npos) { + return L0HandleInvalidWorkGroupSize(DeviceImpl, Kernel, NDRDesc); } // Fallback @@ -308,8 +339,9 @@ bool handleError(pi_result Error, const device_impl &DeviceImpl, // TODO: Handle other error codes default: - throw runtime_error( - "OpenCL API failed. OpenCL API returns: " + codeToString(Error), Error); + throw runtime_error("OpenCL API failed2. OpenCL API returns: " + + codeToString(Error), + Error); } } diff --git a/sycl/test/basic_tests/reqd_work_group_size.cpp b/sycl/test/basic_tests/reqd_work_group_size.cpp new file mode 100644 index 0000000000000..77926c7cbec74 --- /dev/null +++ b/sycl/test/basic_tests/reqd_work_group_size.cpp @@ -0,0 +1,109 @@ +// XFAIL: opencl +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +#include + +#include + +using namespace cl::sycl; + +int main() { + auto AsyncHandler = [](exception_list ES) { + for (auto &E : ES) { + std::rethrow_exception(E); + } + }; + + queue Q(AsyncHandler); + device D(Q.get_device()); + + string_class DeviceVendorName = D.get_info(); + auto DeviceType = D.get_info(); + + // parallel_for, (16, 16, 16) global, (8, 8, 8) local, reqd_wg_size(4, 4, 4) + // -> fail + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for( + nd_range<3>(range<3>(16, 16, 16), range<3>(8, 8, 8)), + [=](nd_item<3>) [[intel::reqd_work_group_size(4, 4, 4)]]{ + + }); + }); + Q.wait_and_throw(); + std::cerr << "Test case ReqdWGSizeNegativeA failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (nd_range_error &E) { + if (string_class(E.what()).find( + "Specified local size doesn't match the required work-group size " + "specified in the program source") == string_class::npos) { + std::cerr + << "Test case ReqdWGSizeNegativeA failed 1: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } catch (runtime_error &E) { + std::cerr << "Test case ReqdWGSizeNegativeA failed 2: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr << "Test case ReqdWGSizeNegativeA failed: something unexpected " + "has been caught" + << std::endl; + return 1; + } + + // Positive test-cases that should pass on any underlying OpenCL runtime + // parallel_for, (8, 8, 8) global, (4, 4, 4) local, reqd_wg_size(4, 4, 4) -> + // pass + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for( + nd_range<3>(range<3>(8, 8, 8), range<3>(4, 4, 4)), + [=](nd_item<3>) [[intel::reqd_work_group_size(4, 4, 4)]]{}); + }); + Q.wait_and_throw(); + } catch (nd_range_error &E) { + std::cerr << "Test case ReqdWGSizePositiveA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (runtime_error &E) { + std::cerr << "Test case ReqdWGSizePositiveA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr << "Test case ReqdWGSizePositiveA failed: something unexpected " + "has been caught" + << std::endl; + return 1; + } + + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for( + range<3>(16, 16, 16), [=](item<3>) [[intel::reqd_work_group_size(4, 4, 4)]]{}); + }); + Q.wait_and_throw(); + + } catch (nd_range_error &E) { + std::cerr << "Test case ReqdWGSizePositiveB failed 1: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (runtime_error &E) { + std::cerr + << "Test case ReqdWGSizePositiveB failed 2: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr << "Test case ReqdWGSizePositiveB failed: something unexpected " + "has been caught" + << std::endl; + return 1; + } + + return 0; +} From 2a3296eaf99198312bf2c19295785f5176c57687 Mon Sep 17 00:00:00 2001 From: Cory Levels Date: Thu, 2 Jul 2020 09:42:57 -0700 Subject: [PATCH 02/13] Inline HandleInvalidWorkGroupSize function, formatting, use get_backend() method --- sycl/plugins/level_zero/pi_level0.cpp | 2 +- .../detail/error_handling/enqueue_kernel.cpp | 333 ++++++++---------- 2 files changed, 149 insertions(+), 186 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level0.cpp b/sycl/plugins/level_zero/pi_level0.cpp index 48908e8985d44..87479d8b86cab 100644 --- a/sycl/plugins/level_zero/pi_level0.cpp +++ b/sycl/plugins/level_zero/pi_level0.cpp @@ -262,7 +262,7 @@ static pi_result mapError(ze_result_t ZeResult) { {ZE_RESULT_ERROR_INVALID_FUNCTION_NAME, PI_BUILD_PROGRAM_FAILURE}, {ZE_RESULT_ERROR_OVERLAPPING_REGIONS, PI_INVALID_OPERATION}, {ZE_RESULT_ERROR_INVALID_GROUP_SIZE_DIMENSION, - PI_INVALID_WORK_GROUP_SIZE}}; + PI_INVALID_WORK_GROUP_SIZE}, {ZE_RESULT_ERROR_MODULE_BUILD_FAILURE, PI_BUILD_PROGRAM_FAILURE}}; auto It = ErrorMapping.find(ZeResult); if (It == ErrorMapping.end()) { diff --git a/sycl/source/detail/error_handling/enqueue_kernel.cpp b/sycl/source/detail/error_handling/enqueue_kernel.cpp index 499146574b35e..ccb8efa632825 100644 --- a/sycl/source/detail/error_handling/enqueue_kernel.cpp +++ b/sycl/source/detail/error_handling/enqueue_kernel.cpp @@ -12,6 +12,7 @@ #include "error_handling.hpp" +#include #include #include @@ -21,47 +22,40 @@ namespace detail { namespace enqueue_kernel_launch { -bool L0HandleInvalidWorkGroupSize(const device_impl &DeviceImpl, - pi_kernel Kernel, const NDRDescT &NDRDesc) { +bool handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, + const NDRDescT &NDRDesc) { + const bool HasLocalSize = (NDRDesc.LocalSize[0] != 0); const plugin &Plugin = DeviceImpl.getPlugin(); RT::PiDevice Device = DeviceImpl.getHandleRef(); + // std::string Platform = DeviceImpl.get_platform(); + cl::sycl::platform Platform = DeviceImpl.get_platform(); + if (HasLocalSize) { + size_t MaxThreadsPerBlock[3] = {}; + Plugin.call( + Device, PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES, sizeof(MaxThreadsPerBlock), + MaxThreadsPerBlock, nullptr); - size_t CompileWGSize[3] = {0}; - Plugin.call( - Kernel, Device, PI_KERNEL_GROUP_INFO_COMPILE_WORK_GROUP_SIZE, - sizeof(size_t) * 3, CompileWGSize, nullptr); - - if (CompileWGSize[0] != 0) { - // PI_INVALID_WORK_GROUP_SIZE if local_work_size is specified and does not - // match the required work-group size for kernel in the program source. - if (NDRDesc.LocalSize[0] != CompileWGSize[0] || - NDRDesc.LocalSize[1] != CompileWGSize[1] || - NDRDesc.LocalSize[2] != CompileWGSize[2]) - throw sycl::nd_range_error( - "Specified local size doesn't match the required work-group size " - "specified in the program source", - PI_INVALID_WORK_GROUP_SIZE); + for (size_t I = 0; I < 3; ++I) { + if (MaxThreadsPerBlock[I] < NDRDesc.LocalSize[I]) { + throw sycl::nd_range_error( + "The number of work-items in each dimension of a work-group cannot " + "exceed info::device::max_work_item_sizes which is {" + + std::to_string(MaxThreadsPerBlock[0]) + ", " + + std::to_string(MaxThreadsPerBlock[1]) + ", " + + std::to_string(MaxThreadsPerBlock[2]) + "} for this device", + PI_INVALID_WORK_GROUP_SIZE); + } + } } - // Fallback - constexpr pi_result Error = PI_INVALID_WORK_GROUP_SIZE; - throw runtime_error( - "OpenCL API failed. OpenCL API returns: " + codeToString(Error), Error); -} - -bool oclHandleInvalidWorkGroupSize(const device_impl &DeviceImpl, - pi_kernel Kernel, const NDRDescT &NDRDesc) { - const bool HasLocalSize = (NDRDesc.LocalSize[0] != 0); - - const plugin &Plugin = DeviceImpl.getPlugin(); - RT::PiDevice Device = DeviceImpl.getHandleRef(); - size_t VerSize = 0; Plugin.call(Device, PI_DEVICE_INFO_VERSION, 0, nullptr, &VerSize); - assert(VerSize >= 10 && - "Unexpected device version string"); // strlen("OpenCL X.Y") + if (Platform.get_backend() == cl::sycl::backend::opencl) { + assert(VerSize >= 10 && + "Unexpected device version string"); // strlen("OpenCL X.Y") + } string_class VerStr(VerSize, '\0'); Plugin.call(Device, PI_DEVICE_INFO_VERSION, VerSize, &VerStr.front(), nullptr); @@ -77,12 +71,13 @@ bool oclHandleInvalidWorkGroupSize(const device_impl &DeviceImpl, // PI_INVALID_WORK_GROUP_SIZE if local_work_size is NULL and the // reqd_work_group_size attribute is used to declare the work-group size // for kernel in the program source. - if (!HasLocalSize && (Ver[0] == '1' || (Ver[0] == '2' && Ver[2] == '0'))) - throw sycl::nd_range_error( - "OpenCL 1.x and 2.0 requires to pass local size argument even if " - "required work-group size was specified in the program source", - PI_INVALID_WORK_GROUP_SIZE); - + if (Platform.get_backend() == cl::sycl::backend::opencl) { + if (!HasLocalSize && (Ver[0] == '1' || (Ver[0] == '2' && Ver[2] == '0'))) + throw sycl::nd_range_error( + "OpenCL 1.x and 2.0 requires to pass local size argument even if " + "required work-group size was specified in the program source", + PI_INVALID_WORK_GROUP_SIZE); + } // Any OpenCL version: // PI_INVALID_WORK_GROUP_SIZE if local_work_size is specified and does not // match the required work-group size for kernel in the program source. @@ -90,49 +85,50 @@ bool oclHandleInvalidWorkGroupSize(const device_impl &DeviceImpl, NDRDesc.LocalSize[1] != CompileWGSize[1] || NDRDesc.LocalSize[2] != CompileWGSize[2]) throw sycl::nd_range_error( - "Specified local size doesn't match the required work-group size " + "2Specified local size doesn't match the required work-group size " "specified in the program source", PI_INVALID_WORK_GROUP_SIZE); } - - if (Ver[0] == '1') { - // OpenCL 1.x: - // PI_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the - // total number of work-items in the work-group computed as - // local_work_size[0] * ... * local_work_size[work_dim – 1] is greater - // than the value specified by PI_DEVICE_MAX_WORK_GROUP_SIZE in - // table 4.3 - size_t MaxWGSize = 0; - Plugin.call( - Device, PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE, sizeof(size_t), &MaxWGSize, - nullptr); - const size_t TotalNumberOfWIs = - NDRDesc.LocalSize[0] * NDRDesc.LocalSize[1] * NDRDesc.LocalSize[2]; - if (TotalNumberOfWIs > MaxWGSize) - throw sycl::nd_range_error( - "Total number of work-items in a work-group cannot exceed " - "info::device::max_work_group_size which is equal to " + - std::to_string(MaxWGSize), - PI_INVALID_WORK_GROUP_SIZE); - } else { - // RELEVENT // OpenCL 2.x: - // PI_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the - // total number of work-items in the work-group computed as - // local_work_size[0] * ... * local_work_size[work_dim – 1] is greater - // than the value specified by PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE in - // table 5.21. - size_t KernelWGSize = 0; - Plugin.call( - Kernel, Device, PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE, sizeof(size_t), - &KernelWGSize, nullptr); - const size_t TotalNumberOfWIs = - NDRDesc.LocalSize[0] * NDRDesc.LocalSize[1] * NDRDesc.LocalSize[2]; - if (TotalNumberOfWIs > KernelWGSize) - throw sycl::nd_range_error( - "Total number of work-items in a work-group cannot exceed " - "info::kernel_work_group::work_group_size which is equal to " + - std::to_string(KernelWGSize) + " for this kernel", - PI_INVALID_WORK_GROUP_SIZE); + if (Platform.get_backend() == cl::sycl::backend::opencl) { + if (Ver[0] == '1') { + // OpenCL 1.x: + // PI_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the + // total number of work-items in the work-group computed as + // local_work_size[0] * ... * local_work_size[work_dim – 1] is greater + // than the value specified by PI_DEVICE_MAX_WORK_GROUP_SIZE in + // table 4.3 + size_t MaxWGSize = 0; + Plugin.call( + Device, PI_DEVICE_INFO_MAX_WORK_GROUP_SIZE, sizeof(size_t), + &MaxWGSize, nullptr); + const size_t TotalNumberOfWIs = + NDRDesc.LocalSize[0] * NDRDesc.LocalSize[1] * NDRDesc.LocalSize[2]; + if (TotalNumberOfWIs > MaxWGSize) + throw sycl::nd_range_error( + "Total number of work-items in a work-group cannot exceed " + "info::device::max_work_group_size which is equal to " + + std::to_string(MaxWGSize), + PI_INVALID_WORK_GROUP_SIZE); + } else { + // RELEVENT // OpenCL 2.x: + // PI_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the + // total number of work-items in the work-group computed as + // local_work_size[0] * ... * local_work_size[work_dim – 1] is greater + // than the value specified by PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE in + // table 5.21. + size_t KernelWGSize = 0; + Plugin.call( + Kernel, Device, PI_KERNEL_GROUP_INFO_WORK_GROUP_SIZE, sizeof(size_t), + &KernelWGSize, nullptr); + const size_t TotalNumberOfWIs = + NDRDesc.LocalSize[0] * NDRDesc.LocalSize[1] * NDRDesc.LocalSize[2]; + if (TotalNumberOfWIs > KernelWGSize) + throw sycl::nd_range_error( + "Total number of work-items in a work-group cannot exceed " + "info::kernel_work_group::work_group_size which is equal to " + + std::to_string(KernelWGSize) + " for this kernel", + PI_INVALID_WORK_GROUP_SIZE); + } } if (HasLocalSize) { @@ -146,121 +142,88 @@ bool oclHandleInvalidWorkGroupSize(const device_impl &DeviceImpl, NDRDesc.GlobalSize[2] % NDRDesc.LocalSize[2] != 0); // Is the local size of the workgroup greater than the global range size in // any dimension? This is a sub-case of NonUniformWGs. - const bool LocalExceedsGlobal = - NonUniformWGs && (NDRDesc.LocalSize[0] > NDRDesc.GlobalSize[0] || - NDRDesc.LocalSize[1] > NDRDesc.GlobalSize[1] || - NDRDesc.LocalSize[2] > NDRDesc.GlobalSize[2]); - - if (NonUniformWGs) { - if (Ver[0] == '1') { - // OpenCL 1.x: - // PI_INVALID_WORK_GROUP_SIZE if local_work_size is specified and - // number of workitems specified by global_work_size is not evenly - // divisible by size of work-group given by local_work_size - if (LocalExceedsGlobal) - throw sycl::nd_range_error("Local workgroup size cannot be greater " - "than global range in any dimension", - PI_INVALID_WORK_GROUP_SIZE); - else - throw sycl::nd_range_error( - "Global_work_size must be evenly divisible by local_work_size. " - "Non-uniform work-groups are not supported by the target device", - PI_INVALID_WORK_GROUP_SIZE); - } else { - // OpenCL 2.x: - // PI_INVALID_WORK_GROUP_SIZE if the program was compiled with - // –cl-uniform-work-group-size and the number of work-items specified - // by global_work_size is not evenly divisible by size of work-group - // given by local_work_size - - pi_program Program = nullptr; - Plugin.call(Kernel, PI_KERNEL_INFO_PROGRAM, - sizeof(pi_program), &Program, - nullptr); - size_t OptsSize = 0; - Plugin.call( - Program, Device, PI_PROGRAM_BUILD_INFO_OPTIONS, 0, nullptr, - &OptsSize); - string_class Opts(OptsSize, '\0'); - Plugin.call( - Program, Device, PI_PROGRAM_BUILD_INFO_OPTIONS, OptsSize, - &Opts.front(), nullptr); - const bool HasStd20 = Opts.find("-cl-std=CL2.0") != string_class::npos; - const bool RequiresUniformWGSize = - Opts.find("-cl-uniform-work-group-size") != string_class::npos; - std::string message = - LocalExceedsGlobal - ? "Local workgroup size greater than global range size. " - : "Global_work_size not evenly divisible by local_work_size. "; - if (!HasStd20) - throw sycl::nd_range_error( - message.append("Non-uniform work-groups are not allowed by " - "default. Underlying " - "OpenCL 2.x implementation supports this feature " - "and to enable " - "it, build device program with -cl-std=CL2.0"), - PI_INVALID_WORK_GROUP_SIZE); - else if (RequiresUniformWGSize) - throw sycl::nd_range_error( - message.append( - "Non-uniform work-groups are not allowed by when " - "-cl-uniform-work-group-size flag is used. Underlying " - "OpenCL 2.x implementation supports this feature, but it is " - "being " - "disabled by -cl-uniform-work-group-size build flag"), - PI_INVALID_WORK_GROUP_SIZE); - // else unknown. fallback (below) + if (Platform.get_backend() == cl::sycl::backend::opencl) { + const bool LocalExceedsGlobal = + NonUniformWGs && (NDRDesc.LocalSize[0] > NDRDesc.GlobalSize[0] || + NDRDesc.LocalSize[1] > NDRDesc.GlobalSize[1] || + NDRDesc.LocalSize[2] > NDRDesc.GlobalSize[2]); + + if (NonUniformWGs) { + if (Ver[0] == '1') { + // OpenCL 1.x: + // PI_INVALID_WORK_GROUP_SIZE if local_work_size is specified and + // number of workitems specified by global_work_size is not evenly + // divisible by size of work-group given by local_work_size + if (LocalExceedsGlobal) + throw sycl::nd_range_error("Local workgroup size cannot be greater " + "than global range in any dimension", + PI_INVALID_WORK_GROUP_SIZE); + else + throw sycl::nd_range_error( + "Global_work_size must be evenly divisible by local_work_size. " + "Non-uniform work-groups are not supported by the target " + "device", + PI_INVALID_WORK_GROUP_SIZE); + } else { + // OpenCL 2.x: + // PI_INVALID_WORK_GROUP_SIZE if the program was compiled with + // –cl-uniform-work-group-size and the number of work-items specified + // by global_work_size is not evenly divisible by size of work-group + // given by local_work_size + + pi_program Program = nullptr; + Plugin.call( + Kernel, PI_KERNEL_INFO_PROGRAM, sizeof(pi_program), &Program, + nullptr); + size_t OptsSize = 0; + Plugin.call( + Program, Device, PI_PROGRAM_BUILD_INFO_OPTIONS, 0, nullptr, + &OptsSize); + string_class Opts(OptsSize, '\0'); + Plugin.call( + Program, Device, PI_PROGRAM_BUILD_INFO_OPTIONS, OptsSize, + &Opts.front(), nullptr); + const bool HasStd20 = + Opts.find("-cl-std=CL2.0") != string_class::npos; + const bool RequiresUniformWGSize = + Opts.find("-cl-uniform-work-group-size") != string_class::npos; + std::string message = + LocalExceedsGlobal + ? "Local workgroup size greater than global range size. " + : "Global_work_size not evenly divisible by " + "local_work_size. "; + if (!HasStd20) + throw sycl::nd_range_error( + message.append( + "Non-uniform work-groups are not allowed by " + "default. Underlying " + "OpenCL 2.x implementation supports this feature " + "and to enable " + "it, build device program with -cl-std=CL2.0"), + PI_INVALID_WORK_GROUP_SIZE); + else if (RequiresUniformWGSize) + throw sycl::nd_range_error( + message.append( + "Non-uniform work-groups are not allowed by when " + "-cl-uniform-work-group-size flag is used. Underlying " + "OpenCL 2.x implementation supports this feature, but it " + "is " + "being " + "disabled by -cl-uniform-work-group-size build flag"), + PI_INVALID_WORK_GROUP_SIZE); + // else unknown. fallback (below) + } } } + throw sycl::nd_range_error( + "Non-uniform work-groups are not supported by the target device", + PI_INVALID_WORK_GROUP_SIZE); } - // TODO: required number of sub-groups, OpenCL 2.1: // PI_INVALID_WORK_GROUP_SIZE if local_work_size is specified and is not // consistent with the required number of sub-groups for kernel in the // program source. - // Fallback - constexpr pi_result Error = PI_INVALID_WORK_GROUP_SIZE; - throw runtime_error( - "OpenCL API failed. OpenCL API returns: " + codeToString(Error), Error); -} - -bool handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, - const NDRDescT &NDRDesc) { - const bool HasLocalSize = (NDRDesc.LocalSize[0] != 0); - - const plugin &Plugin = DeviceImpl.getPlugin(); - RT::PiDevice Device = DeviceImpl.getHandleRef(); - - if (HasLocalSize) { - size_t MaxThreadsPerBlock[3] = {}; - Plugin.call( - Device, PI_DEVICE_INFO_MAX_WORK_ITEM_SIZES, sizeof(MaxThreadsPerBlock), - MaxThreadsPerBlock, nullptr); - - for (size_t I = 0; I < 3; ++I) { - if (MaxThreadsPerBlock[I] < NDRDesc.LocalSize[I]) { - throw sycl::nd_range_error( - "The number of work-items in each dimension of a work-group cannot " - "exceed info::device::max_work_item_sizes which is {" + - std::to_string(MaxThreadsPerBlock[0]) + ", " + - std::to_string(MaxThreadsPerBlock[1]) + ", " + - std::to_string(MaxThreadsPerBlock[2]) + "} for this device", - PI_INVALID_WORK_GROUP_SIZE); - } - } - } - - // Backend specific invalid work group size handing - // TODO: Find a better way to determine the backend - std::string PlatformName = - DeviceImpl.get_platform().get_info(); - if (PlatformName.find("OpenCL") != std::string::npos) { - return oclHandleInvalidWorkGroupSize(DeviceImpl, Kernel, NDRDesc); - } else if (PlatformName.find("Level-Zero") != std::string::npos) { - return L0HandleInvalidWorkGroupSize(DeviceImpl, Kernel, NDRDesc); - } - // Fallback constexpr pi_result Error = PI_INVALID_WORK_GROUP_SIZE; throw runtime_error( From af50f436c1288d00a87bb19a37c1dc3415fa22e9 Mon Sep 17 00:00:00 2001 From: Cory Levels Date: Thu, 2 Jul 2020 09:50:05 -0700 Subject: [PATCH 03/13] Remove old comment. --- sycl/source/detail/error_handling/enqueue_kernel.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/error_handling/enqueue_kernel.cpp b/sycl/source/detail/error_handling/enqueue_kernel.cpp index ccb8efa632825..4162617b48a2a 100644 --- a/sycl/source/detail/error_handling/enqueue_kernel.cpp +++ b/sycl/source/detail/error_handling/enqueue_kernel.cpp @@ -28,8 +28,8 @@ bool handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, const plugin &Plugin = DeviceImpl.getPlugin(); RT::PiDevice Device = DeviceImpl.getHandleRef(); - // std::string Platform = DeviceImpl.get_platform(); cl::sycl::platform Platform = DeviceImpl.get_platform(); + if (HasLocalSize) { size_t MaxThreadsPerBlock[3] = {}; Plugin.call( From 14673ff0327eb9cc39ccc97cfa982f438eef1e0e Mon Sep 17 00:00:00 2001 From: Cory Levels Date: Thu, 2 Jul 2020 11:38:25 -0700 Subject: [PATCH 04/13] mark xfail with cuda. --- sycl/test/basic_tests/reqd_work_group_size.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test/basic_tests/reqd_work_group_size.cpp b/sycl/test/basic_tests/reqd_work_group_size.cpp index 77926c7cbec74..0f6ba6383d8ea 100644 --- a/sycl/test/basic_tests/reqd_work_group_size.cpp +++ b/sycl/test/basic_tests/reqd_work_group_size.cpp @@ -1,4 +1,4 @@ -// XFAIL: opencl +// XFAIL: cuda || opencl // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out From fbb06558c1de62e3c6620cc22ef2b6795e7e6705 Mon Sep 17 00:00:00 2001 From: clevels <59889830+clevels@users.noreply.github.com> Date: Fri, 3 Jul 2020 09:09:32 -0500 Subject: [PATCH 05/13] Update sycl/source/detail/error_handling/enqueue_kernel.cpp Co-authored-by: Alexey Bader --- sycl/source/detail/error_handling/enqueue_kernel.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/error_handling/enqueue_kernel.cpp b/sycl/source/detail/error_handling/enqueue_kernel.cpp index 4162617b48a2a..533f5861c02d2 100644 --- a/sycl/source/detail/error_handling/enqueue_kernel.cpp +++ b/sycl/source/detail/error_handling/enqueue_kernel.cpp @@ -40,7 +40,7 @@ bool handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, if (MaxThreadsPerBlock[I] < NDRDesc.LocalSize[I]) { throw sycl::nd_range_error( "The number of work-items in each dimension of a work-group cannot " - "exceed info::device::max_work_item_sizes which is {" + + "exceed {" + std::to_string(MaxThreadsPerBlock[0]) + ", " + std::to_string(MaxThreadsPerBlock[1]) + ", " + std::to_string(MaxThreadsPerBlock[2]) + "} for this device", From 9182228e5d3a253db3d474ca8a4b69615ca59daa Mon Sep 17 00:00:00 2001 From: clevels <59889830+clevels@users.noreply.github.com> Date: Fri, 3 Jul 2020 09:10:05 -0500 Subject: [PATCH 06/13] Update sycl/source/detail/error_handling/enqueue_kernel.cpp Co-authored-by: Alexey Bader --- sycl/source/detail/error_handling/enqueue_kernel.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/source/detail/error_handling/enqueue_kernel.cpp b/sycl/source/detail/error_handling/enqueue_kernel.cpp index 533f5861c02d2..7427a77b85471 100644 --- a/sycl/source/detail/error_handling/enqueue_kernel.cpp +++ b/sycl/source/detail/error_handling/enqueue_kernel.cpp @@ -106,7 +106,6 @@ bool handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, if (TotalNumberOfWIs > MaxWGSize) throw sycl::nd_range_error( "Total number of work-items in a work-group cannot exceed " - "info::device::max_work_group_size which is equal to " + std::to_string(MaxWGSize), PI_INVALID_WORK_GROUP_SIZE); } else { From 12487eddd12d8374ed07c090fb5045b87d6812cb Mon Sep 17 00:00:00 2001 From: clevels <59889830+clevels@users.noreply.github.com> Date: Fri, 3 Jul 2020 09:10:15 -0500 Subject: [PATCH 07/13] Update sycl/source/detail/error_handling/enqueue_kernel.cpp Co-authored-by: Alexey Bader --- sycl/source/detail/error_handling/enqueue_kernel.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/source/detail/error_handling/enqueue_kernel.cpp b/sycl/source/detail/error_handling/enqueue_kernel.cpp index 7427a77b85471..5c61832d74f31 100644 --- a/sycl/source/detail/error_handling/enqueue_kernel.cpp +++ b/sycl/source/detail/error_handling/enqueue_kernel.cpp @@ -124,7 +124,6 @@ bool handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, if (TotalNumberOfWIs > KernelWGSize) throw sycl::nd_range_error( "Total number of work-items in a work-group cannot exceed " - "info::kernel_work_group::work_group_size which is equal to " + std::to_string(KernelWGSize) + " for this kernel", PI_INVALID_WORK_GROUP_SIZE); } From b9deaf3f93b148b7786924eb1c366ee50bbbdd6c Mon Sep 17 00:00:00 2001 From: clevels <59889830+clevels@users.noreply.github.com> Date: Fri, 3 Jul 2020 09:10:30 -0500 Subject: [PATCH 08/13] Update sycl/source/detail/error_handling/enqueue_kernel.cpp Co-authored-by: Alexey Bader --- sycl/source/detail/error_handling/enqueue_kernel.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/error_handling/enqueue_kernel.cpp b/sycl/source/detail/error_handling/enqueue_kernel.cpp index 5c61832d74f31..ac9ccc6ec47eb 100644 --- a/sycl/source/detail/error_handling/enqueue_kernel.cpp +++ b/sycl/source/detail/error_handling/enqueue_kernel.cpp @@ -139,7 +139,7 @@ bool handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, (NDRDesc.LocalSize[2] != 0 && NDRDesc.GlobalSize[2] % NDRDesc.LocalSize[2] != 0); // Is the local size of the workgroup greater than the global range size in - // any dimension? This is a sub-case of NonUniformWGs. + // any dimension? if (Platform.get_backend() == cl::sycl::backend::opencl) { const bool LocalExceedsGlobal = NonUniformWGs && (NDRDesc.LocalSize[0] > NDRDesc.GlobalSize[0] || From 33e41cf33ebd7164e5081fddb7415744b9fd751e Mon Sep 17 00:00:00 2001 From: clevels <59889830+clevels@users.noreply.github.com> Date: Fri, 3 Jul 2020 09:10:38 -0500 Subject: [PATCH 09/13] Update sycl/source/detail/error_handling/enqueue_kernel.cpp Co-authored-by: Alexey Bader --- sycl/source/detail/error_handling/enqueue_kernel.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/error_handling/enqueue_kernel.cpp b/sycl/source/detail/error_handling/enqueue_kernel.cpp index ac9ccc6ec47eb..307f3739f610d 100644 --- a/sycl/source/detail/error_handling/enqueue_kernel.cpp +++ b/sycl/source/detail/error_handling/enqueue_kernel.cpp @@ -300,7 +300,7 @@ bool handleError(pi_result Error, const device_impl &DeviceImpl, // TODO: Handle other error codes default: - throw runtime_error("OpenCL API failed2. OpenCL API returns: " + + throw runtime_error("OpenCL API failed. OpenCL API returns: " + codeToString(Error), Error); } From d5e7a22e5f4894e32a6458c80ea339c320bcb799 Mon Sep 17 00:00:00 2001 From: clevels <59889830+clevels@users.noreply.github.com> Date: Fri, 3 Jul 2020 09:31:49 -0500 Subject: [PATCH 10/13] Update sycl/source/detail/error_handling/enqueue_kernel.cpp Co-authored-by: smaslov-intel <48694368+smaslov-intel@users.noreply.github.com> --- sycl/source/detail/error_handling/enqueue_kernel.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/source/detail/error_handling/enqueue_kernel.cpp b/sycl/source/detail/error_handling/enqueue_kernel.cpp index 307f3739f610d..f09b7f109def8 100644 --- a/sycl/source/detail/error_handling/enqueue_kernel.cpp +++ b/sycl/source/detail/error_handling/enqueue_kernel.cpp @@ -85,7 +85,7 @@ bool handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, NDRDesc.LocalSize[1] != CompileWGSize[1] || NDRDesc.LocalSize[2] != CompileWGSize[2]) throw sycl::nd_range_error( - "2Specified local size doesn't match the required work-group size " + "Specified local size doesn't match the required work-group size " "specified in the program source", PI_INVALID_WORK_GROUP_SIZE); } From 361fe858bf8e56a6f23a829e6f9ee5695923ffff Mon Sep 17 00:00:00 2001 From: Cory Levels Date: Fri, 3 Jul 2020 07:34:33 -0700 Subject: [PATCH 11/13] Address comments. --- sycl/source/detail/error_handling/enqueue_kernel.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/error_handling/enqueue_kernel.cpp b/sycl/source/detail/error_handling/enqueue_kernel.cpp index f09b7f109def8..7bd0fee42e14e 100644 --- a/sycl/source/detail/error_handling/enqueue_kernel.cpp +++ b/sycl/source/detail/error_handling/enqueue_kernel.cpp @@ -105,7 +105,7 @@ bool handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, NDRDesc.LocalSize[0] * NDRDesc.LocalSize[1] * NDRDesc.LocalSize[2]; if (TotalNumberOfWIs > MaxWGSize) throw sycl::nd_range_error( - "Total number of work-items in a work-group cannot exceed " + "Total number of work-items in a work-group cannot exceed " + std::to_string(MaxWGSize), PI_INVALID_WORK_GROUP_SIZE); } else { @@ -123,7 +123,7 @@ bool handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, NDRDesc.LocalSize[0] * NDRDesc.LocalSize[1] * NDRDesc.LocalSize[2]; if (TotalNumberOfWIs > KernelWGSize) throw sycl::nd_range_error( - "Total number of work-items in a work-group cannot exceed " + "Total number of work-items in a work-group cannot exceed " + std::to_string(KernelWGSize) + " for this kernel", PI_INVALID_WORK_GROUP_SIZE); } From c593556c6fc63f390f379178117b0504ffbdf36e Mon Sep 17 00:00:00 2001 From: Cory Levels Date: Fri, 3 Jul 2020 07:39:36 -0700 Subject: [PATCH 12/13] Fix formatting. --- sycl/source/detail/error_handling/enqueue_kernel.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/error_handling/enqueue_kernel.cpp b/sycl/source/detail/error_handling/enqueue_kernel.cpp index 7bd0fee42e14e..b97f9738e81b1 100644 --- a/sycl/source/detail/error_handling/enqueue_kernel.cpp +++ b/sycl/source/detail/error_handling/enqueue_kernel.cpp @@ -300,9 +300,8 @@ bool handleError(pi_result Error, const device_impl &DeviceImpl, // TODO: Handle other error codes default: - throw runtime_error("OpenCL API failed. OpenCL API returns: " + - codeToString(Error), - Error); + throw runtime_error( + "OpenCL API failed. OpenCL API returns: " + codeToString(Error), Error); } } From e2c142fa30b5c24b3e15d62dd39b5c7c03f0a7df Mon Sep 17 00:00:00 2001 From: Cory Levels Date: Fri, 3 Jul 2020 09:32:34 -0700 Subject: [PATCH 13/13] Update error message in parallel_for_range.cpp --- sycl/test/basic_tests/parallel_for_range.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sycl/test/basic_tests/parallel_for_range.cpp b/sycl/test/basic_tests/parallel_for_range.cpp index d5ed05868d1c3..3031d3d30f388 100644 --- a/sycl/test/basic_tests/parallel_for_range.cpp +++ b/sycl/test/basic_tests/parallel_for_range.cpp @@ -300,8 +300,7 @@ int main() { return 1; // We shouldn't be here, exception is expected } catch (nd_range_error &E) { if (string_class(E.what()).find( - "Total number of work-items in a work-group cannot exceed " - "info::device::max_work_group_size which is equal to " + + "Total number of work-items in a work-group cannot exceed " + std::to_string(MaxDeviceWGSize)) == string_class::npos) { std::cerr << "Test case OpenCL1XNegativeC failed: unexpected exception: " @@ -349,8 +348,7 @@ int main() { return 1; // We shouldn't be here, exception is expected } catch (nd_range_error &E) { if (string_class(E.what()).find( - "Total number of work-items in a work-group cannot exceed " - "info::kernel_work_group::work_group_size which is equal to " + + "Total number of work-items in a work-group cannot exceed " + std::to_string(MaxKernelWGSize) + " for this kernel") == string_class::npos) { std::cerr