diff --git a/sycl/include/CL/sycl/detail/cg.hpp b/sycl/include/CL/sycl/detail/cg.hpp index 8155af774f8c5..60c641a2d944c 100644 --- a/sycl/include/CL/sycl/detail/cg.hpp +++ b/sycl/include/CL/sycl/detail/cg.hpp @@ -237,7 +237,7 @@ class HostKernel : public HostKernelBase { for (int I = 0; I < Dims; ++I) { if (NDRDesc.LocalSize[I] == 0 || NDRDesc.GlobalSize[I] % NDRDesc.LocalSize[I] != 0) - throw sycl::runtime_error("Invalid local size for global size"); + throw sycl::nd_range_error("Invalid local size for global size"); GroupSize[I] = NDRDesc.GlobalSize[I] / NDRDesc.LocalSize[I]; } @@ -278,7 +278,7 @@ class HostKernel : public HostKernelBase { for (int I = 0; I < Dims; ++I) { if (NDRDesc.LocalSize[I] == 0 || NDRDesc.GlobalSize[I] % NDRDesc.LocalSize[I] != 0) - throw sycl::runtime_error("Invalid local size for global size"); + throw sycl::nd_range_error("Invalid local size for global size"); NGroups[I] = NDRDesc.GlobalSize[I] / NDRDesc.LocalSize[I]; } @@ -385,15 +385,6 @@ class CGExecKernel : public CG { MStreams(std::move(Streams)) { assert((getType() == RUN_ON_HOST_INTEL || getType() == KERNEL) && "Wrong type of exec kernel CG."); - - if (MNDRDesc.LocalSize.size() > 0) { - range<3> Excess = (MNDRDesc.GlobalSize % MNDRDesc.LocalSize); - for (int I = 0; I < 3; I++) { - if (Excess[I] != 0) - throw nd_range_error("Global size is not a multiple of local size", - CL_INVALID_WORK_GROUP_SIZE); - } - } } std::vector getArguments() const { return MArgs; } diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index 6358967a49a74..951cffe2688b4 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -62,7 +62,8 @@ typedef enum { PI_INVALID_DEVICE = CL_INVALID_DEVICE, PI_INVALID_BINARY = CL_INVALID_BINARY, PI_MISALIGNED_SUB_BUFFER_OFFSET = CL_MISALIGNED_SUB_BUFFER_OFFSET, - PI_OUT_OF_HOST_MEMORY = CL_OUT_OF_HOST_MEMORY + PI_OUT_OF_HOST_MEMORY = CL_OUT_OF_HOST_MEMORY, + PI_INVALID_WORK_GROUP_SIZE = CL_INVALID_WORK_GROUP_SIZE } _pi_result; typedef enum { @@ -88,7 +89,9 @@ typedef enum { PI_DEVICE_INFO_PARENT = CL_DEVICE_PARENT_DEVICE, PI_DEVICE_INFO_PLATFORM = CL_DEVICE_PLATFORM, PI_DEVICE_INFO_PARTITION_TYPE = CL_DEVICE_PARTITION_TYPE, - PI_DEVICE_INFO_NAME = CL_DEVICE_NAME + PI_DEVICE_INFO_NAME = CL_DEVICE_NAME, + PI_DEVICE_VERSION = CL_DEVICE_VERSION, + PI_DEVICE_MAX_WORK_GROUP_SIZE = CL_DEVICE_MAX_WORK_GROUP_SIZE } _pi_device_info; // TODO: populate diff --git a/sycl/source/CMakeLists.txt b/sycl/source/CMakeLists.txt index dfd5bac3df8ad..d79f2d6797fa8 100644 --- a/sycl/source/CMakeLists.txt +++ b/sycl/source/CMakeLists.txt @@ -46,6 +46,7 @@ set(SYCL_SOURCES "detail/context_impl.cpp" "detail/device_impl.cpp" "detail/device_info.cpp" + "detail/error_handling/enqueue_kernel.cpp" "detail/event_impl.cpp" "detail/force_device.cpp" "detail/helpers.cpp" diff --git a/sycl/source/detail/error_handling/enqueue_kernel.cpp b/sycl/source/detail/error_handling/enqueue_kernel.cpp new file mode 100644 index 0000000000000..4c4e8c4b8217c --- /dev/null +++ b/sycl/source/detail/error_handling/enqueue_kernel.cpp @@ -0,0 +1,183 @@ +//===------------------- enqueue_kernel.cpp ---------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// SYCL error handling of enqueue kernel operations +// +//===----------------------------------------------------------------------===// + +#include "error_handling.hpp" + +#include + +namespace cl { +namespace sycl { +namespace detail { + +namespace enqueue_kernel_launch { + +bool handleInvalidWorkGroupSize(pi_device Device, pi_kernel Kernel, + const NDRDescT &NDRDesc) { + const bool HasLocalSize = (NDRDesc.LocalSize[0] != 0); + + size_t VerSize = 0; + PI_CALL(piDeviceGetInfo)(Device, PI_DEVICE_VERSION, 0, nullptr, &VerSize); + assert(VerSize >= 10 && + "Unexpected device version string"); // strlen("OpenCL X.Y") + string_class VerStr(VerSize, '\0'); + PI_CALL(piDeviceGetInfo)(Device, PI_DEVICE_VERSION, VerSize, &VerStr.front(), + nullptr); + const char *Ver = &VerStr[7]; // strlen("OpenCL ") + + size_t CompileWGSize[3] = {0}; + PI_CALL(piKernelGetGroupInfo)(Kernel, Device, + CL_KERNEL_COMPILE_WORK_GROUP_SIZE, + sizeof(size_t) * 3, CompileWGSize, nullptr); + + if (CompileWGSize[0] != 0) { + // OpenCL 1.x && 2.0: + // CL_INVALID_WORK_GROUP_SIZE if local_work_size is NULL and the + // reqd_work_group_size attribute is used to declare the work-group size + // for kernel in the program source. + if (!HasLocalSize && (Ver[0] == '1' || (Ver[0] == '2' && Ver[2] == '0'))) + throw sycl::nd_range_error( + "OpenCL 1.x and 2.0 requires to pass local size argument even if " + "required work-group size was specified in the program source", + PI_INVALID_WORK_GROUP_SIZE); + + // Any OpenCL version: + // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and does not + // match the required work-group size for kernel in the program source. + if (NDRDesc.LocalSize[0] != CompileWGSize[0] || + NDRDesc.LocalSize[1] != CompileWGSize[1] || + NDRDesc.LocalSize[2] != CompileWGSize[2]) + throw sycl::nd_range_error( + "Specified local size doesn't match the required work-group size " + "specified in the program source", + PI_INVALID_WORK_GROUP_SIZE); + } + + if (Ver[0] == '1') { + // OpenCL 1.x: + // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the + // total number of work-items in the work-group computed as + // local_work_size[0] * ... * local_work_size[work_dim – 1] is greater + // than the value specified by CL_DEVICE_MAX_WORK_GROUP_SIZE in + // table 4.3 + size_t MaxWGSize = 0; + PI_CALL(piDeviceGetInfo)(Device, PI_DEVICE_MAX_WORK_GROUP_SIZE, + sizeof(size_t), &MaxWGSize, nullptr); + const size_t TotalNumberOfWIs = + NDRDesc.LocalSize[0] * NDRDesc.LocalSize[1] * NDRDesc.LocalSize[2]; + if (TotalNumberOfWIs > MaxWGSize) + throw sycl::nd_range_error( + "Total number of work-items in a work-group cannot exceed " + "info::device::max_work_group_size which is equal to " + + std::to_string(MaxWGSize), + PI_INVALID_WORK_GROUP_SIZE); + } else { + // OpenCL 2.x: + // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the + // total number of work-items in the work-group computed as + // local_work_size[0] * ... * local_work_size[work_dim – 1] is greater + // than the value specified by CL_KERNEL_WORK_GROUP_SIZE in table 5.21. + size_t KernelWGSize = 0; + PI_CALL(piKernelGetGroupInfo)(Kernel, Device, CL_KERNEL_WORK_GROUP_SIZE, + sizeof(size_t), &KernelWGSize, nullptr); + const size_t TotalNumberOfWIs = + NDRDesc.LocalSize[0] * NDRDesc.LocalSize[1] * NDRDesc.LocalSize[2]; + if (TotalNumberOfWIs > KernelWGSize) + throw sycl::nd_range_error( + "Total number of work-items in a work-group cannot exceed " + "info::kernel_work_group::work_group_size which is equal to " + + std::to_string(KernelWGSize) + " for this kernel", + PI_INVALID_WORK_GROUP_SIZE); + } + + if (HasLocalSize) { + const bool NonUniformWGs = + (NDRDesc.LocalSize[0] != 0 && + NDRDesc.GlobalSize[0] % NDRDesc.LocalSize[0] != 0) || + (NDRDesc.LocalSize[1] != 0 && + NDRDesc.GlobalSize[1] % NDRDesc.LocalSize[1] != 0) || + (NDRDesc.LocalSize[2] != 0 && + NDRDesc.GlobalSize[2] % NDRDesc.LocalSize[2] != 0); + + if (Ver[0] == '1') { + // OpenCL 1.x: + // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and + // number of workitems specified by global_work_size is not evenly + // divisible by size of work-group given by local_work_size + + if (NonUniformWGs) + throw sycl::nd_range_error( + "Non-uniform work-groups are not supported by the target device", + PI_INVALID_WORK_GROUP_SIZE); + } else { + // OpenCL 2.x: + // CL_INVALID_WORK_GROUP_SIZE if the program was compiled with + // –cl-uniform-work-group-size and the number of work-items specified + // by global_work_size is not evenly divisible by size of work-group + // given by local_work_size + + pi_program Program = nullptr; + PI_CALL(piKernelGetInfo)(Kernel, CL_KERNEL_PROGRAM, sizeof(pi_program), + &Program, nullptr); + size_t OptsSize = 0; + PI_CALL(piProgramGetBuildInfo)(Program, Device, CL_PROGRAM_BUILD_OPTIONS, + 0, nullptr, &OptsSize); + string_class Opts(OptsSize, '\0'); + PI_CALL(piProgramGetBuildInfo)(Program, Device, CL_PROGRAM_BUILD_OPTIONS, + OptsSize, &Opts.front(), nullptr); + if (NonUniformWGs) { + const bool HasStd20 = Opts.find("-cl-std=CL2.0") != string_class::npos; + if (!HasStd20) + throw sycl::nd_range_error( + "Non-uniform work-groups are not allowed by default. Underlying " + "OpenCL 2.x implementation supports this feature and to enable " + "it, build device program with -cl-std=CL2.0", + PI_INVALID_WORK_GROUP_SIZE); + else + throw sycl::nd_range_error( + "Non-uniform work-groups are not allowed by default. Underlying " + "OpenCL 2.x implementation supports this feature, but it is " + "disabled by -cl-uniform-work-group-size build flag", + PI_INVALID_WORK_GROUP_SIZE); + } + } + } + + // TODO: required number of sub-groups, OpenCL 2.1: + // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and is not + // consistent with the required number of sub-groups for kernel in the + // program source. + + // Fallback + constexpr pi_result Error = PI_INVALID_WORK_GROUP_SIZE; + throw runtime_error( + "OpenCL API failed. OpenCL API returns: " + codeToString(Error), Error); +} + +bool handleError(pi_result Error, pi_device Device, pi_kernel Kernel, + const NDRDescT &NDRDesc) { + assert(Error != PI_SUCCESS && + "Success is expected to be handled on caller side"); + switch (Error) { + case PI_INVALID_WORK_GROUP_SIZE: + return handleInvalidWorkGroupSize(Device, Kernel, NDRDesc); + // TODO: Handle other error codes + default: + throw runtime_error( + "OpenCL API failed. OpenCL API returns: " + codeToString(Error), Error); + } +} + +} // namespace enqueue_kernel_launch + +} // namespace detail +} // namespace sycl +} // namespace cl diff --git a/sycl/source/detail/error_handling/error_handling.hpp b/sycl/source/detail/error_handling/error_handling.hpp new file mode 100644 index 0000000000000..5bdf0cb90ffa6 --- /dev/null +++ b/sycl/source/detail/error_handling/error_handling.hpp @@ -0,0 +1,32 @@ +//===-------- error_handling.hpp - SYCL error handling ---------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +#include +#include + +namespace cl { +namespace sycl { +namespace detail { + +namespace enqueue_kernel_launch { +/// Analyzes error code and arguments of piEnqueueKernelLaunch to emit +/// user-friendly exception describing the problem. +/// +/// This function is expected to be called only for non-success error codes, +/// i.e. the first argument must not be equal to PI_SUCCESS. +/// +/// This function actually never returns and always throws an exception with +/// error description. +bool handleError(pi_result, pi_device, pi_kernel, const NDRDescT &); +} // namespace enqueue_kernel_launch + +} // namespace detail +} // namespace sycl +} // namespace cl diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 5caccb13bbd89..3ad9ee86cbc9c 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +#include + #include "CL/sycl/access/access.hpp" #include #include @@ -940,11 +942,19 @@ cl_int ExecCGCommand::enqueueImp() { ReverseRangeDimensionsForKernel(NDRDesc); - PI_CALL(piEnqueueKernelLaunch)( + pi_result Error = PI_CALL_NOCHECK(piEnqueueKernelLaunch)( MQueue->getHandleRef(), Kernel, NDRDesc.Dims, &NDRDesc.GlobalOffset[0], &NDRDesc.GlobalSize[0], HasLocalSize ? &NDRDesc.LocalSize[0] : nullptr, RawEvents.size(), RawEvents.empty() ? nullptr : &RawEvents[0], &Event); + if (PI_SUCCESS != Error) { + // If we have got non-success error code, let's analyze it to emit nice + // exception explaining what was wrong + pi_device Device = + detail::getSyclObjImpl(MQueue->get_device())->getHandleRef(); + return detail::enqueue_kernel_launch::handleError(Error, Device, Kernel, + NDRDesc); + } return PI_SUCCESS; } case CG::CGTYPE::COPY_USM: { diff --git a/sycl/test/basic_tests/parallel_for_range.cpp b/sycl/test/basic_tests/parallel_for_range.cpp index 5d93d00d08b7e..1172b8bc44fd1 100644 --- a/sycl/test/basic_tests/parallel_for_range.cpp +++ b/sycl/test/basic_tests/parallel_for_range.cpp @@ -1,93 +1,571 @@ // RUN: %clangxx -fsycl %s -o %t.out -// RUN: env SYCL_DEVICE_TYPE=HOST %t.out // RUN: %CPU_RUN_PLACEHOLDER %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // RUN: %ACC_RUN_PLACEHOLDER %t.out #include +#include + using namespace cl::sycl; +[[cl::reqd_work_group_size(4, 4, 4)]] void reqd_wg_size_helper() { + // do nothing +} + int main() { - auto asyncHandler = [](exception_list es) { - for (auto& e : es) { - std::rethrow_exception(e); + auto AsyncHandler = [](exception_list ES) { + for (auto &E : ES) { + std::rethrow_exception(E); } }; - // parallel_for, 100 global, 3 local -> fail. + queue Q(AsyncHandler); + device D(Q.get_device()); + + string_class DeviceVendorName = D.get_info(); + auto DeviceType = D.get_info(); + + // parallel_for, (16, 16, 16) global, (8, 8, 8) local, reqd_wg_size(4, 4, 4) + // -> fail try { - queue q(asyncHandler); - q.submit([&](handler &cgh) { - cgh.parallel_for(nd_range<1>(range<1>(100), range<1>(3)), - [=](nd_item<1> id) {}); + Q.submit([&](handler &CGH) { + CGH.parallel_for( + nd_range<3>(range<3>(16, 16, 16), range<3>(8, 8, 8)), + [=](nd_item<3>) { reqd_wg_size_helper(); }); }); - q.wait_and_throw(); - assert(false && "Should have thrown exception"); - } catch (nd_range_error e) { - // We expect an error to be thrown! + Q.wait_and_throw(); + std::cerr << "Test case ReqdWGSizeNegativeA failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (nd_range_error &E) { + if (string_class(E.what()).find( + "Specified local size doesn't match the required work-group size " + "specified in the program source") == string_class::npos) { + std::cerr + << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } catch (runtime_error &E) { + std::cerr << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr << "Test case ReqdWGSizeNegativeA failed: something unexpected " + "has been caught" + << std::endl; + return 1; } - // parallel_for, 100 global, 4 local -> pass. - try { - queue q(asyncHandler); - q.submit([&](handler &cgh) { - cgh.parallel_for(nd_range<1>(range<1>(100), range<1>(4)), - [=](nd_item<1> id) {}); - }); - q.wait_and_throw(); - } catch (nd_range_error e) { - assert(false && "Should not have thrown exception"); + string_class OCLVersionStr = D.get_info(); + assert(OCLVersionStr.size() >= 10 && + "Unexpected device version string"); // strlen("OpenCL X.Y") + const char *OCLVersion = &OCLVersionStr[7]; // strlen("OpenCL ") + if (OCLVersion[0] == '1' || (OCLVersion[0] == '2' && OCLVersion[2] == '0')) { + // parallel_for, (16, 16, 16) global, null local, reqd_wg_size(4, 4, 4) // + // -> fail + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for( + range<3>(16, 16, 16), [=](item<3>) { reqd_wg_size_helper(); }); + }); + Q.wait_and_throw(); + std::cerr + << "Test case ReqdWGSizeNegativeB failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (nd_range_error &E) { + if (string_class(E.what()).find( + "OpenCL 1.x and 2.0 requires to pass local size argument even if " + "required work-group size was specified in the program source") == + string_class::npos) { + std::cerr + << "Test case ReqdWGSizeNegativeB failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } catch (runtime_error &E) { + std::cerr + << "Test case ReqdWGSizeNegativeB failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr << "Test case ReqdWGSizeNegativeB failed: something unexpected " + "has been caught" + << std::endl; + return 1; + } } - // parallel_for, (100, 33, 16) global, (2, 3, 4) local -> pass. + // Positive test-cases that should pass on any underlying OpenCL runtime + + // parallel_for, (8, 8, 8) global, (4, 4, 4) local, reqd_wg_size(4, 4, 4) -> + // pass try { - queue q(asyncHandler); - q.submit([&](handler &cgh) { - cgh.parallel_for(nd_range<3>(range<3>(100, 33, 16), - range<3>(2, 3, 4)), - [=](nd_item<3> id) {}); + Q.submit([&](handler &CGH) { + CGH.parallel_for( + nd_range<3>(range<3>(8, 8, 8), range<3>(4, 4, 4)), + [=](nd_item<3>) { reqd_wg_size_helper(); }); }); - q.wait_and_throw(); - } catch (nd_range_error e) { - assert(false && "Should not have thrown exception"); + Q.wait_and_throw(); + } catch (nd_range_error &E) { + std::cerr << "Test case ReqdWGSizePositiveA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (runtime_error &E) { + std::cerr << "Test case ReqdWGSizePositiveA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr << "Test case ReqdWGSizePositiveA failed: something unexpected " + "has been caught" + << std::endl; + return 1; } - // parallel_for, (100, 33, 16) global, (2, 3, 5) local -> fail. - try { - queue q(asyncHandler); - q.submit([&](handler &cgh) { - cgh.parallel_for(nd_range<3>(range<3>(100, 33, 16), - range<3>(2, 3, 5)), - [=](nd_item<3> id) {}); - }); - q.wait_and_throw(); - assert(false && "Should have thrown exception"); - } catch (nd_range_error e) { + if (OCLVersion[0] == '1') { + // OpenCL 1.x + + // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and + // number of workitems specified by global_work_size is not evenly + // divisible by size of work-group given by local_work_size + try { + // parallel_for, 100 global, 3 local -> fail + Q.submit([&](handler &CGH) { + CGH.parallel_for( + nd_range<1>(range<1>(100), range<1>(3)), [=](nd_item<1>) {}); + }); + Q.wait_and_throw(); + // FIXME: some Intel runtimes contain bug and don't return expected + // error code + if (info::device_type::accelerator != DeviceType || + DeviceVendorName.find("Intel") == string_class::npos) { + std::cerr + << "Test case OpenCL1XNegativeA failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } + } catch (nd_range_error &E) { + if (string_class(E.what()).find("Non-uniform work-groups are not " + "supported by the target device") == + string_class::npos) { + std::cerr + << "Test case OpenCL1XNegativeA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } catch (runtime_error &E) { + std::cerr << "Test case OpenCL1XNegativeA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr << "Test case OpenCL1XNegativeA failed: something unexpected " + "has been caught" + << std::endl; + return 1; + } + + try { + // parallel_for, (100, 33, 16) global, (2, 3, 5) local -> fail + Q.submit([&](handler &CGH) { + CGH.parallel_for( + nd_range<3>(range<3>(100, 33, 16), range<3>(2, 3, 5)), + [=](nd_item<3>) {}); + }); + Q.wait_and_throw(); + // FIXME: some Intel runtimes contain bug and don't return expected + // error code + if (info::device_type::accelerator != DeviceType || + DeviceVendorName.find("Intel") == string_class::npos) { + std::cerr + << "Test case OpenCL1XNegativeB failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } + } catch (nd_range_error &E) { + if (string_class(E.what()).find("Non-uniform work-groups are not " + "supported by the target device") == + string_class::npos) { + std::cerr + << "Test case OpenCL1XNegativeB failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } catch (runtime_error &E) { + std::cerr << "Test case OpenCL1XNegativeB failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr << "Test case OpenCL1XNegativeB failed: something unexpected " + "has been caught" + << std::endl; + return 1; + } + + // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the + // total number of work-items in the work-group computed as + // local_work_size[0] * ... * local_work_size[work_dim – 1] is greater + // than the value specified by CL_DEVICE_MAX_WORK_GROUP_SIZE in + // table 4.3 + size_t MaxDeviceWGSize = D.get_info(); + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for( + nd_range<2>(range<2>(MaxDeviceWGSize, MaxDeviceWGSize), + range<2>(MaxDeviceWGSize, 2)), + [=](nd_item<2>) {}); + }); + Q.wait_and_throw(); + std::cerr << "Test case OpenCL1XNegativeC failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (nd_range_error &E) { + if (string_class(E.what()).find( + "Total number of work-items in a work-group cannot exceed " + "info::device::max_work_group_size which is equal to " + + std::to_string(MaxDeviceWGSize)) == string_class::npos) { + std::cerr + << "Test case OpenCL1XNegativeC failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } catch (runtime_error &E) { + std::cerr << "Test case OpenCL1XNegativeC failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr << "Test case OpenCL1XNegativeC failed: something unexpected " + "has been caught" + << std::endl; + return 1; + } + } else if (OCLVersion[0] == '2') { + // OpenCL 2.x + + // OpenCL 2.x: + // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the + // total number of work-items in the work-group computed as + // local_work_size[0] * ... * local_work_size[work_dim – 1] is greater + // than the value specified by CL_KERNEL_WORK_GROUP_SIZE in table 5.21. + { + program P(Q.get_context()); + P.build_with_kernel_type(); + + kernel K = P.get_kernel(); + size_t MaxKernelWGSize = + K.get_work_group_info( + Q.get_device()); + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for( + K, + nd_range<2>(range<2>(MaxKernelWGSize, MaxKernelWGSize), + range<2>(MaxKernelWGSize, 2)), + [=](nd_item<2>) {}); + }); + Q.wait_and_throw(); + std::cerr + << "Test case OpenCL2XNegativeA failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (nd_range_error &E) { + if (string_class(E.what()).find( + "Total number of work-items in a work-group cannot exceed " + "info::kernel_work_group::work_group_size which is equal to " + + std::to_string(MaxKernelWGSize) + " for this kernel") == + string_class::npos) { + std::cerr + << "Test case OpenCL2XNegativeA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } catch (runtime_error &E) { + std::cerr + << "Test case OpenCL2XNegativeA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr << "Test case OpenCL2XNegativeA failed: something unexpected " + "has been caught" + << std::endl; + return 1; + } + } + + // By default, program is built in OpenCL 1.2 mode, so the following error + // is expected even for OpenCL 2.x: + // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and + // number of workitems specified by global_work_size is not evenly + // divisible by size of work-group given by local_work_size + { + try { + // parallel_for, 100 global, 3 local -> fail + Q.submit([&](handler &CGH) { + CGH.parallel_for( + nd_range<1>(range<1>(100), range<1>(3)), [=](nd_item<1>) {}); + }); + Q.wait_and_throw(); + // FIXME: some Intel runtimes contain bug and don't return expected + // error code + if (info::device_type::cpu != DeviceType || + DeviceVendorName.find("Intel") == string_class::npos) { + std::cerr + << "Test case OpenCL2XNegativeB failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } + } catch (nd_range_error &E) { + if (string_class(E.what()).find( + "Non-uniform work-groups are not allowed by default. " + "Underlying OpenCL 2.x implementation supports this feature " + "and to enable it, build device program with -cl-std=CL2.0") == + string_class::npos) { + std::cerr + << "Test case OpenCL2XNegativeB failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } catch (runtime_error &E) { + std::cerr + << "Test case OpenCL2XNegativeB failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr << "Test case OpenCL2XNegativeB failed: something unexpected " + "has been caught" + << std::endl; + return 1; + } + + try { + // parallel_for, (100, 33, 16) global, (2, 3, 5) local -> fail + Q.submit([&](handler &CGH) { + CGH.parallel_for( + nd_range<3>(range<3>(100, 33, 16), range<3>(2, 3, 5)), + [=](nd_item<3>) {}); + }); + Q.wait_and_throw(); + // FIXME: some Intel runtimes contain bug and don't return expected + // error code + if (info::device_type::cpu != DeviceType || + DeviceVendorName.find("Intel") == string_class::npos) { + std::cerr + << "Test case OpenCL2XNegativeC failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } + } catch (nd_range_error &E) { + if (string_class(E.what()).find( + "Non-uniform work-groups are not allowed by default. " + "Underlying OpenCL 2.x implementation supports this feature " + "and to enable it, build device program with -cl-std=CL2.0") == + string_class::npos) { + std::cerr + << "Test case OpenCL2XNegativeC failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } catch (runtime_error &E) { + std::cerr + << "Test case OpenCL2XNegativeC failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr << "Test case OpenCL2XNegativeC failed: something unexpected " + "has been caught" + << std::endl; + return 1; + } + } + + // Enable non-uniform work-groups by passing -cl-std=CL2.0 + { + program P(Q.get_context()); + P.build_with_kernel_type("-cl-std=CL2.0"); + + kernel K = P.get_kernel(); + try { + // parallel_for, 100 global, 3 local -> pass + Q.submit([&](handler &CGH) { + CGH.parallel_for( + K, nd_range<1>(range<1>(100), range<1>(3)), [=](nd_item<1>) {}); + }); + Q.wait_and_throw(); + } catch (nd_range_error &E) { + std::cerr + << "Test case OpenCL2XPositiveA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (runtime_error &E) { + std::cerr + << "Test case OpenCL2XPositiveA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr << "Test case OpenCL2XPositiveA failed: something unexpected " + "has been caught" + << std::endl; + return 1; + } + } + + { + program P(Q.get_context()); + P.build_with_kernel_type("-cl-std=CL2.0"); + + kernel K = P.get_kernel(); + try { + // parallel_for, (100, 33, 16) global, (2, 3, 5) local -> pass + Q.submit([&](handler &CGH) { + CGH.parallel_for( + K, nd_range<3>(range<3>(100, 33, 16), range<3>(2, 3, 5)), + [=](nd_item<3>) {}); + }); + Q.wait_and_throw(); + } catch (nd_range_error &E) { + std::cerr + << "Test case OpenCL2XPositiveB failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (runtime_error &E) { + std::cerr + << "Test case OpenCL2XPositiveB failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr << "Test case OpenCL2XPositiveB failed: something unexpected " + "has been caught" + << std::endl; + return 1; + } + } + + // Enable 2.0 mode with non-uniform work-groups, but disable the latter by + // specifying -cl-uniform-work-group-size: + // CL_INVALID_WORK_GROUP_SIZE if the program was compiled with + // –cl-uniform-work-group-size and the number of work-items specified + // by global_work_size is not evenly divisible by size of work-group + // given by local_work_size + { + program P(Q.get_context()); + P.build_with_kernel_type( + "-cl-std=CL2.0 -cl-uniform-work-group-size"); + + kernel K = P.get_kernel(); + try { + // parallel_for, 100 global, 3 local -> fail + Q.submit([&](handler &CGH) { + CGH.parallel_for( + K, nd_range<1>(range<1>(100), range<1>(3)), [=](nd_item<1>) {}); + }); + Q.wait_and_throw(); + // FIXME: some Intel runtimes contain bug and don't return expected + // error code + if (info::device_type::cpu != DeviceType || + DeviceVendorName.find("Intel") == string_class::npos) { + std::cerr + << "Test case OpenCL2XNegativeD failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } + } catch (nd_range_error &E) { + if (string_class(E.what()).find( + "Non-uniform work-groups are not allowed by default. " + "Underlying OpenCL 2.x implementation supports this feature, " + "but it is disabled by -cl-uniform-work-group-size build " + "flag") == string_class::npos) { + std::cerr + << "Test case OpenCL2XNegativeD failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } catch (runtime_error &E) { + std::cerr + << "Test case OpenCL2XNegativeD failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr << "Test case OpenCL2XNegativeD failed: something unexpected " + "has been caught" + << std::endl; + return 1; + } + } + + { + program P(Q.get_context()); + P.build_with_kernel_type( + "-cl-std=CL2.0 -cl-uniform-work-group-size"); + + kernel K = P.get_kernel(); + try { + // parallel_for, (100, 33, 16) global, (2, 3, 5) local -> fail + Q.submit([&](handler &CGH) { + CGH.parallel_for( + K, nd_range<3>(range<3>(100, 33, 16), range<3>(2, 3, 5)), + [=](nd_item<3>) {}); + }); + Q.wait_and_throw(); + // FIXME: some Intel runtimes contain bug and don't return expected + // error code + if (info::device_type::cpu != DeviceType || + DeviceVendorName.find("Intel") == string_class::npos) { + std::cerr + << "Test case OpenCL2XNegativeE failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } + } catch (nd_range_error &E) { + if (string_class(E.what()).find( + "Non-uniform work-groups are not allowed by default. " + "Underlying OpenCL 2.x implementation supports this feature, " + "but it is disabled by -cl-uniform-work-group-size build " + "flag") == string_class::npos) { + std::cerr + << "Test case OpenCL2XNegativeE failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } catch (runtime_error &E) { + std::cerr + << "Test case OpenCL2XNegativeE failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr << "Test case OpenCL2XNegativeE failed: something unexpected " + "has been caught" + << std::endl; + return 1; + } + } } - // local size has a 0-based range -- no SIGFPEs, we hope. + // local size has a 0-based range -- no SIGFPEs, we hope try { - queue q(asyncHandler); - q.submit([&](handler &cgh) { - cgh.parallel_for(nd_range<2>(range<2>(5, 33), range<2>(1, 0)), - [=](nd_item<2> id) {}); + Q.submit([&](handler &CGH) { + CGH.parallel_for( + nd_range<2>(range<2>(5, 33), range<2>(1, 0)), [=](nd_item<2>) {}); }); - q.wait_and_throw(); - assert(false && "Should have thrown exception"); - } catch (runtime_error e) { + Q.wait_and_throw(); + std::cerr << "Test case NegativeA failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (runtime_error) { } - // parallel_for_work_group with 0-based local range. + // parallel_for_work_group with 0-based local range try { - queue q(asyncHandler); - q.submit([&](handler &cgh) { - cgh.parallel_for_work_group(range<2>(5, 33), range<2>(1, 0), - [=](group<2> g) {}); + Q.submit([&](handler &CGH) { + CGH.parallel_for_work_group( + range<2>(5, 33), range<2>(1, 0), [=](group<2>) {}); }); - q.wait_and_throw(); - assert(false && "Should have thrown exception"); - } catch (runtime_error e) { + Q.wait_and_throw(); + std::cerr << "Test case NegativeB failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (runtime_error) { } + return 0; } diff --git a/sycl/test/basic_tests/parallel_for_range_host.cpp b/sycl/test/basic_tests/parallel_for_range_host.cpp new file mode 100644 index 0000000000000..012e0479b4b71 --- /dev/null +++ b/sycl/test/basic_tests/parallel_for_range_host.cpp @@ -0,0 +1,98 @@ +// RUN: %clangxx -fsycl %s -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out + +#include + +#include + +using namespace cl::sycl; + +int main() { + auto AsyncHandler = [](exception_list ES) { + for (auto& E : ES) { + std::rethrow_exception(E); + } + }; + + queue Q(AsyncHandler); + + // parallel_for, 100 global, 3 local -> fail. + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for(nd_range<1>(range<1>(100), range<1>(3)), + [=](nd_item<1>) {}); + }); + Q.wait_and_throw(); + std::cerr << "Test case 'a' failed: no exception has been thrown" + << std::endl; + return 1; + } catch (nd_range_error) { + // We expect an error to be thrown! + } + + // parallel_for, 100 global, 4 local -> pass. + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for(nd_range<1>(range<1>(100), range<1>(4)), + [=](nd_item<1>) {}); + }); + Q.wait_and_throw(); + } catch (nd_range_error) { + std::cerr << "Test case 'b' failed: exception has been thrown" << std::endl; + return 1; + } + + // parallel_for, (100, 33, 16) global, (2, 3, 4) local -> pass. + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for(nd_range<3>(range<3>(100, 33, 16), + range<3>(2, 3, 4)), + [=](nd_item<3>) {}); + }); + Q.wait_and_throw(); + } catch (nd_range_error) { + std::cerr << "Test case 'c' failed: exception has been thrown" << std::endl; + return 1; + } + + // parallel_for, (100, 33, 16) global, (2, 3, 5) local -> fail. + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for(nd_range<3>(range<3>(100, 33, 16), + range<3>(2, 3, 5)), + [=](nd_item<3>) {}); + }); + Q.wait_and_throw(); + std::cerr << "Test case 'd' failed: no exception has been thrown" + << std::endl; + return 1; + } catch (nd_range_error) { + } + + // local size has a 0-based range -- no SIGFPEs, we hope. + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for(nd_range<2>(range<2>(5, 33), range<2>(1, 0)), + [=](nd_item<2>) {}); + }); + Q.wait_and_throw(); + std::cerr << "Test case 'e' failed: no exception has been thrown" + << std::endl; + return 1; + } catch (nd_range_error) { + } + + // parallel_for_work_group with 0-based local range. + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for_work_group(range<2>(5, 33), range<2>(1, 0), + [=](group<2>) {}); + }); + Q.wait_and_throw(); + std::cerr << "Test case 'f' failed: no exception has been thrown" + << std::endl; + return 1; + } catch (nd_range_error) { + } + return 0; +}