From e39575ed47f4230f57c1c9f1c9a0e362c5a96d11 Mon Sep 17 00:00:00 2001 From: Denis Bakhvalov Date: Mon, 21 Sep 2020 10:40:16 -0700 Subject: [PATCH 1/9] [SYCL] - Align get_info() with the SYCL spec According to the SYCL spec, cl::sycl::info::device::version should be returned in a form: `.` This patch trims the string returned from the piDeviceGetInfo call. For example, for the string "OpenCL 2.1 (Build 0)", it will return "2.1". --- sycl/source/detail/device_info.hpp | 36 ++++++++++++++++++++++-- sycl/test/plugins/sycl-ls-gpu-opencl.cpp | 4 +-- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/device_info.hpp b/sycl/source/detail/device_info.hpp index 4d46720fc7ce5..4fc8be379bb0e 100644 --- a/sycl/source/detail/device_info.hpp +++ b/sycl/source/detail/device_info.hpp @@ -122,8 +122,9 @@ template struct get_device_info { } }; -// Specialization for string return type, variable return size -template struct get_device_info { +// Helper struct to allow using the specialization of get_device_info +// for string return type in other specializations. +template struct get_device_info_string { static string_class get(RT::PiDevice dev, const plugin &Plugin) { size_t resultSize; Plugin.call( @@ -140,6 +141,13 @@ template struct get_device_info { } }; +// Specialization for string return type, variable return size +template struct get_device_info { + static string_class get(RT::PiDevice dev, const plugin &Plugin) { + return get_device_info_string::get(dev, Plugin); + } +}; + // Specialization for parent device template struct get_device_info { static T get(RT::PiDevice dev, const plugin &Plugin); @@ -176,6 +184,30 @@ struct get_device_info, param> { } }; +// Specialization for OpenCL version, splits the string returned by OpenCL +template <> struct get_device_info { + static string_class get(RT::PiDevice dev, const plugin &Plugin) { + string_class result = + get_device_info_string::get(dev, Plugin); + + // Extract OpenCL version from the returned string. + // For example, for the string "OpenCL 2.1 (Build 0)" + // return '2.1'. + auto dotPos = result.find('.'); + if (dotPos == std::string::npos) + return result; + + auto leftPos = result.rfind(' ', dotPos); + if (leftPos == std::string::npos) + leftPos = 0; + else + leftPos++; + + auto rightPos = result.find(' ', dotPos); + return result.substr(leftPos, rightPos - leftPos); + } +}; + // Specialization for single_fp_config, no type support check required template <> struct get_device_info, diff --git a/sycl/test/plugins/sycl-ls-gpu-opencl.cpp b/sycl/test/plugins/sycl-ls-gpu-opencl.cpp index 9172093380f79..31158d14e5945 100755 --- a/sycl/test/plugins/sycl-ls-gpu-opencl.cpp +++ b/sycl/test/plugins/sycl-ls-gpu-opencl.cpp @@ -3,8 +3,8 @@ // RUN: env SYCL_BE=PI_OPENCL sycl-ls --verbose >%t.opencl.out // RUN: FileCheck %s --check-prefixes=CHECK-GPU-BUILTIN,CHECK-GPU-CUSTOM --input-file %t.opencl.out -// CHECK-GPU-BUILTIN: gpu_selector(){{.*}}GPU : OpenCL -// CHECK-GPU-CUSTOM: custom_selector(gpu){{.*}}GPU : OpenCL +// CHECK-GPU-BUILTIN: gpu_selector(){{.*}}GPU : 2.1 +// CHECK-GPU-CUSTOM: custom_selector(gpu){{.*}}GPU : 2.1 //==-- sycl-ls-gpu-opencl.cpp - SYCL test for discovered/selected devices -===// // From 8d4876a5dc5f414a1b7251fbb9f106737a3f669e Mon Sep 17 00:00:00 2001 From: Denis Bakhvalov Date: Tue, 22 Sep 2020 14:07:23 -0700 Subject: [PATCH 2/9] Fixed review comments --- sycl/test/basic_tests/parallel_for_range.cpp | 1196 +++++++++--------- sycl/test/plugins/sycl-ls-gpu-opencl.cpp | 4 +- 2 files changed, 607 insertions(+), 593 deletions(-) diff --git a/sycl/test/basic_tests/parallel_for_range.cpp b/sycl/test/basic_tests/parallel_for_range.cpp index cab6523a70d06..2b9d3dad73fb0 100644 --- a/sycl/test/basic_tests/parallel_for_range.cpp +++ b/sycl/test/basic_tests/parallel_for_range.cpp @@ -31,413 +31,153 @@ int main() { auto DeviceType = D.get_info(); string_class OCLVersionStr = D.get_info(); - const bool OCLBackend = (OCLVersionStr.find("OpenCL") != string_class::npos); - assert((!OCLBackend || (OCLVersionStr.size() >= 10)) && - "Unexpected device version string"); // strlen("OpenCL X.Y") - const char *OCLVersion = &OCLVersionStr[7]; // strlen("OpenCL ") + assert((OCLVersionStr.size() >= 3) && "Unexpected device version string"); + assert(OCLVersionStr.find(".") != string_class::npos && + "Unexpected device version string"); + const char OCLVersionMajor = OCLVersionStr[0]; + const char OCLVersionMinor = OCLVersionStr[2]; // reqd_work_group_size is OpenCL specific. - if (OCLBackend) { - if (OCLVersion[0] == '1' || - (OCLVersion[0] == '2' && OCLVersion[2] == '0')) { - // parallel_for, (16, 16, 16) global, (8, 8, 8) local, reqd_wg_size(4, 4, - // 4) - // -> fail - try { - Q.submit([&](handler &CGH) { - CGH.parallel_for( - nd_range<3>(range<3>(16, 16, 16), range<3>(8, 8, 8)), - [=](nd_item<3>) { reqd_wg_size_helper(); }); - }); - Q.wait_and_throw(); - std::cerr - << "Test case ReqdWGSizeNegativeA failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } catch (nd_range_error &E) { - if (string_class(E.what()).find("Specified local size doesn't match " - "the required work-group size " - "specified in the program source") == - string_class::npos) { - std::cerr - << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } - } catch (runtime_error &E) { - std::cerr - << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } catch (...) { - std::cerr - << "Test case ReqdWGSizeNegativeA failed: something unexpected " - "has been caught" - << std::endl; - return 1; - } - - // parallel_for, (16, 16, 16) global, null local, reqd_wg_size(4, 4, 4) // - // -> fail - try { - Q.submit([&](handler &CGH) { - CGH.parallel_for( - range<3>(16, 16, 16), [=](item<3>) { reqd_wg_size_helper(); }); - }); - Q.wait_and_throw(); - std::cerr - << "Test case ReqdWGSizeNegativeB failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } catch (nd_range_error &E) { - if (string_class(E.what()).find("OpenCL 1.x and 2.0 requires to pass " - "local size argument even if " - "required work-group size was " - "specified in the program source") == - string_class::npos) { - std::cerr - << "Test case ReqdWGSizeNegativeB failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } - } catch (runtime_error &E) { - std::cerr - << "Test case ReqdWGSizeNegativeB failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } catch (...) { - std::cerr - << "Test case ReqdWGSizeNegativeB failed: something unexpected " - "has been caught" - << std::endl; - return 1; - } - } - - // Positive test-cases that should pass on any underlying OpenCL runtime - - // parallel_for, (8, 8, 8) global, (4, 4, 4) local, reqd_wg_size(4, 4, 4) -> - // pass + if (OCLVersionMajor == '1' || + (OCLVersionMajor == '2' && OCLVersionMinor == '0')) { + // parallel_for, (16, 16, 16) global, (8, 8, 8) local, reqd_wg_size(4, 4, + // 4) + // -> fail try { Q.submit([&](handler &CGH) { - CGH.parallel_for( - nd_range<3>(range<3>(8, 8, 8), range<3>(4, 4, 4)), + CGH.parallel_for( + nd_range<3>(range<3>(16, 16, 16), range<3>(8, 8, 8)), [=](nd_item<3>) { reqd_wg_size_helper(); }); }); Q.wait_and_throw(); - } catch (nd_range_error &E) { std::cerr - << "Test case ReqdWGSizePositiveA failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } catch (runtime_error &E) { - std::cerr - << "Test case ReqdWGSizePositiveA failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } catch (...) { - std::cerr << "Test case ReqdWGSizePositiveA failed: something unexpected " - "has been caught" - << std::endl; - return 1; - } - } // if (OCLBackend) - - if (!OCLBackend || (OCLVersion[0] == '1')) { - // OpenCL 1.x or non-OpenCL backends which behave like OpenCl 1.2 in SYCL. - - // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and - // number of workitems specified by global_work_size is not evenly - // divisible by size of work-group given by local_work_size - try { - // parallel_for, 100 global, 3 local -> fail - Q.submit([&](handler &CGH) { - CGH.parallel_for( - nd_range<1>(range<1>(100), range<1>(3)), [=](nd_item<1>) {}); - }); - Q.wait_and_throw(); - // FIXME: some Intel runtimes contain bug and don't return expected - // error code - if (info::device_type::accelerator != DeviceType || - DeviceVendorName.find("Intel") == string_class::npos) { - std::cerr - << "Test case OpenCL1XNegativeA failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } + << "Test case ReqdWGSizeNegativeA failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected } catch (nd_range_error &E) { - if (string_class(E.what()).find("Non-uniform work-groups are not " - "supported by the target device") == + if (string_class(E.what()).find("Specified local size doesn't match " + "the required work-group size " + "specified in the program source") == string_class::npos) { std::cerr - << "Test case OpenCL1XNegativeA failed: unexpected exception: " + << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " << E.what() << std::endl; return 1; } } catch (runtime_error &E) { - std::cerr << "Test case OpenCL1XNegativeA failed: unexpected exception: " - << E.what() << std::endl; + std::cerr + << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " + << E.what() << std::endl; return 1; } catch (...) { - std::cerr << "Test case OpenCL1XNegativeA failed: something unexpected " + std::cerr << "Test case ReqdWGSizeNegativeA failed: something unexpected " "has been caught" << std::endl; return 1; } + // parallel_for, (16, 16, 16) global, null local, reqd_wg_size(4, 4, 4) // + // -> fail try { - // parallel_for, (100, 33, 16) global, (2, 3, 5) local -> fail Q.submit([&](handler &CGH) { - CGH.parallel_for( - nd_range<3>(range<3>(100, 33, 16), range<3>(2, 3, 5)), - [=](nd_item<3>) {}); + CGH.parallel_for( + range<3>(16, 16, 16), [=](item<3>) { reqd_wg_size_helper(); }); }); Q.wait_and_throw(); - // FIXME: some Intel runtimes contain bug and don't return expected - // error code - if (info::device_type::accelerator != DeviceType || - DeviceVendorName.find("Intel") == string_class::npos) { - std::cerr - << "Test case OpenCL1XNegativeB failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } + std::cerr + << "Test case ReqdWGSizeNegativeB failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected } catch (nd_range_error &E) { - if (string_class(E.what()).find("Non-uniform work-groups are not " - "supported by the target device") == + if (string_class(E.what()).find("OpenCL 1.x and 2.0 requires to pass " + "local size argument even if " + "required work-group size was " + "specified in the program source") == string_class::npos) { std::cerr - << "Test case OpenCL1XNegativeB failed: unexpected exception: " + << "Test case ReqdWGSizeNegativeB failed: unexpected exception: " << E.what() << std::endl; return 1; } } catch (runtime_error &E) { - std::cerr << "Test case OpenCL1XNegativeB failed: unexpected exception: " - << E.what() << std::endl; + std::cerr + << "Test case ReqdWGSizeNegativeB failed: unexpected exception: " + << E.what() << std::endl; return 1; } catch (...) { - std::cerr << "Test case OpenCL1XNegativeB failed: something unexpected " + std::cerr << "Test case ReqdWGSizeNegativeB failed: something unexpected " "has been caught" << std::endl; return 1; } + } - // Local Size larger than Global. - // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified as larger - // than the global size, then a different error string is used. - // This is a sub-case of the more general 'non-uniform work group' - try { - // parallel_for, 16 global, 17 local -> fail - Q.submit([&](handler &CGH) { - CGH.parallel_for( - nd_range<1>(range<1>(16), range<1>(17)), [=](nd_item<1>) {}); - }); - Q.wait_and_throw(); - // FIXME: some Intel runtimes contain bug and don't return expected - // error code - if (info::device_type::accelerator != DeviceType || - DeviceVendorName.find("Intel") == string_class::npos) { - std::cerr - << "Test case OpenCL1XNegativeA2 failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } - } catch (nd_range_error &E) { - if ((string_class(E.what()).find("Local workgroup size cannot be greater " - "than global range in any dimension") == - string_class::npos) && - (string_class(E.what()).find("Non-uniform work-groups are not " - "supported by the target device") == - string_class::npos)) { - std::cerr - << "Test case OpenCL1XNegativeA2 failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } - } catch (runtime_error &E) { - std::cerr << "Test case OpenCL1XNegativeA2 failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } catch (...) { - std::cerr << "Test case OpenCL1XNegativeA2 failed: something unexpected " - "has been caught" - << std::endl; - return 1; - } + // Positive test-cases that should pass on any underlying OpenCL runtime - // Local Size larger than Global, multi-dimensional - // This is a sub-case of the more general 'non-uniform work group' + // parallel_for, (8, 8, 8) global, (4, 4, 4) local, reqd_wg_size(4, 4, 4) -> + // pass try { - // parallel_for, 6, 6, 6 global, 2, 2, 7 local -> fail Q.submit([&](handler &CGH) { - CGH.parallel_for( - nd_range<3>(range<3>(6, 6, 6), range<3>(2, 2, 7)), - [=](nd_item<3>) {}); + CGH.parallel_for( + nd_range<3>(range<3>(8, 8, 8), range<3>(4, 4, 4)), + [=](nd_item<3>) { reqd_wg_size_helper(); }); }); Q.wait_and_throw(); - // FIXME: some Intel runtimes contain bug and don't return expected - // error code - if (info::device_type::accelerator != DeviceType || - DeviceVendorName.find("Intel") == string_class::npos) { - std::cerr - << "Test case OpenCL1XNegativeB2 failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } } catch (nd_range_error &E) { - if ((string_class(E.what()).find("Local workgroup size cannot be greater " - "than global range in any dimension") == - string_class::npos) && - (string_class(E.what()).find("Non-uniform work-groups are not " - "supported by the target device") == - string_class::npos)) { - std::cerr - << "Test case OpenCL1XNegativeB2 failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } - } catch (runtime_error &E) { - std::cerr << "Test case OpenCL1XNegativeB2 failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } catch (...) { - std::cerr << "Test case OpenCL1XNegativeB2 failed: something unexpected " - "has been caught" - << std::endl; + std::cerr + << "Test case ReqdWGSizePositiveA failed: unexpected exception: " + << E.what() << std::endl; return 1; - } - - // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the - // total number of work-items in the work-group computed as - // local_work_size[0] * ... * local_work_size[work_dim – 1] is greater - // than the value specified by CL_DEVICE_MAX_WORK_GROUP_SIZE in - // table 4.3 - size_t MaxDeviceWGSize = D.get_info(); - try { - Q.submit([&](handler &CGH) { - CGH.parallel_for( - nd_range<2>(range<2>(MaxDeviceWGSize, MaxDeviceWGSize), - range<2>(MaxDeviceWGSize, 2)), - [=](nd_item<2>) {}); - }); - Q.wait_and_throw(); - std::cerr << "Test case OpenCL1XNegativeC failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } catch (nd_range_error &E) { - if ((string_class(E.what()).find( - "Total number of work-items in a work-group cannot exceed " + - std::to_string(MaxDeviceWGSize)) == string_class::npos) && - (string_class(E.what()).find("Non-uniform work-groups are not " - "supported by the target device") == - string_class::npos)) { - std::cerr - << "Test case OpenCL1XNegativeC failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } } catch (runtime_error &E) { - std::cerr << "Test case OpenCL1XNegativeC failed: unexpected exception: " - << E.what() << std::endl; + std::cerr + << "Test case ReqdWGSizePositiveA failed: unexpected exception: " + << E.what() << std::endl; return 1; } catch (...) { - std::cerr << "Test case OpenCL1XNegativeC failed: something unexpected " + std::cerr << "Test case ReqdWGSizePositiveA failed: something unexpected " "has been caught" << std::endl; return 1; } - } else if (OCLBackend && (OCLVersion[0] == '2')) { - // OpenCL 2.x - - // OpenCL 2.x: - // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the - // total number of work-items in the work-group computed as - // local_work_size[0] * ... * local_work_size[work_dim – 1] is greater - // than the value specified by CL_KERNEL_WORK_GROUP_SIZE in table 5.21. - { - program P(Q.get_context()); - P.build_with_kernel_type(); - - kernel K = P.get_kernel(); - size_t MaxKernelWGSize = - K.get_work_group_info( - Q.get_device()); - try { - Q.submit([&](handler &CGH) { - CGH.parallel_for( - K, - nd_range<2>(range<2>(MaxKernelWGSize, MaxKernelWGSize), - range<2>(MaxKernelWGSize, 2)), - [=](nd_item<2>) {}); - }); - Q.wait_and_throw(); - std::cerr - << "Test case OpenCL2XNegativeA failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } catch (nd_range_error &E) { - if (string_class(E.what()).find( - "Total number of work-items in a work-group cannot exceed " + - std::to_string(MaxKernelWGSize) + " for this kernel") == - string_class::npos) { - std::cerr - << "Test case OpenCL2XNegativeA failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } - } catch (runtime_error &E) { - std::cerr - << "Test case OpenCL2XNegativeA failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } catch (...) { - std::cerr << "Test case OpenCL2XNegativeA failed: something unexpected " - "has been caught" - << std::endl; - return 1; - } - } - // By default, program is built in OpenCL 1.2 mode, so the following error - // is expected even for OpenCL 2.x: - // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and - // number of workitems specified by global_work_size is not evenly - // divisible by size of work-group given by local_work_size - { + if (OCLVersionMajor == '1') { + // OpenCL 1.x or non-OpenCL backends which behave like OpenCl 1.2 in SYCL. + + // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and + // number of workitems specified by global_work_size is not evenly + // divisible by size of work-group given by local_work_size try { // parallel_for, 100 global, 3 local -> fail Q.submit([&](handler &CGH) { - CGH.parallel_for( + CGH.parallel_for( nd_range<1>(range<1>(100), range<1>(3)), [=](nd_item<1>) {}); }); Q.wait_and_throw(); // FIXME: some Intel runtimes contain bug and don't return expected // error code - if (info::device_type::cpu != DeviceType || + if (info::device_type::accelerator != DeviceType || DeviceVendorName.find("Intel") == string_class::npos) { std::cerr - << "Test case OpenCL2XNegativeB failed: no exception has been " + << "Test case OpenCL1XNegativeA failed: no exception has been " "thrown\n"; return 1; // We shouldn't be here, exception is expected } } catch (nd_range_error &E) { - if (string_class(E.what()).find( - "Non-uniform work-groups are not allowed by default. " - "Underlying OpenCL 2.x implementation supports this feature " - "and to enable it, build device program with -cl-std=CL2.0") == + if (string_class(E.what()).find("Non-uniform work-groups are not " + "supported by the target device") == string_class::npos) { std::cerr - << "Test case OpenCL2XNegativeB failed: unexpected exception: " + << "Test case OpenCL1XNegativeA failed: unexpected exception: " << E.what() << std::endl; return 1; } } catch (runtime_error &E) { std::cerr - << "Test case OpenCL2XNegativeB failed: unexpected exception: " + << "Test case OpenCL1XNegativeA failed: unexpected exception: " << E.what() << std::endl; return 1; } catch (...) { - std::cerr << "Test case OpenCL2XNegativeB failed: something unexpected " + std::cerr << "Test case OpenCL1XNegativeA failed: something unexpected " "has been caught" << std::endl; return 1; @@ -446,86 +186,81 @@ int main() { try { // parallel_for, (100, 33, 16) global, (2, 3, 5) local -> fail Q.submit([&](handler &CGH) { - CGH.parallel_for( + CGH.parallel_for( nd_range<3>(range<3>(100, 33, 16), range<3>(2, 3, 5)), [=](nd_item<3>) {}); }); Q.wait_and_throw(); // FIXME: some Intel runtimes contain bug and don't return expected // error code - if (info::device_type::cpu != DeviceType || + if (info::device_type::accelerator != DeviceType || DeviceVendorName.find("Intel") == string_class::npos) { std::cerr - << "Test case OpenCL2XNegativeC failed: no exception has been " + << "Test case OpenCL1XNegativeB failed: no exception has been " "thrown\n"; return 1; // We shouldn't be here, exception is expected } } catch (nd_range_error &E) { - if (string_class(E.what()).find( - "Non-uniform work-groups are not allowed by default. " - "Underlying OpenCL 2.x implementation supports this feature " - "and to enable it, build device program with -cl-std=CL2.0") == + if (string_class(E.what()).find("Non-uniform work-groups are not " + "supported by the target device") == string_class::npos) { std::cerr - << "Test case OpenCL2XNegativeC failed: unexpected exception: " + << "Test case OpenCL1XNegativeB failed: unexpected exception: " << E.what() << std::endl; return 1; } } catch (runtime_error &E) { std::cerr - << "Test case OpenCL2XNegativeC failed: unexpected exception: " + << "Test case OpenCL1XNegativeB failed: unexpected exception: " << E.what() << std::endl; return 1; } catch (...) { - std::cerr << "Test case OpenCL2XNegativeC failed: something unexpected " + std::cerr << "Test case OpenCL1XNegativeB failed: something unexpected " "has been caught" << std::endl; return 1; } - } - // Local Size larger than Global. - // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified as larger - // than the global size, then a different error string is used. - // This is a sub-case of the more general 'non-uniform work group' - { + // Local Size larger than Global. + // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified as larger + // than the global size, then a different error string is used. + // This is a sub-case of the more general 'non-uniform work group' try { // parallel_for, 16 global, 17 local -> fail Q.submit([&](handler &CGH) { - CGH.parallel_for( + CGH.parallel_for( nd_range<1>(range<1>(16), range<1>(17)), [=](nd_item<1>) {}); }); Q.wait_and_throw(); // FIXME: some Intel runtimes contain bug and don't return expected // error code - if (info::device_type::cpu != DeviceType || + if (info::device_type::accelerator != DeviceType || DeviceVendorName.find("Intel") == string_class::npos) { std::cerr - << "Test case OpenCL2XNegativeB2 failed: no exception has been " + << "Test case OpenCL1XNegativeA2 failed: no exception has been " "thrown\n"; return 1; // We shouldn't be here, exception is expected } } catch (nd_range_error &E) { - if (string_class(E.what()).find( - "Local workgroup size greater than global range size. " - "Non-uniform work-groups are not allowed by default. " - "Underlying " - "OpenCL 2.x implementation supports this feature and to enable " - "it, build device program with -cl-std=CL2.0") == - string_class::npos) { + if ((string_class(E.what()).find( + "Local workgroup size cannot be greater " + "than global range in any dimension") == string_class::npos) && + (string_class(E.what()).find("Non-uniform work-groups are not " + "supported by the target device") == + string_class::npos)) { std::cerr - << "Test case OpenCL2XNegativeB2 failed: unexpected exception: " + << "Test case OpenCL1XNegativeA2 failed: unexpected exception: " << E.what() << std::endl; return 1; } } catch (runtime_error &E) { std::cerr - << "Test case OpenCL2XNegativeB2 failed: unexpected exception: " + << "Test case OpenCL1XNegativeA2 failed: unexpected exception: " << E.what() << std::endl; return 1; } catch (...) { std::cerr - << "Test case OpenCL2XNegativeB2 failed: something unexpected " + << "Test case OpenCL1XNegativeA2 failed: something unexpected " "has been caught" << std::endl; return 1; @@ -536,318 +271,597 @@ int main() { try { // parallel_for, 6, 6, 6 global, 2, 2, 7 local -> fail Q.submit([&](handler &CGH) { - CGH.parallel_for( + CGH.parallel_for( nd_range<3>(range<3>(6, 6, 6), range<3>(2, 2, 7)), [=](nd_item<3>) {}); }); Q.wait_and_throw(); // FIXME: some Intel runtimes contain bug and don't return expected // error code - if (info::device_type::cpu != DeviceType || + if (info::device_type::accelerator != DeviceType || DeviceVendorName.find("Intel") == string_class::npos) { std::cerr - << "Test case OpenCL2XNegativeC2 failed: no exception has been " + << "Test case OpenCL1XNegativeB2 failed: no exception has been " "thrown\n"; return 1; // We shouldn't be here, exception is expected } } catch (nd_range_error &E) { - if (string_class(E.what()).find( - "Local workgroup size greater than global range size. " - "Non-uniform work-groups are not allowed by default. " - "Underlying OpenCL 2.x implementation supports this feature " - "and to enable it, build device program with -cl-std=CL2.0") == - string_class::npos) { + if ((string_class(E.what()).find( + "Local workgroup size cannot be greater " + "than global range in any dimension") == string_class::npos) && + (string_class(E.what()).find("Non-uniform work-groups are not " + "supported by the target device") == + string_class::npos)) { std::cerr - << "Test case OpenCL2XNegativeC2 failed: unexpected exception: " + << "Test case OpenCL1XNegativeB2 failed: unexpected exception: " << E.what() << std::endl; return 1; } } catch (runtime_error &E) { std::cerr - << "Test case OpenCL2XNegativeC2 failed: unexpected exception: " + << "Test case OpenCL1XNegativeB2 failed: unexpected exception: " << E.what() << std::endl; return 1; } catch (...) { std::cerr - << "Test case OpenCL2XNegativeC2 failed: something unexpected " + << "Test case OpenCL1XNegativeB2 failed: something unexpected " "has been caught" << std::endl; return 1; } - } - - // Enable non-uniform work-groups by passing -cl-std=CL2.0 - { - program P(Q.get_context()); - P.build_with_kernel_type("-cl-std=CL2.0"); - kernel K = P.get_kernel(); + // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the + // total number of work-items in the work-group computed as + // local_work_size[0] * ... * local_work_size[work_dim – 1] is greater + // than the value specified by CL_DEVICE_MAX_WORK_GROUP_SIZE in + // table 4.3 + size_t MaxDeviceWGSize = D.get_info(); try { - // parallel_for, 100 global, 3 local -> pass Q.submit([&](handler &CGH) { - CGH.parallel_for( - K, nd_range<1>(range<1>(100), range<1>(3)), [=](nd_item<1>) {}); + CGH.parallel_for( + nd_range<2>(range<2>(MaxDeviceWGSize, MaxDeviceWGSize), + range<2>(MaxDeviceWGSize, 2)), + [=](nd_item<2>) {}); }); Q.wait_and_throw(); - } catch (nd_range_error &E) { std::cerr - << "Test case OpenCL2XPositiveA failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } catch (runtime_error &E) { - std::cerr - << "Test case OpenCL2XPositiveA failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } catch (...) { - std::cerr << "Test case OpenCL2XPositiveA failed: something unexpected " - "has been caught" - << std::endl; - return 1; - } - } - // Multi-dimensional nd_range. - { - program P(Q.get_context()); - P.build_with_kernel_type("-cl-std=CL2.0"); - - kernel K = P.get_kernel(); - try { - // parallel_for, (100, 33, 16) global, (2, 3, 5) local -> pass - Q.submit([&](handler &CGH) { - CGH.parallel_for( - K, nd_range<3>(range<3>(100, 33, 16), range<3>(2, 3, 5)), - [=](nd_item<3>) {}); - }); - Q.wait_and_throw(); + << "Test case OpenCL1XNegativeC failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected } catch (nd_range_error &E) { - std::cerr - << "Test case OpenCL2XPositiveB failed: unexpected exception: " - << E.what() << std::endl; - return 1; + if ((string_class(E.what()).find( + "Total number of work-items in a work-group cannot exceed " + + std::to_string(MaxDeviceWGSize)) == string_class::npos) && + (string_class(E.what()).find("Non-uniform work-groups are not " + "supported by the target device") == + string_class::npos)) { + std::cerr + << "Test case OpenCL1XNegativeC failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } } catch (runtime_error &E) { std::cerr - << "Test case OpenCL2XPositiveB failed: unexpected exception: " + << "Test case OpenCL1XNegativeC failed: unexpected exception: " << E.what() << std::endl; return 1; } catch (...) { - std::cerr << "Test case OpenCL2XPositiveB failed: something unexpected " + std::cerr << "Test case OpenCL1XNegativeC failed: something unexpected " "has been caught" << std::endl; return 1; } - } - - // Enable 2.0 mode with non-uniform work-groups, but disable the latter by - // specifying -cl-uniform-work-group-size: - // CL_INVALID_WORK_GROUP_SIZE if the program was compiled with - // –cl-uniform-work-group-size and the number of work-items specified - // by global_work_size is not evenly divisible by size of work-group - // given by local_work_size - { - program P(Q.get_context()); - P.build_with_kernel_type( - "-cl-std=CL2.0 -cl-uniform-work-group-size"); - - kernel K = P.get_kernel(); - try { - // parallel_for, 100 global, 3 local -> fail - Q.submit([&](handler &CGH) { - CGH.parallel_for( - K, nd_range<1>(range<1>(100), range<1>(3)), [=](nd_item<1>) {}); - }); - Q.wait_and_throw(); - // FIXME: some Intel runtimes contain bug and don't return expected - // error code - if (info::device_type::cpu != DeviceType || - DeviceVendorName.find("Intel") == string_class::npos) { + } else if (OCLVersionMajor == '2') { + // OpenCL 2.x + + // OpenCL 2.x: + // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the + // total number of work-items in the work-group computed as + // local_work_size[0] * ... * local_work_size[work_dim – 1] is greater + // than the value specified by CL_KERNEL_WORK_GROUP_SIZE in table 5.21. + { + program P(Q.get_context()); + P.build_with_kernel_type(); + + kernel K = P.get_kernel(); + size_t MaxKernelWGSize = + K.get_work_group_info( + Q.get_device()); + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for( + K, + nd_range<2>(range<2>(MaxKernelWGSize, MaxKernelWGSize), + range<2>(MaxKernelWGSize, 2)), + [=](nd_item<2>) {}); + }); + Q.wait_and_throw(); std::cerr - << "Test case OpenCL2XNegativeD failed: no exception has been " + << "Test case OpenCL2XNegativeA failed: no exception has been " "thrown\n"; return 1; // We shouldn't be here, exception is expected + } catch (nd_range_error &E) { + if (string_class(E.what()).find( + "Total number of work-items in a work-group cannot exceed " + + std::to_string(MaxKernelWGSize) + " for this kernel") == + string_class::npos) { + std::cerr + << "Test case OpenCL2XNegativeA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } catch (runtime_error &E) { + std::cerr + << "Test case OpenCL2XNegativeA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr + << "Test case OpenCL2XNegativeA failed: something unexpected " + "has been caught" + << std::endl; + return 1; } - } catch (nd_range_error &E) { - if (string_class(E.what()).find( - "Global_work_size not evenly divisible by local_work_size. " - "Non-uniform work-groups are not allowed by when " - "-cl-uniform-work-group-size flag is used. Underlying " - "OpenCL 2.x implementation supports this feature, but it is " - "being " - "disabled by -cl-uniform-work-group-size build flag") == - string_class::npos) { + } + + // By default, program is built in OpenCL 1.2 mode, so the following error + // is expected even for OpenCL 2.x: + // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and + // number of workitems specified by global_work_size is not evenly + // divisible by size of work-group given by local_work_size + { + try { + // parallel_for, 100 global, 3 local -> fail + Q.submit([&](handler &CGH) { + CGH.parallel_for( + nd_range<1>(range<1>(100), range<1>(3)), [=](nd_item<1>) {}); + }); + Q.wait_and_throw(); + // FIXME: some Intel runtimes contain bug and don't return expected + // error code + if (info::device_type::cpu != DeviceType || + DeviceVendorName.find("Intel") == string_class::npos) { + std::cerr + << "Test case OpenCL2XNegativeB failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } + } catch (nd_range_error &E) { + if (string_class(E.what()).find( + "Non-uniform work-groups are not allowed by default. " + "Underlying OpenCL 2.x implementation supports this feature " + "and to enable it, build device program with " + "-cl-std=CL2.0") == string_class::npos) { + std::cerr + << "Test case OpenCL2XNegativeB failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } catch (runtime_error &E) { std::cerr - << "Test case OpenCL2XNegativeD failed: unexpected exception: " + << "Test case OpenCL2XNegativeB failed: unexpected exception: " << E.what() << std::endl; return 1; + } catch (...) { + std::cerr + << "Test case OpenCL2XNegativeB failed: something unexpected " + "has been caught" + << std::endl; + return 1; + } + + try { + // parallel_for, (100, 33, 16) global, (2, 3, 5) local -> fail + Q.submit([&](handler &CGH) { + CGH.parallel_for( + nd_range<3>(range<3>(100, 33, 16), range<3>(2, 3, 5)), + [=](nd_item<3>) {}); + }); + Q.wait_and_throw(); + // FIXME: some Intel runtimes contain bug and don't return expected + // error code + if (info::device_type::cpu != DeviceType || + DeviceVendorName.find("Intel") == string_class::npos) { + std::cerr + << "Test case OpenCL2XNegativeC failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } + } catch (nd_range_error &E) { + if (string_class(E.what()).find( + "Non-uniform work-groups are not allowed by default. " + "Underlying OpenCL 2.x implementation supports this feature " + "and to enable it, build device program with " + "-cl-std=CL2.0") == string_class::npos) { + std::cerr + << "Test case OpenCL2XNegativeC failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } catch (runtime_error &E) { + std::cerr + << "Test case OpenCL2XNegativeC failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr + << "Test case OpenCL2XNegativeC failed: something unexpected " + "has been caught" + << std::endl; + return 1; } - } catch (runtime_error &E) { - std::cerr - << "Test case OpenCL2XNegativeD failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } catch (...) { - std::cerr << "Test case OpenCL2XNegativeD failed: something unexpected " - "has been caught" - << std::endl; - return 1; } - } - // Multi-dimensional nd_range. - { - program P(Q.get_context()); - P.build_with_kernel_type( - "-cl-std=CL2.0 -cl-uniform-work-group-size"); - kernel K = P.get_kernel(); - try { - // parallel_for, (100, 33, 16) global, (2, 3, 5) local -> fail - Q.submit([&](handler &CGH) { - CGH.parallel_for( - K, nd_range<3>(range<3>(100, 33, 16), range<3>(2, 3, 5)), - [=](nd_item<3>) {}); - }); - Q.wait_and_throw(); - // FIXME: some Intel runtimes contain bug and don't return expected - // error code - if (info::device_type::cpu != DeviceType || - DeviceVendorName.find("Intel") == string_class::npos) { + // Local Size larger than Global. + // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified as larger + // than the global size, then a different error string is used. + // This is a sub-case of the more general 'non-uniform work group' + { + try { + // parallel_for, 16 global, 17 local -> fail + Q.submit([&](handler &CGH) { + CGH.parallel_for( + nd_range<1>(range<1>(16), range<1>(17)), [=](nd_item<1>) {}); + }); + Q.wait_and_throw(); + // FIXME: some Intel runtimes contain bug and don't return expected + // error code + if (info::device_type::cpu != DeviceType || + DeviceVendorName.find("Intel") == string_class::npos) { + std::cerr + << "Test case OpenCL2XNegativeB2 failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } + } catch (nd_range_error &E) { + if (string_class(E.what()).find( + "Local workgroup size greater than global range size. " + "Non-uniform work-groups are not allowed by default. " + "Underlying " + "OpenCL 2.x implementation supports this feature and to " + "enable " + "it, build device program with -cl-std=CL2.0") == + string_class::npos) { + std::cerr + << "Test case OpenCL2XNegativeB2 failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } catch (runtime_error &E) { std::cerr - << "Test case OpenCL2XNegativeE failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected + << "Test case OpenCL2XNegativeB2 failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr + << "Test case OpenCL2XNegativeB2 failed: something unexpected " + "has been caught" + << std::endl; + return 1; } - } catch (nd_range_error &E) { - if (string_class(E.what()).find( - "Global_work_size not evenly divisible by local_work_size. " - "Non-uniform work-groups are not allowed by when " - "-cl-uniform-work-group-size flag is used. Underlying " - "OpenCL 2.x implementation supports this feature, but it is " - "being " - "disabled by -cl-uniform-work-group-size build flag") == - string_class::npos) { + + // Local Size larger than Global, multi-dimensional + // This is a sub-case of the more general 'non-uniform work group' + try { + // parallel_for, 6, 6, 6 global, 2, 2, 7 local -> fail + Q.submit([&](handler &CGH) { + CGH.parallel_for( + nd_range<3>(range<3>(6, 6, 6), range<3>(2, 2, 7)), + [=](nd_item<3>) {}); + }); + Q.wait_and_throw(); + // FIXME: some Intel runtimes contain bug and don't return expected + // error code + if (info::device_type::cpu != DeviceType || + DeviceVendorName.find("Intel") == string_class::npos) { + std::cerr + << "Test case OpenCL2XNegativeC2 failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } + } catch (nd_range_error &E) { + if (string_class(E.what()).find( + "Local workgroup size greater than global range size. " + "Non-uniform work-groups are not allowed by default. " + "Underlying OpenCL 2.x implementation supports this feature " + "and to enable it, build device program with " + "-cl-std=CL2.0") == string_class::npos) { + std::cerr + << "Test case OpenCL2XNegativeC2 failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } catch (runtime_error &E) { std::cerr - << "Test case OpenCL2XNegativeE failed: unexpected exception: " + << "Test case OpenCL2XNegativeC2 failed: unexpected exception: " << E.what() << std::endl; return 1; + } catch (...) { + std::cerr + << "Test case OpenCL2XNegativeC2 failed: something unexpected " + "has been caught" + << std::endl; + return 1; } - } catch (runtime_error &E) { - std::cerr - << "Test case OpenCL2XNegativeE failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } catch (...) { - std::cerr << "Test case OpenCL2XNegativeE failed: something unexpected " - "has been caught" - << std::endl; - return 1; } - } - // Local Size larger than Global, -cl-std=CL2.0 -cl-uniform-work-group-size - // flag used CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified as - // larger than the global size, then a different error string is used. This - // is a sub-case of the more general 'non-uniform work group' - { - program P(Q.get_context()); - P.build_with_kernel_type( - "-cl-std=CL2.0 -cl-uniform-work-group-size"); + // Enable non-uniform work-groups by passing -cl-std=CL2.0 + { + program P(Q.get_context()); + P.build_with_kernel_type("-cl-std=CL2.0"); + + kernel K = P.get_kernel(); + try { + // parallel_for, 100 global, 3 local -> pass + Q.submit([&](handler &CGH) { + CGH.parallel_for( + K, nd_range<1>(range<1>(100), range<1>(3)), [=](nd_item<1>) {}); + }); + Q.wait_and_throw(); + } catch (nd_range_error &E) { + std::cerr + << "Test case OpenCL2XPositiveA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (runtime_error &E) { + std::cerr + << "Test case OpenCL2XPositiveA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr + << "Test case OpenCL2XPositiveA failed: something unexpected " + "has been caught" + << std::endl; + return 1; + } + } + // Multi-dimensional nd_range. + { + program P(Q.get_context()); + P.build_with_kernel_type("-cl-std=CL2.0"); + + kernel K = P.get_kernel(); + try { + // parallel_for, (100, 33, 16) global, (2, 3, 5) local -> pass + Q.submit([&](handler &CGH) { + CGH.parallel_for( + K, nd_range<3>(range<3>(100, 33, 16), range<3>(2, 3, 5)), + [=](nd_item<3>) {}); + }); + Q.wait_and_throw(); + } catch (nd_range_error &E) { + std::cerr + << "Test case OpenCL2XPositiveB failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (runtime_error &E) { + std::cerr + << "Test case OpenCL2XPositiveB failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr + << "Test case OpenCL2XPositiveB failed: something unexpected " + "has been caught" + << std::endl; + return 1; + } + } - kernel K = P.get_kernel(); - try { - // parallel_for, 16 global, 17 local -> fail - Q.submit([&](handler &CGH) { - CGH.parallel_for( - K, nd_range<1>(range<1>(16), range<1>(17)), [=](nd_item<1>) {}); - }); - Q.wait_and_throw(); - // FIXME: some Intel runtimes contain bug and don't return expected - // error code - if (info::device_type::cpu != DeviceType || - DeviceVendorName.find("Intel") == string_class::npos) { + // Enable 2.0 mode with non-uniform work-groups, but disable the latter by + // specifying -cl-uniform-work-group-size: + // CL_INVALID_WORK_GROUP_SIZE if the program was compiled with + // –cl-uniform-work-group-size and the number of work-items specified + // by global_work_size is not evenly divisible by size of work-group + // given by local_work_size + { + program P(Q.get_context()); + P.build_with_kernel_type( + "-cl-std=CL2.0 -cl-uniform-work-group-size"); + + kernel K = P.get_kernel(); + try { + // parallel_for, 100 global, 3 local -> fail + Q.submit([&](handler &CGH) { + CGH.parallel_for( + K, nd_range<1>(range<1>(100), range<1>(3)), [=](nd_item<1>) {}); + }); + Q.wait_and_throw(); + // FIXME: some Intel runtimes contain bug and don't return expected + // error code + if (info::device_type::cpu != DeviceType || + DeviceVendorName.find("Intel") == string_class::npos) { + std::cerr + << "Test case OpenCL2XNegativeD failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } + } catch (nd_range_error &E) { + if (string_class(E.what()).find( + "Global_work_size not evenly divisible by local_work_size. " + "Non-uniform work-groups are not allowed by when " + "-cl-uniform-work-group-size flag is used. Underlying " + "OpenCL 2.x implementation supports this feature, but it is " + "being " + "disabled by -cl-uniform-work-group-size build flag") == + string_class::npos) { + std::cerr + << "Test case OpenCL2XNegativeD failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } catch (runtime_error &E) { std::cerr - << "Test case OpenCL2XNegativeD2 failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected + << "Test case OpenCL2XNegativeD failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr + << "Test case OpenCL2XNegativeD failed: something unexpected " + "has been caught" + << std::endl; + return 1; } - } catch (nd_range_error &E) { - if (string_class(E.what()).find( - "Local workgroup size greater than global range size. " - "Non-uniform work-groups are not allowed by when " - "-cl-uniform-work-group-size flag is used. Underlying " - "OpenCL 2.x implementation supports this feature, but it is " - "being " - "disabled by -cl-uniform-work-group-size build flag") == - string_class::npos) { + } + // Multi-dimensional nd_range. + { + program P(Q.get_context()); + P.build_with_kernel_type( + "-cl-std=CL2.0 -cl-uniform-work-group-size"); + + kernel K = P.get_kernel(); + try { + // parallel_for, (100, 33, 16) global, (2, 3, 5) local -> fail + Q.submit([&](handler &CGH) { + CGH.parallel_for( + K, nd_range<3>(range<3>(100, 33, 16), range<3>(2, 3, 5)), + [=](nd_item<3>) {}); + }); + Q.wait_and_throw(); + // FIXME: some Intel runtimes contain bug and don't return expected + // error code + if (info::device_type::cpu != DeviceType || + DeviceVendorName.find("Intel") == string_class::npos) { + std::cerr + << "Test case OpenCL2XNegativeE failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } + } catch (nd_range_error &E) { + if (string_class(E.what()).find( + "Global_work_size not evenly divisible by local_work_size. " + "Non-uniform work-groups are not allowed by when " + "-cl-uniform-work-group-size flag is used. Underlying " + "OpenCL 2.x implementation supports this feature, but it is " + "being " + "disabled by -cl-uniform-work-group-size build flag") == + string_class::npos) { + std::cerr + << "Test case OpenCL2XNegativeE failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } catch (runtime_error &E) { std::cerr - << "Test case OpenCL2XNegativeD2 failed: unexpected exception: " + << "Test case OpenCL2XNegativeE failed: unexpected exception: " << E.what() << std::endl; return 1; + } catch (...) { + std::cerr + << "Test case OpenCL2XNegativeE failed: something unexpected " + "has been caught" + << std::endl; + return 1; } - } catch (runtime_error &E) { - std::cerr - << "Test case OpenCL2XNegativeD2 failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } catch (...) { - std::cerr - << "Test case OpenCL2XNegativeD2 failed: something unexpected " - "has been caught" - << std::endl; - return 1; } - } - // Multi-dimensional nd_range. - { - program P(Q.get_context()); - P.build_with_kernel_type( - "-cl-std=CL2.0 -cl-uniform-work-group-size"); - kernel K = P.get_kernel(); - try { - // parallel_for, 6, 6, 6 global, 2, 2, 7 local -> fail - Q.submit([&](handler &CGH) { - CGH.parallel_for( - K, nd_range<3>(range<3>(6, 6, 6), range<3>(2, 2, 7)), - [=](nd_item<3>) {}); - }); - Q.wait_and_throw(); - // FIXME: some Intel runtimes contain bug and don't return expected - // error code - if (info::device_type::cpu != DeviceType || - DeviceVendorName.find("Intel") == string_class::npos) { + // Local Size larger than Global, -cl-std=CL2.0 + // -cl-uniform-work-group-size flag used CL_INVALID_WORK_GROUP_SIZE if + // local_work_size is specified as larger than the global size, then a + // different error string is used. This is a sub-case of the more general + // 'non-uniform work group' + { + program P(Q.get_context()); + P.build_with_kernel_type( + "-cl-std=CL2.0 -cl-uniform-work-group-size"); + + kernel K = P.get_kernel(); + try { + // parallel_for, 16 global, 17 local -> fail + Q.submit([&](handler &CGH) { + CGH.parallel_for( + K, nd_range<1>(range<1>(16), range<1>(17)), [=](nd_item<1>) {}); + }); + Q.wait_and_throw(); + // FIXME: some Intel runtimes contain bug and don't return expected + // error code + if (info::device_type::cpu != DeviceType || + DeviceVendorName.find("Intel") == string_class::npos) { + std::cerr + << "Test case OpenCL2XNegativeD2 failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } + } catch (nd_range_error &E) { + if (string_class(E.what()).find( + "Local workgroup size greater than global range size. " + "Non-uniform work-groups are not allowed by when " + "-cl-uniform-work-group-size flag is used. Underlying " + "OpenCL 2.x implementation supports this feature, but it is " + "being " + "disabled by -cl-uniform-work-group-size build flag") == + string_class::npos) { + std::cerr + << "Test case OpenCL2XNegativeD2 failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } catch (runtime_error &E) { std::cerr - << "Test case OpenCL2XNegativeE2 failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected + << "Test case OpenCL2XNegativeD2 failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr + << "Test case OpenCL2XNegativeD2 failed: something unexpected " + "has been caught" + << std::endl; + return 1; } - } catch (nd_range_error &E) { - if (string_class(E.what()).find( - "Local workgroup size greater than global range size. " - "Non-uniform work-groups are not allowed by when " - "-cl-uniform-work-group-size flag is used. Underlying " - "OpenCL 2.x implementation supports this feature, but it is " - "being " - "disabled by -cl-uniform-work-group-size build flag") == - string_class::npos) { + } + // Multi-dimensional nd_range. + { + program P(Q.get_context()); + P.build_with_kernel_type( + "-cl-std=CL2.0 -cl-uniform-work-group-size"); + + kernel K = P.get_kernel(); + try { + // parallel_for, 6, 6, 6 global, 2, 2, 7 local -> fail + Q.submit([&](handler &CGH) { + CGH.parallel_for( + K, nd_range<3>(range<3>(6, 6, 6), range<3>(2, 2, 7)), + [=](nd_item<3>) {}); + }); + Q.wait_and_throw(); + // FIXME: some Intel runtimes contain bug and don't return expected + // error code + if (info::device_type::cpu != DeviceType || + DeviceVendorName.find("Intel") == string_class::npos) { + std::cerr + << "Test case OpenCL2XNegativeE2 failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } + } catch (nd_range_error &E) { + if (string_class(E.what()).find( + "Local workgroup size greater than global range size. " + "Non-uniform work-groups are not allowed by when " + "-cl-uniform-work-group-size flag is used. Underlying " + "OpenCL 2.x implementation supports this feature, but it is " + "being " + "disabled by -cl-uniform-work-group-size build flag") == + string_class::npos) { + std::cerr + << "Test case OpenCL2XNegativeE2 failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } catch (runtime_error &E) { std::cerr << "Test case OpenCL2XNegativeE2 failed: unexpected exception: " << E.what() << std::endl; return 1; + } catch (...) { + std::cerr + << "Test case OpenCL2XNegativeE2 failed: something unexpected " + "has been caught" + << std::endl; + return 1; } - } catch (runtime_error &E) { - std::cerr - << "Test case OpenCL2XNegativeE2 failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } catch (...) { - std::cerr - << "Test case OpenCL2XNegativeE2 failed: something unexpected " - "has been caught" - << std::endl; - return 1; } } - } // local size has a 0-based range -- no SIGFPEs, we hope try { diff --git a/sycl/test/plugins/sycl-ls-gpu-opencl.cpp b/sycl/test/plugins/sycl-ls-gpu-opencl.cpp index 31158d14e5945..4676017086484 100755 --- a/sycl/test/plugins/sycl-ls-gpu-opencl.cpp +++ b/sycl/test/plugins/sycl-ls-gpu-opencl.cpp @@ -3,8 +3,8 @@ // RUN: env SYCL_BE=PI_OPENCL sycl-ls --verbose >%t.opencl.out // RUN: FileCheck %s --check-prefixes=CHECK-GPU-BUILTIN,CHECK-GPU-CUSTOM --input-file %t.opencl.out -// CHECK-GPU-BUILTIN: gpu_selector(){{.*}}GPU : 2.1 -// CHECK-GPU-CUSTOM: custom_selector(gpu){{.*}}GPU : 2.1 +// CHECK-GPU-BUILTIN: gpu_selector(){{.*}}GPU : {{[0-9].[0-9]}} +// CHECK-GPU-CUSTOM: custom_selector(gpu){{.*}}GPU : {{[0-9].[0-9]}} //==-- sycl-ls-gpu-opencl.cpp - SYCL test for discovered/selected devices -===// // From 7c6b85631f53d9403219fd0f4a9a8305134a3701 Mon Sep 17 00:00:00 2001 From: Denis Bakhvalov Date: Tue, 22 Sep 2020 14:52:20 -0700 Subject: [PATCH 3/9] Revert "Fixed review comments" This reverts commit 8d4876a5dc5f414a1b7251fbb9f106737a3f669e. --- sycl/test/basic_tests/parallel_for_range.cpp | 1196 +++++++++--------- sycl/test/plugins/sycl-ls-gpu-opencl.cpp | 4 +- 2 files changed, 593 insertions(+), 607 deletions(-) diff --git a/sycl/test/basic_tests/parallel_for_range.cpp b/sycl/test/basic_tests/parallel_for_range.cpp index 2b9d3dad73fb0..cab6523a70d06 100644 --- a/sycl/test/basic_tests/parallel_for_range.cpp +++ b/sycl/test/basic_tests/parallel_for_range.cpp @@ -31,153 +31,413 @@ int main() { auto DeviceType = D.get_info(); string_class OCLVersionStr = D.get_info(); - assert((OCLVersionStr.size() >= 3) && "Unexpected device version string"); - assert(OCLVersionStr.find(".") != string_class::npos && - "Unexpected device version string"); - const char OCLVersionMajor = OCLVersionStr[0]; - const char OCLVersionMinor = OCLVersionStr[2]; + const bool OCLBackend = (OCLVersionStr.find("OpenCL") != string_class::npos); + assert((!OCLBackend || (OCLVersionStr.size() >= 10)) && + "Unexpected device version string"); // strlen("OpenCL X.Y") + const char *OCLVersion = &OCLVersionStr[7]; // strlen("OpenCL ") // reqd_work_group_size is OpenCL specific. - if (OCLVersionMajor == '1' || - (OCLVersionMajor == '2' && OCLVersionMinor == '0')) { - // parallel_for, (16, 16, 16) global, (8, 8, 8) local, reqd_wg_size(4, 4, - // 4) - // -> fail + if (OCLBackend) { + if (OCLVersion[0] == '1' || + (OCLVersion[0] == '2' && OCLVersion[2] == '0')) { + // parallel_for, (16, 16, 16) global, (8, 8, 8) local, reqd_wg_size(4, 4, + // 4) + // -> fail + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for( + nd_range<3>(range<3>(16, 16, 16), range<3>(8, 8, 8)), + [=](nd_item<3>) { reqd_wg_size_helper(); }); + }); + Q.wait_and_throw(); + std::cerr + << "Test case ReqdWGSizeNegativeA failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (nd_range_error &E) { + if (string_class(E.what()).find("Specified local size doesn't match " + "the required work-group size " + "specified in the program source") == + string_class::npos) { + std::cerr + << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } catch (runtime_error &E) { + std::cerr + << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr + << "Test case ReqdWGSizeNegativeA failed: something unexpected " + "has been caught" + << std::endl; + return 1; + } + + // parallel_for, (16, 16, 16) global, null local, reqd_wg_size(4, 4, 4) // + // -> fail + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for( + range<3>(16, 16, 16), [=](item<3>) { reqd_wg_size_helper(); }); + }); + Q.wait_and_throw(); + std::cerr + << "Test case ReqdWGSizeNegativeB failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (nd_range_error &E) { + if (string_class(E.what()).find("OpenCL 1.x and 2.0 requires to pass " + "local size argument even if " + "required work-group size was " + "specified in the program source") == + string_class::npos) { + std::cerr + << "Test case ReqdWGSizeNegativeB failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } catch (runtime_error &E) { + std::cerr + << "Test case ReqdWGSizeNegativeB failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr + << "Test case ReqdWGSizeNegativeB failed: something unexpected " + "has been caught" + << std::endl; + return 1; + } + } + + // Positive test-cases that should pass on any underlying OpenCL runtime + + // parallel_for, (8, 8, 8) global, (4, 4, 4) local, reqd_wg_size(4, 4, 4) -> + // pass try { Q.submit([&](handler &CGH) { - CGH.parallel_for( - nd_range<3>(range<3>(16, 16, 16), range<3>(8, 8, 8)), + CGH.parallel_for( + nd_range<3>(range<3>(8, 8, 8), range<3>(4, 4, 4)), [=](nd_item<3>) { reqd_wg_size_helper(); }); }); Q.wait_and_throw(); + } catch (nd_range_error &E) { std::cerr - << "Test case ReqdWGSizeNegativeA failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected + << "Test case ReqdWGSizePositiveA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (runtime_error &E) { + std::cerr + << "Test case ReqdWGSizePositiveA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr << "Test case ReqdWGSizePositiveA failed: something unexpected " + "has been caught" + << std::endl; + return 1; + } + } // if (OCLBackend) + + if (!OCLBackend || (OCLVersion[0] == '1')) { + // OpenCL 1.x or non-OpenCL backends which behave like OpenCl 1.2 in SYCL. + + // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and + // number of workitems specified by global_work_size is not evenly + // divisible by size of work-group given by local_work_size + try { + // parallel_for, 100 global, 3 local -> fail + Q.submit([&](handler &CGH) { + CGH.parallel_for( + nd_range<1>(range<1>(100), range<1>(3)), [=](nd_item<1>) {}); + }); + Q.wait_and_throw(); + // FIXME: some Intel runtimes contain bug and don't return expected + // error code + if (info::device_type::accelerator != DeviceType || + DeviceVendorName.find("Intel") == string_class::npos) { + std::cerr + << "Test case OpenCL1XNegativeA failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } } catch (nd_range_error &E) { - if (string_class(E.what()).find("Specified local size doesn't match " - "the required work-group size " - "specified in the program source") == + if (string_class(E.what()).find("Non-uniform work-groups are not " + "supported by the target device") == string_class::npos) { std::cerr - << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " + << "Test case OpenCL1XNegativeA failed: unexpected exception: " << E.what() << std::endl; return 1; } } catch (runtime_error &E) { - std::cerr - << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " - << E.what() << std::endl; + std::cerr << "Test case OpenCL1XNegativeA failed: unexpected exception: " + << E.what() << std::endl; return 1; } catch (...) { - std::cerr << "Test case ReqdWGSizeNegativeA failed: something unexpected " + std::cerr << "Test case OpenCL1XNegativeA failed: something unexpected " "has been caught" << std::endl; return 1; } - // parallel_for, (16, 16, 16) global, null local, reqd_wg_size(4, 4, 4) // - // -> fail try { + // parallel_for, (100, 33, 16) global, (2, 3, 5) local -> fail Q.submit([&](handler &CGH) { - CGH.parallel_for( - range<3>(16, 16, 16), [=](item<3>) { reqd_wg_size_helper(); }); + CGH.parallel_for( + nd_range<3>(range<3>(100, 33, 16), range<3>(2, 3, 5)), + [=](nd_item<3>) {}); }); Q.wait_and_throw(); - std::cerr - << "Test case ReqdWGSizeNegativeB failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected + // FIXME: some Intel runtimes contain bug and don't return expected + // error code + if (info::device_type::accelerator != DeviceType || + DeviceVendorName.find("Intel") == string_class::npos) { + std::cerr + << "Test case OpenCL1XNegativeB failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } } catch (nd_range_error &E) { - if (string_class(E.what()).find("OpenCL 1.x and 2.0 requires to pass " - "local size argument even if " - "required work-group size was " - "specified in the program source") == + if (string_class(E.what()).find("Non-uniform work-groups are not " + "supported by the target device") == string_class::npos) { std::cerr - << "Test case ReqdWGSizeNegativeB failed: unexpected exception: " + << "Test case OpenCL1XNegativeB failed: unexpected exception: " << E.what() << std::endl; return 1; } } catch (runtime_error &E) { - std::cerr - << "Test case ReqdWGSizeNegativeB failed: unexpected exception: " - << E.what() << std::endl; + std::cerr << "Test case OpenCL1XNegativeB failed: unexpected exception: " + << E.what() << std::endl; return 1; } catch (...) { - std::cerr << "Test case ReqdWGSizeNegativeB failed: something unexpected " + std::cerr << "Test case OpenCL1XNegativeB failed: something unexpected " "has been caught" << std::endl; return 1; } - } - // Positive test-cases that should pass on any underlying OpenCL runtime - - // parallel_for, (8, 8, 8) global, (4, 4, 4) local, reqd_wg_size(4, 4, 4) -> - // pass + // Local Size larger than Global. + // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified as larger + // than the global size, then a different error string is used. + // This is a sub-case of the more general 'non-uniform work group' try { + // parallel_for, 16 global, 17 local -> fail Q.submit([&](handler &CGH) { - CGH.parallel_for( - nd_range<3>(range<3>(8, 8, 8), range<3>(4, 4, 4)), - [=](nd_item<3>) { reqd_wg_size_helper(); }); + CGH.parallel_for( + nd_range<1>(range<1>(16), range<1>(17)), [=](nd_item<1>) {}); }); Q.wait_and_throw(); + // FIXME: some Intel runtimes contain bug and don't return expected + // error code + if (info::device_type::accelerator != DeviceType || + DeviceVendorName.find("Intel") == string_class::npos) { + std::cerr + << "Test case OpenCL1XNegativeA2 failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } } catch (nd_range_error &E) { - std::cerr - << "Test case ReqdWGSizePositiveA failed: unexpected exception: " - << E.what() << std::endl; + if ((string_class(E.what()).find("Local workgroup size cannot be greater " + "than global range in any dimension") == + string_class::npos) && + (string_class(E.what()).find("Non-uniform work-groups are not " + "supported by the target device") == + string_class::npos)) { + std::cerr + << "Test case OpenCL1XNegativeA2 failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } catch (runtime_error &E) { + std::cerr << "Test case OpenCL1XNegativeA2 failed: unexpected exception: " + << E.what() << std::endl; return 1; + } catch (...) { + std::cerr << "Test case OpenCL1XNegativeA2 failed: something unexpected " + "has been caught" + << std::endl; + return 1; + } + + // Local Size larger than Global, multi-dimensional + // This is a sub-case of the more general 'non-uniform work group' + try { + // parallel_for, 6, 6, 6 global, 2, 2, 7 local -> fail + Q.submit([&](handler &CGH) { + CGH.parallel_for( + nd_range<3>(range<3>(6, 6, 6), range<3>(2, 2, 7)), + [=](nd_item<3>) {}); + }); + Q.wait_and_throw(); + // FIXME: some Intel runtimes contain bug and don't return expected + // error code + if (info::device_type::accelerator != DeviceType || + DeviceVendorName.find("Intel") == string_class::npos) { + std::cerr + << "Test case OpenCL1XNegativeB2 failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } + } catch (nd_range_error &E) { + if ((string_class(E.what()).find("Local workgroup size cannot be greater " + "than global range in any dimension") == + string_class::npos) && + (string_class(E.what()).find("Non-uniform work-groups are not " + "supported by the target device") == + string_class::npos)) { + std::cerr + << "Test case OpenCL1XNegativeB2 failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } } catch (runtime_error &E) { - std::cerr - << "Test case ReqdWGSizePositiveA failed: unexpected exception: " - << E.what() << std::endl; + std::cerr << "Test case OpenCL1XNegativeB2 failed: unexpected exception: " + << E.what() << std::endl; return 1; } catch (...) { - std::cerr << "Test case ReqdWGSizePositiveA failed: something unexpected " + std::cerr << "Test case OpenCL1XNegativeB2 failed: something unexpected " "has been caught" << std::endl; return 1; } - if (OCLVersionMajor == '1') { - // OpenCL 1.x or non-OpenCL backends which behave like OpenCl 1.2 in SYCL. + // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the + // total number of work-items in the work-group computed as + // local_work_size[0] * ... * local_work_size[work_dim – 1] is greater + // than the value specified by CL_DEVICE_MAX_WORK_GROUP_SIZE in + // table 4.3 + size_t MaxDeviceWGSize = D.get_info(); + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for( + nd_range<2>(range<2>(MaxDeviceWGSize, MaxDeviceWGSize), + range<2>(MaxDeviceWGSize, 2)), + [=](nd_item<2>) {}); + }); + Q.wait_and_throw(); + std::cerr << "Test case OpenCL1XNegativeC failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (nd_range_error &E) { + if ((string_class(E.what()).find( + "Total number of work-items in a work-group cannot exceed " + + std::to_string(MaxDeviceWGSize)) == string_class::npos) && + (string_class(E.what()).find("Non-uniform work-groups are not " + "supported by the target device") == + string_class::npos)) { + std::cerr + << "Test case OpenCL1XNegativeC failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } catch (runtime_error &E) { + std::cerr << "Test case OpenCL1XNegativeC failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr << "Test case OpenCL1XNegativeC failed: something unexpected " + "has been caught" + << std::endl; + return 1; + } + } else if (OCLBackend && (OCLVersion[0] == '2')) { + // OpenCL 2.x + + // OpenCL 2.x: + // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the + // total number of work-items in the work-group computed as + // local_work_size[0] * ... * local_work_size[work_dim – 1] is greater + // than the value specified by CL_KERNEL_WORK_GROUP_SIZE in table 5.21. + { + program P(Q.get_context()); + P.build_with_kernel_type(); + + kernel K = P.get_kernel(); + size_t MaxKernelWGSize = + K.get_work_group_info( + Q.get_device()); + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for( + K, + nd_range<2>(range<2>(MaxKernelWGSize, MaxKernelWGSize), + range<2>(MaxKernelWGSize, 2)), + [=](nd_item<2>) {}); + }); + Q.wait_and_throw(); + std::cerr + << "Test case OpenCL2XNegativeA failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (nd_range_error &E) { + if (string_class(E.what()).find( + "Total number of work-items in a work-group cannot exceed " + + std::to_string(MaxKernelWGSize) + " for this kernel") == + string_class::npos) { + std::cerr + << "Test case OpenCL2XNegativeA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } catch (runtime_error &E) { + std::cerr + << "Test case OpenCL2XNegativeA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr << "Test case OpenCL2XNegativeA failed: something unexpected " + "has been caught" + << std::endl; + return 1; + } + } - // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and - // number of workitems specified by global_work_size is not evenly - // divisible by size of work-group given by local_work_size + // By default, program is built in OpenCL 1.2 mode, so the following error + // is expected even for OpenCL 2.x: + // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and + // number of workitems specified by global_work_size is not evenly + // divisible by size of work-group given by local_work_size + { try { // parallel_for, 100 global, 3 local -> fail Q.submit([&](handler &CGH) { - CGH.parallel_for( + CGH.parallel_for( nd_range<1>(range<1>(100), range<1>(3)), [=](nd_item<1>) {}); }); Q.wait_and_throw(); // FIXME: some Intel runtimes contain bug and don't return expected // error code - if (info::device_type::accelerator != DeviceType || + if (info::device_type::cpu != DeviceType || DeviceVendorName.find("Intel") == string_class::npos) { std::cerr - << "Test case OpenCL1XNegativeA failed: no exception has been " + << "Test case OpenCL2XNegativeB failed: no exception has been " "thrown\n"; return 1; // We shouldn't be here, exception is expected } } catch (nd_range_error &E) { - if (string_class(E.what()).find("Non-uniform work-groups are not " - "supported by the target device") == + if (string_class(E.what()).find( + "Non-uniform work-groups are not allowed by default. " + "Underlying OpenCL 2.x implementation supports this feature " + "and to enable it, build device program with -cl-std=CL2.0") == string_class::npos) { std::cerr - << "Test case OpenCL1XNegativeA failed: unexpected exception: " + << "Test case OpenCL2XNegativeB failed: unexpected exception: " << E.what() << std::endl; return 1; } } catch (runtime_error &E) { std::cerr - << "Test case OpenCL1XNegativeA failed: unexpected exception: " + << "Test case OpenCL2XNegativeB failed: unexpected exception: " << E.what() << std::endl; return 1; } catch (...) { - std::cerr << "Test case OpenCL1XNegativeA failed: something unexpected " + std::cerr << "Test case OpenCL2XNegativeB failed: something unexpected " "has been caught" << std::endl; return 1; @@ -186,81 +446,86 @@ int main() { try { // parallel_for, (100, 33, 16) global, (2, 3, 5) local -> fail Q.submit([&](handler &CGH) { - CGH.parallel_for( + CGH.parallel_for( nd_range<3>(range<3>(100, 33, 16), range<3>(2, 3, 5)), [=](nd_item<3>) {}); }); Q.wait_and_throw(); // FIXME: some Intel runtimes contain bug and don't return expected // error code - if (info::device_type::accelerator != DeviceType || + if (info::device_type::cpu != DeviceType || DeviceVendorName.find("Intel") == string_class::npos) { std::cerr - << "Test case OpenCL1XNegativeB failed: no exception has been " + << "Test case OpenCL2XNegativeC failed: no exception has been " "thrown\n"; return 1; // We shouldn't be here, exception is expected } } catch (nd_range_error &E) { - if (string_class(E.what()).find("Non-uniform work-groups are not " - "supported by the target device") == + if (string_class(E.what()).find( + "Non-uniform work-groups are not allowed by default. " + "Underlying OpenCL 2.x implementation supports this feature " + "and to enable it, build device program with -cl-std=CL2.0") == string_class::npos) { std::cerr - << "Test case OpenCL1XNegativeB failed: unexpected exception: " + << "Test case OpenCL2XNegativeC failed: unexpected exception: " << E.what() << std::endl; return 1; } } catch (runtime_error &E) { std::cerr - << "Test case OpenCL1XNegativeB failed: unexpected exception: " + << "Test case OpenCL2XNegativeC failed: unexpected exception: " << E.what() << std::endl; return 1; } catch (...) { - std::cerr << "Test case OpenCL1XNegativeB failed: something unexpected " + std::cerr << "Test case OpenCL2XNegativeC failed: something unexpected " "has been caught" << std::endl; return 1; } + } - // Local Size larger than Global. - // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified as larger - // than the global size, then a different error string is used. - // This is a sub-case of the more general 'non-uniform work group' + // Local Size larger than Global. + // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified as larger + // than the global size, then a different error string is used. + // This is a sub-case of the more general 'non-uniform work group' + { try { // parallel_for, 16 global, 17 local -> fail Q.submit([&](handler &CGH) { - CGH.parallel_for( + CGH.parallel_for( nd_range<1>(range<1>(16), range<1>(17)), [=](nd_item<1>) {}); }); Q.wait_and_throw(); // FIXME: some Intel runtimes contain bug and don't return expected // error code - if (info::device_type::accelerator != DeviceType || + if (info::device_type::cpu != DeviceType || DeviceVendorName.find("Intel") == string_class::npos) { std::cerr - << "Test case OpenCL1XNegativeA2 failed: no exception has been " + << "Test case OpenCL2XNegativeB2 failed: no exception has been " "thrown\n"; return 1; // We shouldn't be here, exception is expected } } catch (nd_range_error &E) { - if ((string_class(E.what()).find( - "Local workgroup size cannot be greater " - "than global range in any dimension") == string_class::npos) && - (string_class(E.what()).find("Non-uniform work-groups are not " - "supported by the target device") == - string_class::npos)) { + if (string_class(E.what()).find( + "Local workgroup size greater than global range size. " + "Non-uniform work-groups are not allowed by default. " + "Underlying " + "OpenCL 2.x implementation supports this feature and to enable " + "it, build device program with -cl-std=CL2.0") == + string_class::npos) { std::cerr - << "Test case OpenCL1XNegativeA2 failed: unexpected exception: " + << "Test case OpenCL2XNegativeB2 failed: unexpected exception: " << E.what() << std::endl; return 1; } } catch (runtime_error &E) { std::cerr - << "Test case OpenCL1XNegativeA2 failed: unexpected exception: " + << "Test case OpenCL2XNegativeB2 failed: unexpected exception: " << E.what() << std::endl; return 1; } catch (...) { std::cerr - << "Test case OpenCL1XNegativeA2 failed: something unexpected " + << "Test case OpenCL2XNegativeB2 failed: something unexpected " "has been caught" << std::endl; return 1; @@ -271,597 +536,318 @@ int main() { try { // parallel_for, 6, 6, 6 global, 2, 2, 7 local -> fail Q.submit([&](handler &CGH) { - CGH.parallel_for( + CGH.parallel_for( nd_range<3>(range<3>(6, 6, 6), range<3>(2, 2, 7)), [=](nd_item<3>) {}); }); Q.wait_and_throw(); // FIXME: some Intel runtimes contain bug and don't return expected // error code - if (info::device_type::accelerator != DeviceType || + if (info::device_type::cpu != DeviceType || DeviceVendorName.find("Intel") == string_class::npos) { std::cerr - << "Test case OpenCL1XNegativeB2 failed: no exception has been " + << "Test case OpenCL2XNegativeC2 failed: no exception has been " "thrown\n"; return 1; // We shouldn't be here, exception is expected } } catch (nd_range_error &E) { - if ((string_class(E.what()).find( - "Local workgroup size cannot be greater " - "than global range in any dimension") == string_class::npos) && - (string_class(E.what()).find("Non-uniform work-groups are not " - "supported by the target device") == - string_class::npos)) { + if (string_class(E.what()).find( + "Local workgroup size greater than global range size. " + "Non-uniform work-groups are not allowed by default. " + "Underlying OpenCL 2.x implementation supports this feature " + "and to enable it, build device program with -cl-std=CL2.0") == + string_class::npos) { std::cerr - << "Test case OpenCL1XNegativeB2 failed: unexpected exception: " + << "Test case OpenCL2XNegativeC2 failed: unexpected exception: " << E.what() << std::endl; return 1; } } catch (runtime_error &E) { std::cerr - << "Test case OpenCL1XNegativeB2 failed: unexpected exception: " + << "Test case OpenCL2XNegativeC2 failed: unexpected exception: " << E.what() << std::endl; return 1; } catch (...) { std::cerr - << "Test case OpenCL1XNegativeB2 failed: something unexpected " + << "Test case OpenCL2XNegativeC2 failed: something unexpected " "has been caught" << std::endl; return 1; } + } + + // Enable non-uniform work-groups by passing -cl-std=CL2.0 + { + program P(Q.get_context()); + P.build_with_kernel_type("-cl-std=CL2.0"); - // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the - // total number of work-items in the work-group computed as - // local_work_size[0] * ... * local_work_size[work_dim – 1] is greater - // than the value specified by CL_DEVICE_MAX_WORK_GROUP_SIZE in - // table 4.3 - size_t MaxDeviceWGSize = D.get_info(); + kernel K = P.get_kernel(); try { + // parallel_for, 100 global, 3 local -> pass Q.submit([&](handler &CGH) { - CGH.parallel_for( - nd_range<2>(range<2>(MaxDeviceWGSize, MaxDeviceWGSize), - range<2>(MaxDeviceWGSize, 2)), - [=](nd_item<2>) {}); + CGH.parallel_for( + K, nd_range<1>(range<1>(100), range<1>(3)), [=](nd_item<1>) {}); }); Q.wait_and_throw(); + } catch (nd_range_error &E) { std::cerr - << "Test case OpenCL1XNegativeC failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected + << "Test case OpenCL2XPositiveA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (runtime_error &E) { + std::cerr + << "Test case OpenCL2XPositiveA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr << "Test case OpenCL2XPositiveA failed: something unexpected " + "has been caught" + << std::endl; + return 1; + } + } + // Multi-dimensional nd_range. + { + program P(Q.get_context()); + P.build_with_kernel_type("-cl-std=CL2.0"); + + kernel K = P.get_kernel(); + try { + // parallel_for, (100, 33, 16) global, (2, 3, 5) local -> pass + Q.submit([&](handler &CGH) { + CGH.parallel_for( + K, nd_range<3>(range<3>(100, 33, 16), range<3>(2, 3, 5)), + [=](nd_item<3>) {}); + }); + Q.wait_and_throw(); } catch (nd_range_error &E) { - if ((string_class(E.what()).find( - "Total number of work-items in a work-group cannot exceed " + - std::to_string(MaxDeviceWGSize)) == string_class::npos) && - (string_class(E.what()).find("Non-uniform work-groups are not " - "supported by the target device") == - string_class::npos)) { - std::cerr - << "Test case OpenCL1XNegativeC failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } + std::cerr + << "Test case OpenCL2XPositiveB failed: unexpected exception: " + << E.what() << std::endl; + return 1; } catch (runtime_error &E) { std::cerr - << "Test case OpenCL1XNegativeC failed: unexpected exception: " + << "Test case OpenCL2XPositiveB failed: unexpected exception: " << E.what() << std::endl; return 1; } catch (...) { - std::cerr << "Test case OpenCL1XNegativeC failed: something unexpected " + std::cerr << "Test case OpenCL2XPositiveB failed: something unexpected " "has been caught" << std::endl; return 1; } - } else if (OCLVersionMajor == '2') { - // OpenCL 2.x - - // OpenCL 2.x: - // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and the - // total number of work-items in the work-group computed as - // local_work_size[0] * ... * local_work_size[work_dim – 1] is greater - // than the value specified by CL_KERNEL_WORK_GROUP_SIZE in table 5.21. - { - program P(Q.get_context()); - P.build_with_kernel_type(); - - kernel K = P.get_kernel(); - size_t MaxKernelWGSize = - K.get_work_group_info( - Q.get_device()); - try { - Q.submit([&](handler &CGH) { - CGH.parallel_for( - K, - nd_range<2>(range<2>(MaxKernelWGSize, MaxKernelWGSize), - range<2>(MaxKernelWGSize, 2)), - [=](nd_item<2>) {}); - }); - Q.wait_and_throw(); + } + + // Enable 2.0 mode with non-uniform work-groups, but disable the latter by + // specifying -cl-uniform-work-group-size: + // CL_INVALID_WORK_GROUP_SIZE if the program was compiled with + // –cl-uniform-work-group-size and the number of work-items specified + // by global_work_size is not evenly divisible by size of work-group + // given by local_work_size + { + program P(Q.get_context()); + P.build_with_kernel_type( + "-cl-std=CL2.0 -cl-uniform-work-group-size"); + + kernel K = P.get_kernel(); + try { + // parallel_for, 100 global, 3 local -> fail + Q.submit([&](handler &CGH) { + CGH.parallel_for( + K, nd_range<1>(range<1>(100), range<1>(3)), [=](nd_item<1>) {}); + }); + Q.wait_and_throw(); + // FIXME: some Intel runtimes contain bug and don't return expected + // error code + if (info::device_type::cpu != DeviceType || + DeviceVendorName.find("Intel") == string_class::npos) { std::cerr - << "Test case OpenCL2XNegativeA failed: no exception has been " + << "Test case OpenCL2XNegativeD failed: no exception has been " "thrown\n"; return 1; // We shouldn't be here, exception is expected - } catch (nd_range_error &E) { - if (string_class(E.what()).find( - "Total number of work-items in a work-group cannot exceed " + - std::to_string(MaxKernelWGSize) + " for this kernel") == - string_class::npos) { - std::cerr - << "Test case OpenCL2XNegativeA failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } - } catch (runtime_error &E) { - std::cerr - << "Test case OpenCL2XNegativeA failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } catch (...) { - std::cerr - << "Test case OpenCL2XNegativeA failed: something unexpected " - "has been caught" - << std::endl; - return 1; - } - } - - // By default, program is built in OpenCL 1.2 mode, so the following error - // is expected even for OpenCL 2.x: - // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and - // number of workitems specified by global_work_size is not evenly - // divisible by size of work-group given by local_work_size - { - try { - // parallel_for, 100 global, 3 local -> fail - Q.submit([&](handler &CGH) { - CGH.parallel_for( - nd_range<1>(range<1>(100), range<1>(3)), [=](nd_item<1>) {}); - }); - Q.wait_and_throw(); - // FIXME: some Intel runtimes contain bug and don't return expected - // error code - if (info::device_type::cpu != DeviceType || - DeviceVendorName.find("Intel") == string_class::npos) { - std::cerr - << "Test case OpenCL2XNegativeB failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } - } catch (nd_range_error &E) { - if (string_class(E.what()).find( - "Non-uniform work-groups are not allowed by default. " - "Underlying OpenCL 2.x implementation supports this feature " - "and to enable it, build device program with " - "-cl-std=CL2.0") == string_class::npos) { - std::cerr - << "Test case OpenCL2XNegativeB failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } - } catch (runtime_error &E) { - std::cerr - << "Test case OpenCL2XNegativeB failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } catch (...) { - std::cerr - << "Test case OpenCL2XNegativeB failed: something unexpected " - "has been caught" - << std::endl; - return 1; } - - try { - // parallel_for, (100, 33, 16) global, (2, 3, 5) local -> fail - Q.submit([&](handler &CGH) { - CGH.parallel_for( - nd_range<3>(range<3>(100, 33, 16), range<3>(2, 3, 5)), - [=](nd_item<3>) {}); - }); - Q.wait_and_throw(); - // FIXME: some Intel runtimes contain bug and don't return expected - // error code - if (info::device_type::cpu != DeviceType || - DeviceVendorName.find("Intel") == string_class::npos) { - std::cerr - << "Test case OpenCL2XNegativeC failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } - } catch (nd_range_error &E) { - if (string_class(E.what()).find( - "Non-uniform work-groups are not allowed by default. " - "Underlying OpenCL 2.x implementation supports this feature " - "and to enable it, build device program with " - "-cl-std=CL2.0") == string_class::npos) { - std::cerr - << "Test case OpenCL2XNegativeC failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } - } catch (runtime_error &E) { + } catch (nd_range_error &E) { + if (string_class(E.what()).find( + "Global_work_size not evenly divisible by local_work_size. " + "Non-uniform work-groups are not allowed by when " + "-cl-uniform-work-group-size flag is used. Underlying " + "OpenCL 2.x implementation supports this feature, but it is " + "being " + "disabled by -cl-uniform-work-group-size build flag") == + string_class::npos) { std::cerr - << "Test case OpenCL2XNegativeC failed: unexpected exception: " + << "Test case OpenCL2XNegativeD failed: unexpected exception: " << E.what() << std::endl; return 1; - } catch (...) { - std::cerr - << "Test case OpenCL2XNegativeC failed: something unexpected " - "has been caught" - << std::endl; - return 1; } + } catch (runtime_error &E) { + std::cerr + << "Test case OpenCL2XNegativeD failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr << "Test case OpenCL2XNegativeD failed: something unexpected " + "has been caught" + << std::endl; + return 1; } + } + // Multi-dimensional nd_range. + { + program P(Q.get_context()); + P.build_with_kernel_type( + "-cl-std=CL2.0 -cl-uniform-work-group-size"); - // Local Size larger than Global. - // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified as larger - // than the global size, then a different error string is used. - // This is a sub-case of the more general 'non-uniform work group' - { - try { - // parallel_for, 16 global, 17 local -> fail - Q.submit([&](handler &CGH) { - CGH.parallel_for( - nd_range<1>(range<1>(16), range<1>(17)), [=](nd_item<1>) {}); - }); - Q.wait_and_throw(); - // FIXME: some Intel runtimes contain bug and don't return expected - // error code - if (info::device_type::cpu != DeviceType || - DeviceVendorName.find("Intel") == string_class::npos) { - std::cerr - << "Test case OpenCL2XNegativeB2 failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } - } catch (nd_range_error &E) { - if (string_class(E.what()).find( - "Local workgroup size greater than global range size. " - "Non-uniform work-groups are not allowed by default. " - "Underlying " - "OpenCL 2.x implementation supports this feature and to " - "enable " - "it, build device program with -cl-std=CL2.0") == - string_class::npos) { - std::cerr - << "Test case OpenCL2XNegativeB2 failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } - } catch (runtime_error &E) { - std::cerr - << "Test case OpenCL2XNegativeB2 failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } catch (...) { + kernel K = P.get_kernel(); + try { + // parallel_for, (100, 33, 16) global, (2, 3, 5) local -> fail + Q.submit([&](handler &CGH) { + CGH.parallel_for( + K, nd_range<3>(range<3>(100, 33, 16), range<3>(2, 3, 5)), + [=](nd_item<3>) {}); + }); + Q.wait_and_throw(); + // FIXME: some Intel runtimes contain bug and don't return expected + // error code + if (info::device_type::cpu != DeviceType || + DeviceVendorName.find("Intel") == string_class::npos) { std::cerr - << "Test case OpenCL2XNegativeB2 failed: something unexpected " - "has been caught" - << std::endl; - return 1; + << "Test case OpenCL2XNegativeE failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected } - - // Local Size larger than Global, multi-dimensional - // This is a sub-case of the more general 'non-uniform work group' - try { - // parallel_for, 6, 6, 6 global, 2, 2, 7 local -> fail - Q.submit([&](handler &CGH) { - CGH.parallel_for( - nd_range<3>(range<3>(6, 6, 6), range<3>(2, 2, 7)), - [=](nd_item<3>) {}); - }); - Q.wait_and_throw(); - // FIXME: some Intel runtimes contain bug and don't return expected - // error code - if (info::device_type::cpu != DeviceType || - DeviceVendorName.find("Intel") == string_class::npos) { - std::cerr - << "Test case OpenCL2XNegativeC2 failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } - } catch (nd_range_error &E) { - if (string_class(E.what()).find( - "Local workgroup size greater than global range size. " - "Non-uniform work-groups are not allowed by default. " - "Underlying OpenCL 2.x implementation supports this feature " - "and to enable it, build device program with " - "-cl-std=CL2.0") == string_class::npos) { - std::cerr - << "Test case OpenCL2XNegativeC2 failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } - } catch (runtime_error &E) { + } catch (nd_range_error &E) { + if (string_class(E.what()).find( + "Global_work_size not evenly divisible by local_work_size. " + "Non-uniform work-groups are not allowed by when " + "-cl-uniform-work-group-size flag is used. Underlying " + "OpenCL 2.x implementation supports this feature, but it is " + "being " + "disabled by -cl-uniform-work-group-size build flag") == + string_class::npos) { std::cerr - << "Test case OpenCL2XNegativeC2 failed: unexpected exception: " + << "Test case OpenCL2XNegativeE failed: unexpected exception: " << E.what() << std::endl; return 1; - } catch (...) { - std::cerr - << "Test case OpenCL2XNegativeC2 failed: something unexpected " - "has been caught" - << std::endl; - return 1; } + } catch (runtime_error &E) { + std::cerr + << "Test case OpenCL2XNegativeE failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr << "Test case OpenCL2XNegativeE failed: something unexpected " + "has been caught" + << std::endl; + return 1; } + } - // Enable non-uniform work-groups by passing -cl-std=CL2.0 - { - program P(Q.get_context()); - P.build_with_kernel_type("-cl-std=CL2.0"); - - kernel K = P.get_kernel(); - try { - // parallel_for, 100 global, 3 local -> pass - Q.submit([&](handler &CGH) { - CGH.parallel_for( - K, nd_range<1>(range<1>(100), range<1>(3)), [=](nd_item<1>) {}); - }); - Q.wait_and_throw(); - } catch (nd_range_error &E) { - std::cerr - << "Test case OpenCL2XPositiveA failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } catch (runtime_error &E) { - std::cerr - << "Test case OpenCL2XPositiveA failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } catch (...) { - std::cerr - << "Test case OpenCL2XPositiveA failed: something unexpected " - "has been caught" - << std::endl; - return 1; - } - } - // Multi-dimensional nd_range. - { - program P(Q.get_context()); - P.build_with_kernel_type("-cl-std=CL2.0"); - - kernel K = P.get_kernel(); - try { - // parallel_for, (100, 33, 16) global, (2, 3, 5) local -> pass - Q.submit([&](handler &CGH) { - CGH.parallel_for( - K, nd_range<3>(range<3>(100, 33, 16), range<3>(2, 3, 5)), - [=](nd_item<3>) {}); - }); - Q.wait_and_throw(); - } catch (nd_range_error &E) { - std::cerr - << "Test case OpenCL2XPositiveB failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } catch (runtime_error &E) { - std::cerr - << "Test case OpenCL2XPositiveB failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } catch (...) { - std::cerr - << "Test case OpenCL2XPositiveB failed: something unexpected " - "has been caught" - << std::endl; - return 1; - } - } + // Local Size larger than Global, -cl-std=CL2.0 -cl-uniform-work-group-size + // flag used CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified as + // larger than the global size, then a different error string is used. This + // is a sub-case of the more general 'non-uniform work group' + { + program P(Q.get_context()); + P.build_with_kernel_type( + "-cl-std=CL2.0 -cl-uniform-work-group-size"); - // Enable 2.0 mode with non-uniform work-groups, but disable the latter by - // specifying -cl-uniform-work-group-size: - // CL_INVALID_WORK_GROUP_SIZE if the program was compiled with - // –cl-uniform-work-group-size and the number of work-items specified - // by global_work_size is not evenly divisible by size of work-group - // given by local_work_size - { - program P(Q.get_context()); - P.build_with_kernel_type( - "-cl-std=CL2.0 -cl-uniform-work-group-size"); - - kernel K = P.get_kernel(); - try { - // parallel_for, 100 global, 3 local -> fail - Q.submit([&](handler &CGH) { - CGH.parallel_for( - K, nd_range<1>(range<1>(100), range<1>(3)), [=](nd_item<1>) {}); - }); - Q.wait_and_throw(); - // FIXME: some Intel runtimes contain bug and don't return expected - // error code - if (info::device_type::cpu != DeviceType || - DeviceVendorName.find("Intel") == string_class::npos) { - std::cerr - << "Test case OpenCL2XNegativeD failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } - } catch (nd_range_error &E) { - if (string_class(E.what()).find( - "Global_work_size not evenly divisible by local_work_size. " - "Non-uniform work-groups are not allowed by when " - "-cl-uniform-work-group-size flag is used. Underlying " - "OpenCL 2.x implementation supports this feature, but it is " - "being " - "disabled by -cl-uniform-work-group-size build flag") == - string_class::npos) { - std::cerr - << "Test case OpenCL2XNegativeD failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } - } catch (runtime_error &E) { - std::cerr - << "Test case OpenCL2XNegativeD failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } catch (...) { + kernel K = P.get_kernel(); + try { + // parallel_for, 16 global, 17 local -> fail + Q.submit([&](handler &CGH) { + CGH.parallel_for( + K, nd_range<1>(range<1>(16), range<1>(17)), [=](nd_item<1>) {}); + }); + Q.wait_and_throw(); + // FIXME: some Intel runtimes contain bug and don't return expected + // error code + if (info::device_type::cpu != DeviceType || + DeviceVendorName.find("Intel") == string_class::npos) { std::cerr - << "Test case OpenCL2XNegativeD failed: something unexpected " - "has been caught" - << std::endl; - return 1; + << "Test case OpenCL2XNegativeD2 failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected } - } - // Multi-dimensional nd_range. - { - program P(Q.get_context()); - P.build_with_kernel_type( - "-cl-std=CL2.0 -cl-uniform-work-group-size"); - - kernel K = P.get_kernel(); - try { - // parallel_for, (100, 33, 16) global, (2, 3, 5) local -> fail - Q.submit([&](handler &CGH) { - CGH.parallel_for( - K, nd_range<3>(range<3>(100, 33, 16), range<3>(2, 3, 5)), - [=](nd_item<3>) {}); - }); - Q.wait_and_throw(); - // FIXME: some Intel runtimes contain bug and don't return expected - // error code - if (info::device_type::cpu != DeviceType || - DeviceVendorName.find("Intel") == string_class::npos) { - std::cerr - << "Test case OpenCL2XNegativeE failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } - } catch (nd_range_error &E) { - if (string_class(E.what()).find( - "Global_work_size not evenly divisible by local_work_size. " - "Non-uniform work-groups are not allowed by when " - "-cl-uniform-work-group-size flag is used. Underlying " - "OpenCL 2.x implementation supports this feature, but it is " - "being " - "disabled by -cl-uniform-work-group-size build flag") == - string_class::npos) { - std::cerr - << "Test case OpenCL2XNegativeE failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } - } catch (runtime_error &E) { + } catch (nd_range_error &E) { + if (string_class(E.what()).find( + "Local workgroup size greater than global range size. " + "Non-uniform work-groups are not allowed by when " + "-cl-uniform-work-group-size flag is used. Underlying " + "OpenCL 2.x implementation supports this feature, but it is " + "being " + "disabled by -cl-uniform-work-group-size build flag") == + string_class::npos) { std::cerr - << "Test case OpenCL2XNegativeE failed: unexpected exception: " + << "Test case OpenCL2XNegativeD2 failed: unexpected exception: " << E.what() << std::endl; return 1; - } catch (...) { - std::cerr - << "Test case OpenCL2XNegativeE failed: something unexpected " - "has been caught" - << std::endl; - return 1; } + } catch (runtime_error &E) { + std::cerr + << "Test case OpenCL2XNegativeD2 failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr + << "Test case OpenCL2XNegativeD2 failed: something unexpected " + "has been caught" + << std::endl; + return 1; } + } + // Multi-dimensional nd_range. + { + program P(Q.get_context()); + P.build_with_kernel_type( + "-cl-std=CL2.0 -cl-uniform-work-group-size"); - // Local Size larger than Global, -cl-std=CL2.0 - // -cl-uniform-work-group-size flag used CL_INVALID_WORK_GROUP_SIZE if - // local_work_size is specified as larger than the global size, then a - // different error string is used. This is a sub-case of the more general - // 'non-uniform work group' - { - program P(Q.get_context()); - P.build_with_kernel_type( - "-cl-std=CL2.0 -cl-uniform-work-group-size"); - - kernel K = P.get_kernel(); - try { - // parallel_for, 16 global, 17 local -> fail - Q.submit([&](handler &CGH) { - CGH.parallel_for( - K, nd_range<1>(range<1>(16), range<1>(17)), [=](nd_item<1>) {}); - }); - Q.wait_and_throw(); - // FIXME: some Intel runtimes contain bug and don't return expected - // error code - if (info::device_type::cpu != DeviceType || - DeviceVendorName.find("Intel") == string_class::npos) { - std::cerr - << "Test case OpenCL2XNegativeD2 failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } - } catch (nd_range_error &E) { - if (string_class(E.what()).find( - "Local workgroup size greater than global range size. " - "Non-uniform work-groups are not allowed by when " - "-cl-uniform-work-group-size flag is used. Underlying " - "OpenCL 2.x implementation supports this feature, but it is " - "being " - "disabled by -cl-uniform-work-group-size build flag") == - string_class::npos) { - std::cerr - << "Test case OpenCL2XNegativeD2 failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } - } catch (runtime_error &E) { - std::cerr - << "Test case OpenCL2XNegativeD2 failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } catch (...) { + kernel K = P.get_kernel(); + try { + // parallel_for, 6, 6, 6 global, 2, 2, 7 local -> fail + Q.submit([&](handler &CGH) { + CGH.parallel_for( + K, nd_range<3>(range<3>(6, 6, 6), range<3>(2, 2, 7)), + [=](nd_item<3>) {}); + }); + Q.wait_and_throw(); + // FIXME: some Intel runtimes contain bug and don't return expected + // error code + if (info::device_type::cpu != DeviceType || + DeviceVendorName.find("Intel") == string_class::npos) { std::cerr - << "Test case OpenCL2XNegativeD2 failed: something unexpected " - "has been caught" - << std::endl; - return 1; + << "Test case OpenCL2XNegativeE2 failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected } - } - // Multi-dimensional nd_range. - { - program P(Q.get_context()); - P.build_with_kernel_type( - "-cl-std=CL2.0 -cl-uniform-work-group-size"); - - kernel K = P.get_kernel(); - try { - // parallel_for, 6, 6, 6 global, 2, 2, 7 local -> fail - Q.submit([&](handler &CGH) { - CGH.parallel_for( - K, nd_range<3>(range<3>(6, 6, 6), range<3>(2, 2, 7)), - [=](nd_item<3>) {}); - }); - Q.wait_and_throw(); - // FIXME: some Intel runtimes contain bug and don't return expected - // error code - if (info::device_type::cpu != DeviceType || - DeviceVendorName.find("Intel") == string_class::npos) { - std::cerr - << "Test case OpenCL2XNegativeE2 failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } - } catch (nd_range_error &E) { - if (string_class(E.what()).find( - "Local workgroup size greater than global range size. " - "Non-uniform work-groups are not allowed by when " - "-cl-uniform-work-group-size flag is used. Underlying " - "OpenCL 2.x implementation supports this feature, but it is " - "being " - "disabled by -cl-uniform-work-group-size build flag") == - string_class::npos) { - std::cerr - << "Test case OpenCL2XNegativeE2 failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } - } catch (runtime_error &E) { + } catch (nd_range_error &E) { + if (string_class(E.what()).find( + "Local workgroup size greater than global range size. " + "Non-uniform work-groups are not allowed by when " + "-cl-uniform-work-group-size flag is used. Underlying " + "OpenCL 2.x implementation supports this feature, but it is " + "being " + "disabled by -cl-uniform-work-group-size build flag") == + string_class::npos) { std::cerr << "Test case OpenCL2XNegativeE2 failed: unexpected exception: " << E.what() << std::endl; return 1; - } catch (...) { - std::cerr - << "Test case OpenCL2XNegativeE2 failed: something unexpected " - "has been caught" - << std::endl; - return 1; } + } catch (runtime_error &E) { + std::cerr + << "Test case OpenCL2XNegativeE2 failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr + << "Test case OpenCL2XNegativeE2 failed: something unexpected " + "has been caught" + << std::endl; + return 1; } } + } // local size has a 0-based range -- no SIGFPEs, we hope try { diff --git a/sycl/test/plugins/sycl-ls-gpu-opencl.cpp b/sycl/test/plugins/sycl-ls-gpu-opencl.cpp index 4676017086484..31158d14e5945 100755 --- a/sycl/test/plugins/sycl-ls-gpu-opencl.cpp +++ b/sycl/test/plugins/sycl-ls-gpu-opencl.cpp @@ -3,8 +3,8 @@ // RUN: env SYCL_BE=PI_OPENCL sycl-ls --verbose >%t.opencl.out // RUN: FileCheck %s --check-prefixes=CHECK-GPU-BUILTIN,CHECK-GPU-CUSTOM --input-file %t.opencl.out -// CHECK-GPU-BUILTIN: gpu_selector(){{.*}}GPU : {{[0-9].[0-9]}} -// CHECK-GPU-CUSTOM: custom_selector(gpu){{.*}}GPU : {{[0-9].[0-9]}} +// CHECK-GPU-BUILTIN: gpu_selector(){{.*}}GPU : 2.1 +// CHECK-GPU-CUSTOM: custom_selector(gpu){{.*}}GPU : 2.1 //==-- sycl-ls-gpu-opencl.cpp - SYCL test for discovered/selected devices -===// // From db2b3211b50638733a8b02e9f210569f0cd1aafb Mon Sep 17 00:00:00 2001 From: Denis Bakhvalov Date: Tue, 22 Sep 2020 14:58:21 -0700 Subject: [PATCH 4/9] Fixed review comments --- sycl/test/basic_tests/parallel_for_range.cpp | 21 +++++++++----------- sycl/test/plugins/sycl-ls-gpu-opencl.cpp | 4 ++-- 2 files changed, 11 insertions(+), 14 deletions(-) diff --git a/sycl/test/basic_tests/parallel_for_range.cpp b/sycl/test/basic_tests/parallel_for_range.cpp index cab6523a70d06..432d33ab77daf 100644 --- a/sycl/test/basic_tests/parallel_for_range.cpp +++ b/sycl/test/basic_tests/parallel_for_range.cpp @@ -31,15 +31,13 @@ int main() { auto DeviceType = D.get_info(); string_class OCLVersionStr = D.get_info(); - const bool OCLBackend = (OCLVersionStr.find("OpenCL") != string_class::npos); - assert((!OCLBackend || (OCLVersionStr.size() >= 10)) && - "Unexpected device version string"); // strlen("OpenCL X.Y") - const char *OCLVersion = &OCLVersionStr[7]; // strlen("OpenCL ") - - // reqd_work_group_size is OpenCL specific. - if (OCLBackend) { - if (OCLVersion[0] == '1' || - (OCLVersion[0] == '2' && OCLVersion[2] == '0')) { + assert((OCLVersionStr.size() >= 3) && "Unexpected device version string"); + assert(OCLVersionStr.find(".") != string_class::npos && "Unexpected device version string"); + const char OCLVersionMajor = OCLVersionStr[0]; + const char OCLVersionMinor = OCLVersionStr[2]; + + if (OCLVersionMajor == '1' || + (OCLVersionMajor == '2' && OCLVersionMinor == '0')) { // parallel_for, (16, 16, 16) global, (8, 8, 8) local, reqd_wg_size(4, 4, // 4) // -> fail @@ -141,9 +139,8 @@ int main() { << std::endl; return 1; } - } // if (OCLBackend) - if (!OCLBackend || (OCLVersion[0] == '1')) { + if (OCLVersionMajor == '1') { // OpenCL 1.x or non-OpenCL backends which behave like OpenCl 1.2 in SYCL. // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and @@ -346,7 +343,7 @@ int main() { << std::endl; return 1; } - } else if (OCLBackend && (OCLVersion[0] == '2')) { + } else if (OCLVersionMajor == '2') { // OpenCL 2.x // OpenCL 2.x: diff --git a/sycl/test/plugins/sycl-ls-gpu-opencl.cpp b/sycl/test/plugins/sycl-ls-gpu-opencl.cpp index 31158d14e5945..4676017086484 100755 --- a/sycl/test/plugins/sycl-ls-gpu-opencl.cpp +++ b/sycl/test/plugins/sycl-ls-gpu-opencl.cpp @@ -3,8 +3,8 @@ // RUN: env SYCL_BE=PI_OPENCL sycl-ls --verbose >%t.opencl.out // RUN: FileCheck %s --check-prefixes=CHECK-GPU-BUILTIN,CHECK-GPU-CUSTOM --input-file %t.opencl.out -// CHECK-GPU-BUILTIN: gpu_selector(){{.*}}GPU : 2.1 -// CHECK-GPU-CUSTOM: custom_selector(gpu){{.*}}GPU : 2.1 +// CHECK-GPU-BUILTIN: gpu_selector(){{.*}}GPU : {{[0-9].[0-9]}} +// CHECK-GPU-CUSTOM: custom_selector(gpu){{.*}}GPU : {{[0-9].[0-9]}} //==-- sycl-ls-gpu-opencl.cpp - SYCL test for discovered/selected devices -===// // From 1b40b9f2dc3e2db27c7cfb0013702d90293e4c2e Mon Sep 17 00:00:00 2001 From: Denis Bakhvalov Date: Tue, 22 Sep 2020 14:59:30 -0700 Subject: [PATCH 5/9] Run clang-format on sycl/test/basic_tests/parallel_for_range.cpp --- sycl/test/basic_tests/parallel_for_range.cpp | 191 +++++++++---------- 1 file changed, 95 insertions(+), 96 deletions(-) diff --git a/sycl/test/basic_tests/parallel_for_range.cpp b/sycl/test/basic_tests/parallel_for_range.cpp index 432d33ab77daf..3c42a1133bf97 100644 --- a/sycl/test/basic_tests/parallel_for_range.cpp +++ b/sycl/test/basic_tests/parallel_for_range.cpp @@ -31,114 +31,111 @@ int main() { auto DeviceType = D.get_info(); string_class OCLVersionStr = D.get_info(); - assert((OCLVersionStr.size() >= 3) && "Unexpected device version string"); - assert(OCLVersionStr.find(".") != string_class::npos && "Unexpected device version string"); + assert((OCLVersionStr.size() >= 3) && "Unexpected device version string"); + assert(OCLVersionStr.find(".") != string_class::npos && + "Unexpected device version string"); const char OCLVersionMajor = OCLVersionStr[0]; - const char OCLVersionMinor = OCLVersionStr[2]; + const char OCLVersionMinor = OCLVersionStr[2]; - if (OCLVersionMajor == '1' || - (OCLVersionMajor == '2' && OCLVersionMinor == '0')) { - // parallel_for, (16, 16, 16) global, (8, 8, 8) local, reqd_wg_size(4, 4, - // 4) - // -> fail - try { - Q.submit([&](handler &CGH) { - CGH.parallel_for( - nd_range<3>(range<3>(16, 16, 16), range<3>(8, 8, 8)), - [=](nd_item<3>) { reqd_wg_size_helper(); }); - }); - Q.wait_and_throw(); - std::cerr - << "Test case ReqdWGSizeNegativeA failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } catch (nd_range_error &E) { - if (string_class(E.what()).find("Specified local size doesn't match " - "the required work-group size " - "specified in the program source") == - string_class::npos) { - std::cerr - << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } - } catch (runtime_error &E) { + if (OCLVersionMajor == '1' || + (OCLVersionMajor == '2' && OCLVersionMinor == '0')) { + // parallel_for, (16, 16, 16) global, (8, 8, 8) local, reqd_wg_size(4, 4, + // 4) + // -> fail + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for( + nd_range<3>(range<3>(16, 16, 16), range<3>(8, 8, 8)), + [=](nd_item<3>) { reqd_wg_size_helper(); }); + }); + Q.wait_and_throw(); + std::cerr + << "Test case ReqdWGSizeNegativeA failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (nd_range_error &E) { + if (string_class(E.what()).find("Specified local size doesn't match " + "the required work-group size " + "specified in the program source") == + string_class::npos) { std::cerr << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " << E.what() << std::endl; return 1; - } catch (...) { - std::cerr - << "Test case ReqdWGSizeNegativeA failed: something unexpected " - "has been caught" - << std::endl; - return 1; - } - - // parallel_for, (16, 16, 16) global, null local, reqd_wg_size(4, 4, 4) // - // -> fail - try { - Q.submit([&](handler &CGH) { - CGH.parallel_for( - range<3>(16, 16, 16), [=](item<3>) { reqd_wg_size_helper(); }); - }); - Q.wait_and_throw(); - std::cerr - << "Test case ReqdWGSizeNegativeB failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } catch (nd_range_error &E) { - if (string_class(E.what()).find("OpenCL 1.x and 2.0 requires to pass " - "local size argument even if " - "required work-group size was " - "specified in the program source") == - string_class::npos) { - std::cerr - << "Test case ReqdWGSizeNegativeB failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } - } catch (runtime_error &E) { - std::cerr - << "Test case ReqdWGSizeNegativeB failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } catch (...) { - std::cerr - << "Test case ReqdWGSizeNegativeB failed: something unexpected " - "has been caught" - << std::endl; - return 1; } + } catch (runtime_error &E) { + std::cerr + << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr << "Test case ReqdWGSizeNegativeA failed: something unexpected " + "has been caught" + << std::endl; + return 1; } - // Positive test-cases that should pass on any underlying OpenCL runtime - - // parallel_for, (8, 8, 8) global, (4, 4, 4) local, reqd_wg_size(4, 4, 4) -> - // pass + // parallel_for, (16, 16, 16) global, null local, reqd_wg_size(4, 4, 4) // + // -> fail try { Q.submit([&](handler &CGH) { - CGH.parallel_for( - nd_range<3>(range<3>(8, 8, 8), range<3>(4, 4, 4)), - [=](nd_item<3>) { reqd_wg_size_helper(); }); + CGH.parallel_for( + range<3>(16, 16, 16), [=](item<3>) { reqd_wg_size_helper(); }); }); Q.wait_and_throw(); - } catch (nd_range_error &E) { std::cerr - << "Test case ReqdWGSizePositiveA failed: unexpected exception: " - << E.what() << std::endl; - return 1; + << "Test case ReqdWGSizeNegativeB failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (nd_range_error &E) { + if (string_class(E.what()).find("OpenCL 1.x and 2.0 requires to pass " + "local size argument even if " + "required work-group size was " + "specified in the program source") == + string_class::npos) { + std::cerr + << "Test case ReqdWGSizeNegativeB failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } } catch (runtime_error &E) { std::cerr - << "Test case ReqdWGSizePositiveA failed: unexpected exception: " + << "Test case ReqdWGSizeNegativeB failed: unexpected exception: " << E.what() << std::endl; return 1; } catch (...) { - std::cerr << "Test case ReqdWGSizePositiveA failed: something unexpected " + std::cerr << "Test case ReqdWGSizeNegativeB failed: something unexpected " "has been caught" << std::endl; return 1; } + } + + // Positive test-cases that should pass on any underlying OpenCL runtime + + // parallel_for, (8, 8, 8) global, (4, 4, 4) local, reqd_wg_size(4, 4, 4) -> + // pass + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for( + nd_range<3>(range<3>(8, 8, 8), range<3>(4, 4, 4)), + [=](nd_item<3>) { reqd_wg_size_helper(); }); + }); + Q.wait_and_throw(); + } catch (nd_range_error &E) { + std::cerr << "Test case ReqdWGSizePositiveA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (runtime_error &E) { + std::cerr << "Test case ReqdWGSizePositiveA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr << "Test case ReqdWGSizePositiveA failed: something unexpected " + "has been caught" + << std::endl; + return 1; + } if (OCLVersionMajor == '1') { // OpenCL 1.x or non-OpenCL backends which behave like OpenCl 1.2 in SYCL. @@ -421,8 +418,8 @@ int main() { if (string_class(E.what()).find( "Non-uniform work-groups are not allowed by default. " "Underlying OpenCL 2.x implementation supports this feature " - "and to enable it, build device program with -cl-std=CL2.0") == - string_class::npos) { + "and to enable it, build device program with " + "-cl-std=CL2.0") == string_class::npos) { std::cerr << "Test case OpenCL2XNegativeB failed: unexpected exception: " << E.what() << std::endl; @@ -461,8 +458,8 @@ int main() { if (string_class(E.what()).find( "Non-uniform work-groups are not allowed by default. " "Underlying OpenCL 2.x implementation supports this feature " - "and to enable it, build device program with -cl-std=CL2.0") == - string_class::npos) { + "and to enable it, build device program with " + "-cl-std=CL2.0") == string_class::npos) { std::cerr << "Test case OpenCL2XNegativeC failed: unexpected exception: " << E.what() << std::endl; @@ -507,7 +504,8 @@ int main() { "Local workgroup size greater than global range size. " "Non-uniform work-groups are not allowed by default. " "Underlying " - "OpenCL 2.x implementation supports this feature and to enable " + "OpenCL 2.x implementation supports this feature and to " + "enable " "it, build device program with -cl-std=CL2.0") == string_class::npos) { std::cerr @@ -552,8 +550,8 @@ int main() { "Local workgroup size greater than global range size. " "Non-uniform work-groups are not allowed by default. " "Underlying OpenCL 2.x implementation supports this feature " - "and to enable it, build device program with -cl-std=CL2.0") == - string_class::npos) { + "and to enable it, build device program with " + "-cl-std=CL2.0") == string_class::npos) { std::cerr << "Test case OpenCL2XNegativeC2 failed: unexpected exception: " << E.what() << std::endl; @@ -740,10 +738,11 @@ int main() { } } - // Local Size larger than Global, -cl-std=CL2.0 -cl-uniform-work-group-size - // flag used CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified as - // larger than the global size, then a different error string is used. This - // is a sub-case of the more general 'non-uniform work group' + // Local Size larger than Global, -cl-std=CL2.0 + // -cl-uniform-work-group-size flag used CL_INVALID_WORK_GROUP_SIZE if + // local_work_size is specified as larger than the global size, then a + // different error string is used. This is a sub-case of the more general + // 'non-uniform work group' { program P(Q.get_context()); P.build_with_kernel_type( From da074db261ee8ed044f41ab4af8ece215e25342a Mon Sep 17 00:00:00 2001 From: Denis Bakhvalov Date: Tue, 22 Sep 2020 16:10:04 -0700 Subject: [PATCH 6/9] Added more tests for get_info() --- .../basic_tests/info_ocl_version-cuda.cpp | 35 ++++++++++++++++ sycl/test/basic_tests/info_ocl_version.cpp | 40 +++++++++++++++++++ 2 files changed, 75 insertions(+) create mode 100644 sycl/test/basic_tests/info_ocl_version-cuda.cpp create mode 100644 sycl/test/basic_tests/info_ocl_version.cpp diff --git a/sycl/test/basic_tests/info_ocl_version-cuda.cpp b/sycl/test/basic_tests/info_ocl_version-cuda.cpp new file mode 100644 index 0000000000000..a0cdf09537141 --- /dev/null +++ b/sycl/test/basic_tests/info_ocl_version-cuda.cpp @@ -0,0 +1,35 @@ +// REQUIRES: gpu, cuda + +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: env SYCL_BE=PI_CUDA %GPU_RUN_PLACEHOLDER %t.out + +//==--------info_ocl_version-cuda.cpp - SYCL objects get_info() test -------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include + +using namespace cl::sycl; + +// This test checks that cl::sycl::info::device::version +// is returned in a form: . + +int main() { + default_selector selector; + device dev(selector.select_device()); + auto ocl_version = dev.get_info(); + const std::regex oclVersionRegex("[0-9]\\.[0-9]"); + if (!std::regex_match(ocl_version, oclVersionRegex)) { + std::cout << "Failed" << sd::endl; + return 1; + } + std::cout << "Passed" << std::endl; + return 0; +} diff --git a/sycl/test/basic_tests/info_ocl_version.cpp b/sycl/test/basic_tests/info_ocl_version.cpp new file mode 100644 index 0000000000000..ddad3bba8abe6 --- /dev/null +++ b/sycl/test/basic_tests/info_ocl_version.cpp @@ -0,0 +1,40 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: env SYCL_DEVICE_TYPE=HOST %t.out +// RUN: env SYCL_BE=PI_OPENCL %CPU_RUN_PLACEHOLDER %t.out +// RUN: env SYCL_BE=PI_OPENCL %GPU_RUN_PLACEHOLDER %t.out +// RUN: env SYCL_BE=PI_OPENCL %ACC_RUN_PLACEHOLDER %t.out +// RUN: env SYCL_BE=PI_LEVEL_ZERO %CPU_RUN_PLACEHOLDER %t.out +// RUN: env SYCL_BE=PI_LEVEL_ZERO %GPU_RUN_PLACEHOLDER %t.out +// RUN: env SYCL_BE=PI_LEVEL_ZERO %ACC_RUN_PLACEHOLDER %t.out + +//==--------info_ocl_version.cpp - SYCL objects get_info() test ------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include +#include +#include +#include + +using namespace cl::sycl; + +// This test checks that cl::sycl::info::device::version +// is returned in a form: . + +int main() { + default_selector selector; + device dev(selector.select_device()); + auto ocl_version = dev.get_info(); + std::cout << ocl_version << std::endl; + const std::regex oclVersionRegex("[0-9]\\.[0-9]"); + if (!std::regex_match(ocl_version, oclVersionRegex)) { + std::cout << "Failed" << std::endl; + return 1; + } + std::cout << "Passed" << std::endl; + return 0; +} From 2da0428c9fa9c9a8b7f488fe3c16aeb827b0e655 Mon Sep 17 00:00:00 2001 From: Denis Bakhvalov Date: Tue, 22 Sep 2020 17:58:31 -0700 Subject: [PATCH 7/9] Fixed basic_tests/parallel_for_range.cpp --- sycl/source/detail/error_handling/enqueue_kernel.cpp | 4 ++-- sycl/test/basic_tests/parallel_for_range.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/error_handling/enqueue_kernel.cpp b/sycl/source/detail/error_handling/enqueue_kernel.cpp index 455c88bbc7e66..ab846466161ca 100644 --- a/sycl/source/detail/error_handling/enqueue_kernel.cpp +++ b/sycl/source/detail/error_handling/enqueue_kernel.cpp @@ -57,8 +57,8 @@ bool handleInvalidWorkGroupSize(const device_impl &DeviceImpl, pi_kernel Kernel, if (Platform.get_backend() == cl::sycl::backend::opencl) { string_class VersionString = DeviceImpl.get_info(); IsOpenCL = true; - IsOpenCLV1x = (VersionString.find("OpenCL 1.") == 0); - IsOpenCLV20 = (VersionString.find("OpenCL 2.0") == 0); + IsOpenCLV1x = (VersionString.find("1.") == 0); + IsOpenCLV20 = (VersionString.find("2.0") == 0); } size_t CompileWGSize[3] = {0}; diff --git a/sycl/test/basic_tests/parallel_for_range.cpp b/sycl/test/basic_tests/parallel_for_range.cpp index 3c42a1133bf97..d68241ee28932 100644 --- a/sycl/test/basic_tests/parallel_for_range.cpp +++ b/sycl/test/basic_tests/parallel_for_range.cpp @@ -31,7 +31,7 @@ int main() { auto DeviceType = D.get_info(); string_class OCLVersionStr = D.get_info(); - assert((OCLVersionStr.size() >= 3) && "Unexpected device version string"); + assert((OCLVersionStr.size() == 3) && "Unexpected device version string"); assert(OCLVersionStr.find(".") != string_class::npos && "Unexpected device version string"); const char OCLVersionMajor = OCLVersionStr[0]; From b66ec6ee94d581d82d3d0ac145d6b6ed567de990 Mon Sep 17 00:00:00 2001 From: Denis Bakhvalov Date: Tue, 22 Sep 2020 22:10:05 -0700 Subject: [PATCH 8/9] Updated sub_group/broadcast tests --- sycl/test/sub_group/broadcast.cpp | 1 - sycl/test/sub_group/broadcast_fp64.cpp | 1 - 2 files changed, 2 deletions(-) diff --git a/sycl/test/sub_group/broadcast.cpp b/sycl/test/sub_group/broadcast.cpp index 3dbba78387b2d..49df849c1baad 100644 --- a/sycl/test/sub_group/broadcast.cpp +++ b/sycl/test/sub_group/broadcast.cpp @@ -1,4 +1,3 @@ -// XFAIL: cpu // UNSUPPORTED: cuda // CUDA compilation and runtime do not yet support sub-groups. diff --git a/sycl/test/sub_group/broadcast_fp64.cpp b/sycl/test/sub_group/broadcast_fp64.cpp index 9652fa6b73f46..f9f87e8f95fd9 100644 --- a/sycl/test/sub_group/broadcast_fp64.cpp +++ b/sycl/test/sub_group/broadcast_fp64.cpp @@ -1,4 +1,3 @@ -// XFAIL: cpu // UNSUPPORTED: cuda // CUDA compilation and runtime do not yet support sub-groups. From 5bd0760e43e1be49940a3df9ae6a78d574843242 Mon Sep 17 00:00:00 2001 From: Denis Bakhvalov Date: Wed, 23 Sep 2020 14:36:58 -0700 Subject: [PATCH 9/9] Fixed review comments --- .../basic_tests/info_ocl_version-cuda.cpp | 35 ---- sycl/test/basic_tests/info_ocl_version.cpp | 9 +- sycl/test/basic_tests/parallel_for_range.cpp | 192 +++++++++--------- sycl/test/plugins/sycl-ls-gpu-opencl.cpp | 4 +- 4 files changed, 104 insertions(+), 136 deletions(-) delete mode 100644 sycl/test/basic_tests/info_ocl_version-cuda.cpp diff --git a/sycl/test/basic_tests/info_ocl_version-cuda.cpp b/sycl/test/basic_tests/info_ocl_version-cuda.cpp deleted file mode 100644 index a0cdf09537141..0000000000000 --- a/sycl/test/basic_tests/info_ocl_version-cuda.cpp +++ /dev/null @@ -1,35 +0,0 @@ -// REQUIRES: gpu, cuda - -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -// RUN: env SYCL_BE=PI_CUDA %GPU_RUN_PLACEHOLDER %t.out - -//==--------info_ocl_version-cuda.cpp - SYCL objects get_info() test -------==// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include -#include -#include -#include - -using namespace cl::sycl; - -// This test checks that cl::sycl::info::device::version -// is returned in a form: . - -int main() { - default_selector selector; - device dev(selector.select_device()); - auto ocl_version = dev.get_info(); - const std::regex oclVersionRegex("[0-9]\\.[0-9]"); - if (!std::regex_match(ocl_version, oclVersionRegex)) { - std::cout << "Failed" << sd::endl; - return 1; - } - std::cout << "Passed" << std::endl; - return 0; -} diff --git a/sycl/test/basic_tests/info_ocl_version.cpp b/sycl/test/basic_tests/info_ocl_version.cpp index ddad3bba8abe6..e42383bbe3e79 100644 --- a/sycl/test/basic_tests/info_ocl_version.cpp +++ b/sycl/test/basic_tests/info_ocl_version.cpp @@ -1,11 +1,8 @@ // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: env SYCL_DEVICE_TYPE=HOST %t.out -// RUN: env SYCL_BE=PI_OPENCL %CPU_RUN_PLACEHOLDER %t.out -// RUN: env SYCL_BE=PI_OPENCL %GPU_RUN_PLACEHOLDER %t.out -// RUN: env SYCL_BE=PI_OPENCL %ACC_RUN_PLACEHOLDER %t.out -// RUN: env SYCL_BE=PI_LEVEL_ZERO %CPU_RUN_PLACEHOLDER %t.out -// RUN: env SYCL_BE=PI_LEVEL_ZERO %GPU_RUN_PLACEHOLDER %t.out -// RUN: env SYCL_BE=PI_LEVEL_ZERO %ACC_RUN_PLACEHOLDER %t.out +// RUN: env %CPU_RUN_PLACEHOLDER %t.out +// RUN: env %GPU_RUN_PLACEHOLDER %t.out +// RUN: env %ACC_RUN_PLACEHOLDER %t.out //==--------info_ocl_version.cpp - SYCL objects get_info() test ------------==// // diff --git a/sycl/test/basic_tests/parallel_for_range.cpp b/sycl/test/basic_tests/parallel_for_range.cpp index d68241ee28932..5100639638369 100644 --- a/sycl/test/basic_tests/parallel_for_range.cpp +++ b/sycl/test/basic_tests/parallel_for_range.cpp @@ -30,6 +30,7 @@ int main() { string_class DeviceVendorName = D.get_info(); auto DeviceType = D.get_info(); + const bool OCLBackend = D.get_platform().get_backend() == backend::opencl; string_class OCLVersionStr = D.get_info(); assert((OCLVersionStr.size() == 3) && "Unexpected device version string"); assert(OCLVersionStr.find(".") != string_class::npos && @@ -37,107 +38,114 @@ int main() { const char OCLVersionMajor = OCLVersionStr[0]; const char OCLVersionMinor = OCLVersionStr[2]; - if (OCLVersionMajor == '1' || - (OCLVersionMajor == '2' && OCLVersionMinor == '0')) { - // parallel_for, (16, 16, 16) global, (8, 8, 8) local, reqd_wg_size(4, 4, - // 4) - // -> fail - try { - Q.submit([&](handler &CGH) { - CGH.parallel_for( - nd_range<3>(range<3>(16, 16, 16), range<3>(8, 8, 8)), - [=](nd_item<3>) { reqd_wg_size_helper(); }); - }); - Q.wait_and_throw(); - std::cerr - << "Test case ReqdWGSizeNegativeA failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected - } catch (nd_range_error &E) { - if (string_class(E.what()).find("Specified local size doesn't match " - "the required work-group size " - "specified in the program source") == - string_class::npos) { + // reqd_work_group_size is OpenCL specific. + if (OCLBackend) { + if (OCLVersionMajor == '1' || + (OCLVersionMajor == '2' && OCLVersionMinor == '0')) { + // parallel_for, (16, 16, 16) global, (8, 8, 8) local, reqd_wg_size(4, 4, + // 4) + // -> fail + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for( + nd_range<3>(range<3>(16, 16, 16), range<3>(8, 8, 8)), + [=](nd_item<3>) { reqd_wg_size_helper(); }); + }); + Q.wait_and_throw(); + std::cerr + << "Test case ReqdWGSizeNegativeA failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (nd_range_error &E) { + if (string_class(E.what()).find("Specified local size doesn't match " + "the required work-group size " + "specified in the program source") == + string_class::npos) { + std::cerr + << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } catch (runtime_error &E) { std::cerr << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " << E.what() << std::endl; return 1; + } catch (...) { + std::cerr + << "Test case ReqdWGSizeNegativeA failed: something unexpected " + "has been caught" + << std::endl; + return 1; + } + + // parallel_for, (16, 16, 16) global, null local, reqd_wg_size(4, 4, 4) // + // -> fail + try { + Q.submit([&](handler &CGH) { + CGH.parallel_for( + range<3>(16, 16, 16), [=](item<3>) { reqd_wg_size_helper(); }); + }); + Q.wait_and_throw(); + std::cerr + << "Test case ReqdWGSizeNegativeB failed: no exception has been " + "thrown\n"; + return 1; // We shouldn't be here, exception is expected + } catch (nd_range_error &E) { + if (string_class(E.what()).find("OpenCL 1.x and 2.0 requires to pass " + "local size argument even if " + "required work-group size was " + "specified in the program source") == + string_class::npos) { + std::cerr + << "Test case ReqdWGSizeNegativeB failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } + } catch (runtime_error &E) { + std::cerr + << "Test case ReqdWGSizeNegativeB failed: unexpected exception: " + << E.what() << std::endl; + return 1; + } catch (...) { + std::cerr + << "Test case ReqdWGSizeNegativeB failed: something unexpected " + "has been caught" + << std::endl; + return 1; } - } catch (runtime_error &E) { - std::cerr - << "Test case ReqdWGSizeNegativeA failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } catch (...) { - std::cerr << "Test case ReqdWGSizeNegativeA failed: something unexpected " - "has been caught" - << std::endl; - return 1; } - // parallel_for, (16, 16, 16) global, null local, reqd_wg_size(4, 4, 4) // - // -> fail + // Positive test-cases that should pass on any underlying OpenCL runtime + + // parallel_for, (8, 8, 8) global, (4, 4, 4) local, reqd_wg_size(4, 4, 4) -> + // pass try { Q.submit([&](handler &CGH) { - CGH.parallel_for( - range<3>(16, 16, 16), [=](item<3>) { reqd_wg_size_helper(); }); + CGH.parallel_for( + nd_range<3>(range<3>(8, 8, 8), range<3>(4, 4, 4)), + [=](nd_item<3>) { reqd_wg_size_helper(); }); }); Q.wait_and_throw(); - std::cerr - << "Test case ReqdWGSizeNegativeB failed: no exception has been " - "thrown\n"; - return 1; // We shouldn't be here, exception is expected } catch (nd_range_error &E) { - if (string_class(E.what()).find("OpenCL 1.x and 2.0 requires to pass " - "local size argument even if " - "required work-group size was " - "specified in the program source") == - string_class::npos) { - std::cerr - << "Test case ReqdWGSizeNegativeB failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } + std::cerr + << "Test case ReqdWGSizePositiveA failed: unexpected exception: " + << E.what() << std::endl; + return 1; } catch (runtime_error &E) { std::cerr - << "Test case ReqdWGSizeNegativeB failed: unexpected exception: " + << "Test case ReqdWGSizePositiveA failed: unexpected exception: " << E.what() << std::endl; return 1; } catch (...) { - std::cerr << "Test case ReqdWGSizeNegativeB failed: something unexpected " + std::cerr << "Test case ReqdWGSizePositiveA failed: something unexpected " "has been caught" << std::endl; return 1; } - } + } // if (OCLBackend) - // Positive test-cases that should pass on any underlying OpenCL runtime - - // parallel_for, (8, 8, 8) global, (4, 4, 4) local, reqd_wg_size(4, 4, 4) -> - // pass - try { - Q.submit([&](handler &CGH) { - CGH.parallel_for( - nd_range<3>(range<3>(8, 8, 8), range<3>(4, 4, 4)), - [=](nd_item<3>) { reqd_wg_size_helper(); }); - }); - Q.wait_and_throw(); - } catch (nd_range_error &E) { - std::cerr << "Test case ReqdWGSizePositiveA failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } catch (runtime_error &E) { - std::cerr << "Test case ReqdWGSizePositiveA failed: unexpected exception: " - << E.what() << std::endl; - return 1; - } catch (...) { - std::cerr << "Test case ReqdWGSizePositiveA failed: something unexpected " - "has been caught" - << std::endl; - return 1; - } - - if (OCLVersionMajor == '1') { + if (!OCLBackend || (OCLVersionMajor == '1')) { // OpenCL 1.x or non-OpenCL backends which behave like OpenCl 1.2 in SYCL. // CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified and @@ -340,7 +348,7 @@ int main() { << std::endl; return 1; } - } else if (OCLVersionMajor == '2') { + } else if (OCLBackend && (OCLVersionMajor == '2')) { // OpenCL 2.x // OpenCL 2.x: @@ -418,8 +426,8 @@ int main() { if (string_class(E.what()).find( "Non-uniform work-groups are not allowed by default. " "Underlying OpenCL 2.x implementation supports this feature " - "and to enable it, build device program with " - "-cl-std=CL2.0") == string_class::npos) { + "and to enable it, build device program with -cl-std=CL2.0") == + string_class::npos) { std::cerr << "Test case OpenCL2XNegativeB failed: unexpected exception: " << E.what() << std::endl; @@ -458,8 +466,8 @@ int main() { if (string_class(E.what()).find( "Non-uniform work-groups are not allowed by default. " "Underlying OpenCL 2.x implementation supports this feature " - "and to enable it, build device program with " - "-cl-std=CL2.0") == string_class::npos) { + "and to enable it, build device program with -cl-std=CL2.0") == + string_class::npos) { std::cerr << "Test case OpenCL2XNegativeC failed: unexpected exception: " << E.what() << std::endl; @@ -504,8 +512,7 @@ int main() { "Local workgroup size greater than global range size. " "Non-uniform work-groups are not allowed by default. " "Underlying " - "OpenCL 2.x implementation supports this feature and to " - "enable " + "OpenCL 2.x implementation supports this feature and to enable " "it, build device program with -cl-std=CL2.0") == string_class::npos) { std::cerr @@ -550,8 +557,8 @@ int main() { "Local workgroup size greater than global range size. " "Non-uniform work-groups are not allowed by default. " "Underlying OpenCL 2.x implementation supports this feature " - "and to enable it, build device program with " - "-cl-std=CL2.0") == string_class::npos) { + "and to enable it, build device program with -cl-std=CL2.0") == + string_class::npos) { std::cerr << "Test case OpenCL2XNegativeC2 failed: unexpected exception: " << E.what() << std::endl; @@ -738,11 +745,10 @@ int main() { } } - // Local Size larger than Global, -cl-std=CL2.0 - // -cl-uniform-work-group-size flag used CL_INVALID_WORK_GROUP_SIZE if - // local_work_size is specified as larger than the global size, then a - // different error string is used. This is a sub-case of the more general - // 'non-uniform work group' + // Local Size larger than Global, -cl-std=CL2.0 -cl-uniform-work-group-size + // flag used CL_INVALID_WORK_GROUP_SIZE if local_work_size is specified as + // larger than the global size, then a different error string is used. This + // is a sub-case of the more general 'non-uniform work group' { program P(Q.get_context()); P.build_with_kernel_type( diff --git a/sycl/test/plugins/sycl-ls-gpu-opencl.cpp b/sycl/test/plugins/sycl-ls-gpu-opencl.cpp index 4676017086484..4437e68c28335 100755 --- a/sycl/test/plugins/sycl-ls-gpu-opencl.cpp +++ b/sycl/test/plugins/sycl-ls-gpu-opencl.cpp @@ -3,8 +3,8 @@ // RUN: env SYCL_BE=PI_OPENCL sycl-ls --verbose >%t.opencl.out // RUN: FileCheck %s --check-prefixes=CHECK-GPU-BUILTIN,CHECK-GPU-CUSTOM --input-file %t.opencl.out -// CHECK-GPU-BUILTIN: gpu_selector(){{.*}}GPU : {{[0-9].[0-9]}} -// CHECK-GPU-CUSTOM: custom_selector(gpu){{.*}}GPU : {{[0-9].[0-9]}} +// CHECK-GPU-BUILTIN: gpu_selector(){{.*}}GPU : {{[0-9]\.[0-9]}} +// CHECK-GPU-CUSTOM: custom_selector(gpu){{.*}}GPU : {{[0-9]\.[0-9]}} //==-- sycl-ls-gpu-opencl.cpp - SYCL test for discovered/selected devices -===// //