diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 965285ce75ddc..dd88b9fed9685 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1417,14 +1417,19 @@ static void adjustNDRangePerKernel(NDRDescT &NDR, RT::PiKernel Kernel, if (WGSize[0] == 0) { // kernel does not request specific workgroup shape - set one - // TODO maximum work group size as the local size might not be the best - // choice for CPU or FPGA devices + id<3> MaxWGSizes = + get_device_info<id<3>, cl::sycl::info::device::max_work_item_sizes>:: + get(DeviceImpl.getHandleRef(), DeviceImpl.getPlugin()); + size_t WGSize1D = get_kernel_work_group_info< size_t, cl::sycl::info::kernel_work_group::work_group_size>:: get(Kernel, DeviceImpl.getHandleRef(), DeviceImpl.getPlugin()); - assert(WGSize1D != 0); - // TODO implement better default for 2D/3D case: - WGSize = {WGSize1D, 1, 1}; + + assert(MaxWGSizes[2] != 0); + + // Set default work-group size in the Z-direction to either the max + // number of work-items or the maximum work-group size in the Z-direction. + WGSize = {1, 1, min(WGSize1D, MaxWGSizes[2])}; } NDR.set(NDR.Dims, nd_range<3>(NDR.NumWorkGroups * WGSize, WGSize)); }