From ffd31b787e56d84b5f8b25f0fba1d799b87b0bc5 Mon Sep 17 00:00:00 2001
From: Hua Huang
Date: Wed, 2 Dec 2020 22:36:41 -0500
Subject: [PATCH] Bug fix: non-uniform work-groups error on CUDA

Fix https://github.com/intel/llvm/issues/2821
---
 sycl/include/CL/sycl/ONEAPI/reduction.hpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/sycl/include/CL/sycl/ONEAPI/reduction.hpp b/sycl/include/CL/sycl/ONEAPI/reduction.hpp
index a11d3d89fb513..981c2cff93060 100644
--- a/sycl/include/CL/sycl/ONEAPI/reduction.hpp
+++ b/sycl/include/CL/sycl/ONEAPI/reduction.hpp
@@ -892,7 +892,8 @@ reduAuxCGFuncImpl(handler &CGH, size_t NWorkItems, size_t NWorkGroups,
   bool IsUpdateOfUserVar =
       Reduction::accessor_mode == access::mode::read_write &&
       NWorkGroups == 1;
-  nd_range<1> Range{range<1>(NWorkItems), range<1>(WGSize)};
+  size_t NWorkItemsExt = ((NWorkItems + WGSize - 1) / WGSize) * WGSize;
+  nd_range<1> Range{range<1>(NWorkItemsExt), range<1>(WGSize)};
   CGH.parallel_for(Range, [=](nd_item<1> NDIt) {
     typename Reduction::binary_operation BOp;
     size_t WGID = NDIt.get_group_linear_id();
@@ -936,7 +937,8 @@ reduAuxCGFuncImpl(handler &CGH, size_t NWorkItems, size_t NWorkGroups,
   auto BOp = Redu.getBinaryOperation();
   using Name = typename get_reduction_aux_kernel_name_t<
       KernelName, KernelType, Reduction::is_usm, UniformPow2WG, OutputT>::name;
-  nd_range<1> Range{range<1>(NWorkItems), range<1>(WGSize)};
+  size_t NWorkItemsExt = ((NWorkItems + WGSize - 1) / WGSize) * WGSize;
+  nd_range<1> Range{range<1>(NWorkItemsExt), range<1>(WGSize)};
   CGH.parallel_for(Range, [=](nd_item<1> NDIt) {
     size_t WGSize = NDIt.get_local_range().size();
     size_t LID = NDIt.get_local_linear_id();