diff --git a/offload/DeviceRTL/src/Mapping.cpp b/offload/DeviceRTL/src/Mapping.cpp index 3aefcff68e195..881bd12f03405 100644 --- a/offload/DeviceRTL/src/Mapping.cpp +++ b/offload/DeviceRTL/src/Mapping.cpp @@ -25,7 +25,6 @@ namespace ompx { namespace impl { // Forward declarations defined to be defined for AMDGCN and NVPTX. -const llvm::omp::GV &getGridValue(); LaneMaskTy activemask(); LaneMaskTy lanemaskLT(); LaneMaskTy lanemaskGT(); @@ -37,15 +36,14 @@ uint32_t getBlockIdInKernel(int32_t Dim); uint32_t getNumberOfBlocksInKernel(int32_t Dim); uint32_t getWarpIdInBlock(); uint32_t getNumberOfWarpsInBlock(); +uint32_t getWarpSize(); /// AMDGCN Implementation /// ///{ #pragma omp begin declare variant match(device = {arch(amdgcn)}) -const llvm::omp::GV &getGridValue() { - return llvm::omp::getAMDGPUGridValues<__AMDGCN_WAVEFRONT_SIZE>(); -} +uint32_t getWarpSize() { return __builtin_amdgcn_wavefrontsize(); } uint32_t getNumberOfThreadsInBlock(int32_t Dim) { switch (Dim) { @@ -152,7 +150,7 @@ uint32_t getNumberOfThreadsInBlock(int32_t Dim) { UNREACHABLE("Dim outside range!"); } -const llvm::omp::GV &getGridValue() { return llvm::omp::NVPTXGridValues; } +uint32_t getWarpSize() { return __nvvm_read_ptx_sreg_warpsize(); } LaneMaskTy activemask() { return __nvvm_activemask(); } @@ -219,8 +217,6 @@ uint32_t getNumberOfWarpsInBlock() { #pragma omp end declare variant ///} -uint32_t getWarpSize() { return getGridValue().GV_Warp_Size; } - } // namespace impl } // namespace ompx diff --git a/offload/test/offloading/ompx_bare_ballot_sync.c b/offload/test/offloading/ompx_bare_ballot_sync.c index 101d1255f0d67..b810fb404b58f 100644 --- a/offload/test/offloading/ompx_bare_ballot_sync.c +++ b/offload/test/offloading/ompx_bare_ballot_sync.c @@ -8,22 +8,33 @@ #include #include +#pragma omp begin declare variant match(device = {arch(amdgcn)}) +unsigned get_warp_size() { return __builtin_amdgcn_wavefrontsize(); } +#pragma omp end declare variant + +#pragma omp begin declare variant match(device = {arch(nvptx64)}) +unsigned get_warp_size() { return __nvvm_read_ptx_sreg_warpsize(); } +#pragma omp end declare variant + +#pragma omp begin declare variant match(device = {kind(cpu)}) +unsigned get_warp_size() { return 1; } +#pragma omp end declare variant + int main(int argc, char *argv[]) { const int num_blocks = 1; const int block_size = 256; const int N = num_blocks * block_size; int *res = (int *)malloc(N * sizeof(int)); -#pragma omp target teams ompx_bare num_teams(num_blocks) thread_limit(block_size) \ - map(from: res[0:N]) +#pragma omp target teams ompx_bare num_teams(num_blocks) \ + thread_limit(block_size) map(from : res[0 : N]) { int tid = ompx_thread_id_x(); uint64_t mask = ompx_ballot_sync(~0LU, tid & 0x1); -#if defined __AMDGCN_WAVEFRONT_SIZE && __AMDGCN_WAVEFRONT_SIZE == 64 - res[tid] = mask == 0xaaaaaaaaaaaaaaaa; -#else - res[tid] = mask == 0xaaaaaaaa; -#endif + if (get_warp_size() == 64) + res[tid] = mask == 0xaaaaaaaaaaaaaaaa; + else + res[tid] = mask == 0xaaaaaaaa; } for (int i = 0; i < N; ++i) diff --git a/offload/test/offloading/ompx_bare_shfl_down_sync.cpp b/offload/test/offloading/ompx_bare_shfl_down_sync.cpp index 9b0e66e25f68c..311999918de85 100644 --- a/offload/test/offloading/ompx_bare_shfl_down_sync.cpp +++ b/offload/test/offloading/ompx_bare_shfl_down_sync.cpp @@ -10,6 +10,18 @@ #include #include +#pragma omp begin declare variant match(device = {arch(amdgcn)}) +unsigned get_warp_size() { return __builtin_amdgcn_wavefrontsize(); } +#pragma omp end declare variant + +#pragma omp begin declare variant match(device = {arch(nvptx64)}) +unsigned get_warp_size() { return __nvvm_read_ptx_sreg_warpsize(); } +#pragma omp end declare variant + +#pragma omp begin declare variant match(device = {kind(cpu)}) +unsigned get_warp_size() { return 1; } +#pragma omp end declare variant + template ::value, bool> = true> bool equal(T LHS, T RHS) { return LHS == RHS; @@ -32,11 +44,7 @@ template void test() { { int tid = ompx_thread_id_x(); T val = ompx::shfl_down_sync(~0U, static_cast(tid), 1); -#ifdef __AMDGCN_WAVEFRONT_SIZE - int warp_size = __AMDGCN_WAVEFRONT_SIZE; -#else - int warp_size = 32; -#endif + int warp_size = get_warp_size(); if ((tid & (warp_size - 1)) != warp_size - 1) res[tid] = equal(val, static_cast(tid + 1)); else