-
Notifications
You must be signed in to change notification settings - Fork 14.5k
[OpenMP] Replace most GPU helpers with ones from <gpuintrin.h> #125771
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Closed
Closed
Changes from all commits
Commits
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -15,116 +15,48 @@ | |||||
#include "Interface.h" | ||||||
#include "Mapping.h" | ||||||
|
||||||
using namespace ompx; | ||||||
|
||||||
namespace impl { | ||||||
|
||||||
void Unpack(uint64_t Val, uint32_t *LowBits, uint32_t *HighBits) { | ||||||
static_assert(sizeof(unsigned long) == 8, ""); | ||||||
*LowBits = static_cast<uint32_t>(Val & 0x00000000FFFFFFFFUL); | ||||||
*HighBits = static_cast<uint32_t>((Val & 0xFFFFFFFF00000000UL) >> 32); | ||||||
} | ||||||
|
||||||
uint64_t Pack(uint32_t LowBits, uint32_t HighBits) { | ||||||
return (((uint64_t)HighBits) << 32) | (uint64_t)LowBits; | ||||||
} | ||||||
|
||||||
int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width); | ||||||
int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta, | ||||||
int32_t Width); | ||||||
|
||||||
uint64_t ballotSync(uint64_t Mask, int32_t Pred); | ||||||
|
||||||
/// AMDGCN Implementation | ||||||
/// | ||||||
///{ | ||||||
#ifdef __AMDGPU__ | ||||||
|
||||||
int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width) { | ||||||
int Self = mapping::getThreadIdInWarp(); | ||||||
int Index = SrcLane + (Self & ~(Width - 1)); | ||||||
return __builtin_amdgcn_ds_bpermute(Index << 2, Var); | ||||||
} | ||||||
|
||||||
int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta, | ||||||
int32_t Width) { | ||||||
int Self = mapping::getThreadIdInWarp(); | ||||||
int Index = Self + LaneDelta; | ||||||
Index = (int)(LaneDelta + (Self & (Width - 1))) >= Width ? Self : Index; | ||||||
return __builtin_amdgcn_ds_bpermute(Index << 2, Var); | ||||||
} | ||||||
#include <gpuintrin.h> | ||||||
|
||||||
uint64_t ballotSync(uint64_t Mask, int32_t Pred) { | ||||||
return Mask & __builtin_amdgcn_ballot_w64(Pred); | ||||||
} | ||||||
|
||||||
bool isSharedMemPtr(const void *Ptr) { | ||||||
return __builtin_amdgcn_is_shared( | ||||||
(const __attribute__((address_space(0))) void *)Ptr); | ||||||
} | ||||||
#endif | ||||||
///} | ||||||
|
||||||
/// NVPTX Implementation | ||||||
/// | ||||||
///{ | ||||||
#ifdef __NVPTX__ | ||||||
|
||||||
int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width) { | ||||||
return __nvvm_shfl_sync_idx_i32(Mask, Var, SrcLane, Width - 1); | ||||||
} | ||||||
|
||||||
int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width) { | ||||||
int32_t T = ((mapping::getWarpSize() - Width) << 8) | 0x1f; | ||||||
return __nvvm_shfl_sync_down_i32(Mask, Var, Delta, T); | ||||||
} | ||||||
|
||||||
uint64_t ballotSync(uint64_t Mask, int32_t Pred) { | ||||||
return __nvvm_vote_ballot_sync(static_cast<uint32_t>(Mask), Pred); | ||||||
} | ||||||
|
||||||
bool isSharedMemPtr(const void *Ptr) { return __nvvm_isspacep_shared(Ptr); } | ||||||
|
||||||
#endif | ||||||
///} | ||||||
} // namespace impl | ||||||
using namespace ompx; | ||||||
|
||||||
uint64_t utils::pack(uint32_t LowBits, uint32_t HighBits) { | ||||||
return impl::Pack(LowBits, HighBits); | ||||||
return (uint64_t(HighBits) << 32) | uint64_t(LowBits); | ||||||
} | ||||||
|
||||||
void utils::unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits) { | ||||||
impl::Unpack(Val, &LowBits, &HighBits); | ||||||
static_assert(sizeof(unsigned long) == 8, ""); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Does it really matter for literal values? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
LowBits = static_cast<uint32_t>(Val & 0x00000000fffffffful); | ||||||
HighBits = static_cast<uint32_t>((Val & 0xffffffff00000000ul) >> 32); | ||||||
} | ||||||
|
||||||
int32_t utils::shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane, | ||||||
int32_t Width) { | ||||||
return impl::shuffle(Mask, Var, SrcLane, Width); | ||||||
return __gpu_shuffle_idx_u32(Mask, Var, SrcLane, Width); | ||||||
} | ||||||
|
||||||
int32_t utils::shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, | ||||||
int32_t Width) { | ||||||
return impl::shuffleDown(Mask, Var, Delta, Width); | ||||||
int32_t Self = mapping::getThreadIdInWarp(); | ||||||
int32_t Index = (Delta + (Self & (Width - 1))) >= Width ? Self : Self + Delta; | ||||||
return __gpu_shuffle_idx_u32(Mask, Index, Var, Width); | ||||||
} | ||||||
|
||||||
int64_t utils::shuffleDown(uint64_t Mask, int64_t Var, uint32_t Delta, | ||||||
int32_t Width) { | ||||||
uint32_t Lo, Hi; | ||||||
utils::unpack(Var, Lo, Hi); | ||||||
Hi = impl::shuffleDown(Mask, Hi, Delta, Width); | ||||||
Lo = impl::shuffleDown(Mask, Lo, Delta, Width); | ||||||
return utils::pack(Lo, Hi); | ||||||
int32_t Self = mapping::getThreadIdInWarp(); | ||||||
int32_t Index = (Delta + (Self & (Width - 1))) >= Width ? Self : Self + Delta; | ||||||
return __gpu_shuffle_idx_u64(Mask, Index, Var, Width); | ||||||
} | ||||||
|
||||||
uint64_t utils::ballotSync(uint64_t Mask, int32_t Pred) { | ||||||
return impl::ballotSync(Mask, Pred); | ||||||
return __gpu_ballot(Mask, Pred); | ||||||
} | ||||||
|
||||||
bool utils::isSharedMemPtr(void *Ptr) { return impl::isSharedMemPtr(Ptr); } | ||||||
bool utils::isSharedMemPtr(void *Ptr) { return __gpu_is_ptr_local(Ptr); } | ||||||
|
||||||
extern "C" { | ||||||
int32_t __kmpc_shuffle_int32(int32_t Val, int16_t Delta, int16_t SrcLane) { | ||||||
return impl::shuffleDown(lanes::All, Val, Delta, SrcLane); | ||||||
return utils::shuffleDown(lanes::All, Val, Delta, SrcLane); | ||||||
} | ||||||
|
||||||
int64_t __kmpc_shuffle_int64(int64_t Val, int16_t Delta, int16_t Width) { | ||||||
|
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This macro marked things uninitialised