15
15
#include " Interface.h"
16
16
#include " Mapping.h"
17
17
18
+ #include < gpuintrin.h>
19
+
18
20
using namespace ompx ;
19
21
20
22
namespace impl {
@@ -29,17 +31,12 @@ uint64_t Pack(uint32_t LowBits, uint32_t HighBits) {
29
31
return (((uint64_t )HighBits) << 32 ) | (uint64_t )LowBits;
30
32
}
31
33
32
- int32_t shuffle (uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width);
33
- int32_t shuffleDown (uint64_t Mask, int32_t Var, uint32_t LaneDelta,
34
- int32_t Width);
35
-
36
- uint64_t ballotSync (uint64_t Mask, int32_t Pred);
37
-
38
34
/// AMDGCN Implementation
39
35
///
40
36
///{
41
37
#ifdef __AMDGPU__
42
38
39
+ // TODO: Move this to <gpuintrin.h>.
43
40
int32_t shuffle (uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width) {
44
41
int Self = mapping::getThreadIdInWarp ();
45
42
int Index = SrcLane + (Self & ~(Width - 1 ));
@@ -53,15 +50,6 @@ int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta,
53
50
Index = (int )(LaneDelta + (Self & (Width - 1 ))) >= Width ? Self : Index;
54
51
return __builtin_amdgcn_ds_bpermute (Index << 2 , Var);
55
52
}
56
-
57
- uint64_t ballotSync (uint64_t Mask, int32_t Pred) {
58
- return Mask & __builtin_amdgcn_ballot_w64 (Pred);
59
- }
60
-
61
- bool isSharedMemPtr (const void *Ptr) {
62
- return __builtin_amdgcn_is_shared (
63
- (const __attribute__ ((address_space (0 ))) void *)Ptr);
64
- }
65
53
#endif
66
54
///}
67
55
@@ -70,6 +58,7 @@ bool isSharedMemPtr(const void *Ptr) {
70
58
///{
71
59
#ifdef __NVPTX__
72
60
61
+ // TODO: Move this to <gpuintrin.h>.
73
62
int32_t shuffle (uint64_t Mask, int32_t Var, int32_t SrcLane, int32_t Width) {
74
63
return __nvvm_shfl_sync_idx_i32 (Mask, Var, SrcLane, Width - 1 );
75
64
}
@@ -78,13 +67,6 @@ int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width) {
78
67
int32_t T = ((mapping::getWarpSize () - Width) << 8 ) | 0x1f ;
79
68
return __nvvm_shfl_sync_down_i32 (Mask, Var, Delta, T);
80
69
}
81
-
82
- uint64_t ballotSync (uint64_t Mask, int32_t Pred) {
83
- return __nvvm_vote_ballot_sync (static_cast <uint32_t >(Mask), Pred);
84
- }
85
-
86
- bool isSharedMemPtr (const void *Ptr) { return __nvvm_isspacep_shared (Ptr); }
87
-
88
70
#endif
89
71
///}
90
72
} // namespace impl
@@ -117,10 +99,10 @@ int64_t utils::shuffleDown(uint64_t Mask, int64_t Var, uint32_t Delta,
117
99
}
118
100
119
101
uint64_t utils::ballotSync (uint64_t Mask, int32_t Pred) {
120
- return impl::ballotSync (Mask, Pred);
102
+ return __gpu_ballot (Mask, Pred);
121
103
}
122
104
123
- bool utils::isSharedMemPtr (void *Ptr) { return impl::isSharedMemPtr (Ptr); }
105
+ bool utils::isSharedMemPtr (void *Ptr) { return __gpu_is_ptr_local (Ptr); }
124
106
125
107
extern " C" {
126
108
int32_t __kmpc_shuffle_int32 (int32_t Val, int16_t Delta, int16_t SrcLane) {
0 commit comments