#include "oshmem/proc/proc.h"
#include "atomic_ucx.h"

+#if HAVE_DECL_UCP_ATOMIC_OP_NBX
+/*
+ * A static array of request parameters for the two supported datatype sizes,
+ * 4 and 8 bytes; "size >> 3" selects the matching entry (4 >> 3 == 0, 8 >> 3 == 1).
+ */
+static ucp_request_param_t mca_spml_ucp_request_params[] = {
+    {.op_attr_mask = UCP_OP_ATTR_FIELD_DATATYPE, .datatype = ucp_dt_make_contig(4)},
+    {.op_attr_mask = UCP_OP_ATTR_FIELD_DATATYPE, .datatype = ucp_dt_make_contig(8)}
+};
+#endif
+
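The "size >> 3" comment above is the whole indexing trick: the module only ever handles 4- and 8-byte operands (see the assert further down), and shifting those sizes right by three bits yields the array indices 0 and 1 directly, so no branch is needed to pick the request parameters. A standalone, illustration-only sketch of that mapping (plain C, not part of the patch):

```c
#include <assert.h>
#include <stdio.h>

/* Illustration only: the module supports 4- and 8-byte operands and uses
 * "size >> 3" as the index into its two-entry parameter array. */
int main(void)
{
    size_t sizes[] = {4, 8};

    for (int i = 0; i < 2; i++) {
        size_t size = sizes[i];
        assert((8 == size) || (4 == size));                     /* same guard as the module */
        printf("size %zu -> params[%zu]\n", size, size >> 3);   /* 4 -> 0, 8 -> 1 */
    }
    return 0;
}
```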
/*
 * Initial query function that is invoked during initialization, allowing
 * this module to indicate what level of thread support it provides.
@@ -38,20 +49,37 @@ int mca_atomic_ucx_op(shmem_ctx_t ctx,
                      uint64_t value,
                      size_t size,
                      int pe,
+#if HAVE_DECL_UCP_ATOMIC_OP_NBX
+                      ucp_atomic_op_t op)
+#else
                      ucp_atomic_post_op_t op)
+#endif
{
    ucs_status_t status;
    spml_ucx_mkey_t *ucx_mkey;
    uint64_t rva;
    mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;
+#if HAVE_DECL_UCP_ATOMIC_OP_NBX
+    ucs_status_ptr_t status_ptr;
+#endif

    assert((8 == size) || (4 == size));

    ucx_mkey = mca_spml_ucx_get_mkey(ctx, pe, target, (void *)&rva, mca_spml_self);
+
+#if HAVE_DECL_UCP_ATOMIC_OP_NBX
+    status_ptr = ucp_atomic_op_nbx(ucx_ctx->ucp_peers[pe].ucp_conn,
+                                   op, &value, 1, rva, ucx_mkey->rkey,
+                                   &mca_spml_ucp_request_params[size >> 3]);
+    if (OPAL_LIKELY(!UCS_PTR_IS_ERR(status_ptr))) {
+        mca_spml_ucx_remote_op_posted(ucx_ctx, pe);
+    }
+    status = UCS_PTR_STATUS(status_ptr);
+#else
    status = ucp_atomic_post(ucx_ctx->ucp_peers[pe].ucp_conn,
                             op, value, size, rva,
                             ucx_mkey->rkey);
-
+#endif
    if (OPAL_LIKELY(UCS_OK == status)) {
        mca_spml_ucx_remote_op_posted(ucx_ctx, pe);
    }
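In the new path the non-fetching atomic is posted with ucp_atomic_op_nbx(): an error pointer means the post failed, NULL means it completed immediately (UCS_PTR_STATUS() then yields UCS_OK), and a request pointer means the operation is still in flight, which the module tolerates because remote completion is presumably enforced later by the SPML's quiet/fence path rather than by waiting here. A hedged sketch of that classification outside the module; the helper name and the pre-initialized ep/remote_addr/rkey arguments are assumptions, not patch code:

```c
#include <stdint.h>
#include <ucp/api/ucp.h>

/* Sketch, not part of the patch: post a non-fetching 64-bit atomic add with
 * ucp_atomic_op_nbx() and classify the returned ucs_status_ptr_t the same
 * way the patch does. ep, remote_addr and rkey are assumed to be set up. */
static ucs_status_t post_atomic_add64(ucp_ep_h ep, uint64_t value,
                                      uint64_t remote_addr, ucp_rkey_h rkey)
{
    ucp_request_param_t param = {
        .op_attr_mask = UCP_OP_ATTR_FIELD_DATATYPE,
        .datatype     = ucp_dt_make_contig(8)
    };
    ucs_status_ptr_t sp = ucp_atomic_op_nbx(ep, UCP_ATOMIC_OP_ADD, &value, 1,
                                            remote_addr, rkey, &param);

    if (UCS_PTR_IS_ERR(sp)) {
        return UCS_PTR_STATUS(sp);   /* posting the operation failed */
    }
    if (sp == NULL) {
        return UCS_OK;               /* completed immediately */
    }
    /* Still in flight: drop the handle, UCX releases it once the operation
     * completes; remote completion is left to a later flush/quiet. */
    ucp_request_free(sp);
    return UCS_INPROGRESS;
}
```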
@@ -66,22 +94,41 @@ int mca_atomic_ucx_fop(shmem_ctx_t ctx,
                       uint64_t value,
                       size_t size,
                       int pe,
+#if HAVE_DECL_UCP_ATOMIC_OP_NBX
+                       ucp_atomic_op_t op)
+#else
                       ucp_atomic_fetch_op_t op)
+#endif
{
    ucs_status_ptr_t status_ptr;
    spml_ucx_mkey_t *ucx_mkey;
    uint64_t rva;
    mca_spml_ucx_ctx_t *ucx_ctx = (mca_spml_ucx_ctx_t *)ctx;
+#if HAVE_DECL_UCP_ATOMIC_OP_NBX
+    ucp_request_param_t param = {
+        .op_attr_mask = UCP_OP_ATTR_FIELD_DATATYPE |
+                        UCP_OP_ATTR_FIELD_REPLY_BUFFER,
+        .datatype     = ucp_dt_make_contig(size),
+        .reply_buffer = prev
+    };
+#endif

    assert((8 == size) || (4 == size));

    ucx_mkey = mca_spml_ucx_get_mkey(ctx, pe, target, (void *)&rva, mca_spml_self);
+#if HAVE_DECL_UCP_ATOMIC_OP_NBX
+    status_ptr = ucp_atomic_op_nbx(ucx_ctx->ucp_peers[pe].ucp_conn, op, &value, 1,
+                                   rva, ucx_mkey->rkey, &param);
+    return opal_common_ucx_wait_request(status_ptr, ucx_ctx->ucp_worker[0],
+                                        "ucp_atomic_op_nbx");
+#else
    status_ptr = ucp_atomic_fetch_nb(ucx_ctx->ucp_peers[pe].ucp_conn,
                                     op, value, prev, size,
                                     rva, ucx_mkey->rkey,
                                     opal_common_ucx_empty_complete_cb);
    return opal_common_ucx_wait_request(status_ptr, ucx_ctx->ucp_worker[0],
                                        "ucp_atomic_fetch_nb");
+#endif
}

static int mca_atomic_ucx_add(shmem_ctx_t ctx,
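For fetching atomics the patch additionally sets UCP_OP_ATTR_FIELD_REPLY_BUFFER so UCX writes the previous remote value into prev, and then blocks by handing the returned request to opal_common_ucx_wait_request(), which progresses the worker until the operation completes. A hedged, self-contained sketch of the same pattern using only public UCX calls; the function name and the pre-initialized worker/ep/rkey arguments are illustrative, not taken from the patch:

```c
#include <stdint.h>
#include <ucp/api/ucp.h>

/* Sketch, not the patch's helper: a blocking 64-bit fetch-and-add built on
 * ucp_atomic_op_nbx() with a reply buffer, waiting for completion by
 * progressing the worker, roughly what opal_common_ucx_wait_request() does. */
static ucs_status_t fetch_add64_blocking(ucp_worker_h worker, ucp_ep_h ep,
                                         uint64_t value, uint64_t remote_addr,
                                         ucp_rkey_h rkey, uint64_t *prev)
{
    ucp_request_param_t param = {
        .op_attr_mask = UCP_OP_ATTR_FIELD_DATATYPE |
                        UCP_OP_ATTR_FIELD_REPLY_BUFFER,
        .datatype     = ucp_dt_make_contig(8),
        .reply_buffer = prev              /* old remote value is written here */
    };
    ucs_status_ptr_t sp = ucp_atomic_op_nbx(ep, UCP_ATOMIC_OP_ADD, &value, 1,
                                            remote_addr, rkey, &param);
    ucs_status_t status;

    if (!UCS_PTR_IS_PTR(sp)) {
        return UCS_PTR_STATUS(sp);        /* immediate completion or error */
    }
    /* Progress the worker until the request leaves UCS_INPROGRESS. */
    do {
        ucp_worker_progress(worker);
        status = ucp_request_check_status(sp);
    } while (UCS_INPROGRESS == status);
    ucp_request_free(sp);
    return status;
}
```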
@@ -90,7 +137,11 @@ static int mca_atomic_ucx_add(shmem_ctx_t ctx,
                              size_t size,
                              int pe)
{
+#if HAVE_DECL_UCP_ATOMIC_OP_NBX
+    return mca_atomic_ucx_op(ctx, target, value, size, pe, UCP_ATOMIC_OP_ADD);
+#else
    return mca_atomic_ucx_op(ctx, target, value, size, pe, UCP_ATOMIC_POST_OP_ADD);
+#endif
}

static int mca_atomic_ucx_and(shmem_ctx_t ctx,
@@ -99,7 +150,9 @@ static int mca_atomic_ucx_and(shmem_ctx_t ctx,
                              size_t size,
                              int pe)
{
-#if HAVE_DECL_UCP_ATOMIC_POST_OP_AND
+#if HAVE_DECL_UCP_ATOMIC_OP_NBX
+    return mca_atomic_ucx_op(ctx, target, value, size, pe, UCP_ATOMIC_OP_AND);
+#elif HAVE_DECL_UCP_ATOMIC_POST_OP_AND
    return mca_atomic_ucx_op(ctx, target, value, size, pe, UCP_ATOMIC_POST_OP_AND);
#else
    return OSHMEM_ERR_NOT_IMPLEMENTED;
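Every remaining wrapper in this file repeats the same three-way, compile-time dispatch: prefer the new UCP_ATOMIC_OP_* opcode when configure detected ucp_atomic_op_nbx(), fall back to the legacy post/fetch opcode when only that is declared, and otherwise report OSHMEM_ERR_NOT_IMPLEMENTED. A standalone toy version of the pattern, with all names below being illustrative stand-ins rather than UCX or OSHMEM symbols:

```c
#include <stdio.h>

/* Illustration only: configure sets the real HAVE_DECL_* macros to 0 or 1
 * depending on which declarations the installed UCX headers provide. */
#define HAVE_NEW_ATOMIC_API 1   /* stand-in for HAVE_DECL_UCP_ATOMIC_OP_NBX */
#define HAVE_OLD_ATOMIC_AND 1   /* stand-in for HAVE_DECL_UCP_ATOMIC_POST_OP_AND */

static int post_and_new(unsigned v) { printf("new API: AND 0x%x\n", v); return 0; }
static int post_and_old(unsigned v) { printf("old API: AND 0x%x\n", v); return 0; }

static int atomic_and(unsigned value)
{
#if HAVE_NEW_ATOMIC_API
    return post_and_new(value);   /* newest UCX: ucp_atomic_op_nbx() path */
#elif HAVE_OLD_ATOMIC_AND
    return post_and_old(value);   /* older UCX: ucp_atomic_post() path */
#else
    (void)value;
    return -1;                    /* stands in for OSHMEM_ERR_NOT_IMPLEMENTED */
#endif
}

int main(void) { return atomic_and(0xffu); }
```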
@@ -112,7 +165,9 @@ static int mca_atomic_ucx_or(shmem_ctx_t ctx,
                             size_t size,
                             int pe)
{
-#if HAVE_DECL_UCP_ATOMIC_POST_OP_OR
+#if HAVE_DECL_UCP_ATOMIC_OP_NBX
+    return mca_atomic_ucx_op(ctx, target, value, size, pe, UCP_ATOMIC_OP_OR);
+#elif HAVE_DECL_UCP_ATOMIC_POST_OP_OR
    return mca_atomic_ucx_op(ctx, target, value, size, pe, UCP_ATOMIC_POST_OP_OR);
#else
    return OSHMEM_ERR_NOT_IMPLEMENTED;
@@ -125,7 +180,9 @@ static int mca_atomic_ucx_xor(shmem_ctx_t ctx,
                              size_t size,
                              int pe)
{
-#if HAVE_DECL_UCP_ATOMIC_POST_OP_XOR
+#if HAVE_DECL_UCP_ATOMIC_OP_NBX
+    return mca_atomic_ucx_op(ctx, target, value, size, pe, UCP_ATOMIC_OP_XOR);
+#elif HAVE_DECL_UCP_ATOMIC_POST_OP_XOR
    return mca_atomic_ucx_op(ctx, target, value, size, pe, UCP_ATOMIC_POST_OP_XOR);
#else
    return OSHMEM_ERR_NOT_IMPLEMENTED;
@@ -139,7 +196,11 @@ static int mca_atomic_ucx_fadd(shmem_ctx_t ctx,
                               size_t size,
                               int pe)
{
+#if HAVE_DECL_UCP_ATOMIC_OP_NBX
+    return mca_atomic_ucx_fop(ctx, target, prev, value, size, pe, UCP_ATOMIC_OP_ADD);
+#else
    return mca_atomic_ucx_fop(ctx, target, prev, value, size, pe, UCP_ATOMIC_FETCH_OP_FADD);
+#endif
}

static int mca_atomic_ucx_fand(shmem_ctx_t ctx,
@@ -149,7 +210,9 @@ static int mca_atomic_ucx_fand(shmem_ctx_t ctx,
                               size_t size,
                               int pe)
{
-#if HAVE_DECL_UCP_ATOMIC_FETCH_OP_FAND
+#if HAVE_DECL_UCP_ATOMIC_OP_NBX
+    return mca_atomic_ucx_fop(ctx, target, prev, value, size, pe, UCP_ATOMIC_OP_AND);
+#elif HAVE_DECL_UCP_ATOMIC_FETCH_OP_FAND
    return mca_atomic_ucx_fop(ctx, target, prev, value, size, pe, UCP_ATOMIC_FETCH_OP_FAND);
#else
    return OSHMEM_ERR_NOT_IMPLEMENTED;
@@ -163,7 +226,9 @@ static int mca_atomic_ucx_for(shmem_ctx_t ctx,
                              size_t size,
                              int pe)
{
-#if HAVE_DECL_UCP_ATOMIC_FETCH_OP_FOR
+#if HAVE_DECL_UCP_ATOMIC_OP_NBX
+    return mca_atomic_ucx_fop(ctx, target, prev, value, size, pe, UCP_ATOMIC_OP_OR);
+#elif HAVE_DECL_UCP_ATOMIC_FETCH_OP_FOR
    return mca_atomic_ucx_fop(ctx, target, prev, value, size, pe, UCP_ATOMIC_FETCH_OP_FOR);
#else
    return OSHMEM_ERR_NOT_IMPLEMENTED;
@@ -177,7 +242,9 @@ static int mca_atomic_ucx_fxor(shmem_ctx_t ctx,
                               size_t size,
                               int pe)
{
-#if HAVE_DECL_UCP_ATOMIC_FETCH_OP_FXOR
+#if HAVE_DECL_UCP_ATOMIC_OP_NBX
+    return mca_atomic_ucx_fop(ctx, target, prev, value, size, pe, UCP_ATOMIC_OP_XOR);
+#elif HAVE_DECL_UCP_ATOMIC_FETCH_OP_FXOR
    return mca_atomic_ucx_fop(ctx, target, prev, value, size, pe, UCP_ATOMIC_FETCH_OP_FXOR);
#else
    return OSHMEM_ERR_NOT_IMPLEMENTED;
@@ -191,7 +258,11 @@ static int mca_atomic_ucx_swap(shmem_ctx_t ctx,
                               size_t size,
                               int pe)
{
+#if HAVE_DECL_UCP_ATOMIC_OP_NBX
+    return mca_atomic_ucx_fop(ctx, target, prev, value, size, pe, UCP_ATOMIC_OP_SWAP);
+#else
    return mca_atomic_ucx_fop(ctx, target, prev, value, size, pe, UCP_ATOMIC_FETCH_OP_SWAP);
+#endif
}
