Skip to content

Commit e25d3e6

Browse files
authored
Merge pull request #12839 from bosilca/topic/import_several_UCT_BTL_updates
Import several uct btl updates
2 parents 2ab0ed2 + 15cd497 commit e25d3e6

File tree

9 files changed

+61
-55
lines changed

9 files changed

+61
-55
lines changed

ompi/mca/pml/ob1/pml_ob1_isend.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ static inline int mca_pml_ob1_send_inline (const void *buf, size_t count,
143143
}
144144

145145
if (OPAL_UNLIKELY(OMPI_SUCCESS != rc)) {
146-
return rc;
146+
return rc;
147147
}
148148

149149
return (int) size;

ompi/mca/pml/ob1/pml_ob1_recvreq.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -382,7 +382,7 @@ static int mca_pml_ob1_recv_request_get_frag_failed (mca_pml_ob1_rdma_frag_t *fr
382382
}
383383
}
384384

385-
if (++frag->retries < mca_pml_ob1.rdma_retries_limit &&
385+
if (frag->retries < mca_pml_ob1.rdma_retries_limit &&
386386
OMPI_ERR_OUT_OF_RESOURCE == rc) {
387387
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
388388
opal_list_append(&mca_pml_ob1.rdma_pending, (opal_list_item_t*)frag);
@@ -413,6 +413,7 @@ static void mca_pml_ob1_rget_completion (mca_btl_base_module_t* btl, struct mca_
413413
/* check completion status */
414414
if (OPAL_UNLIKELY(OMPI_SUCCESS != status)) {
415415
status = mca_pml_ob1_recv_request_get_frag_failed (frag, status);
416+
/* fragment was returned or queued by the above call */
416417
if (OPAL_UNLIKELY(OMPI_SUCCESS != status)) {
417418
size_t skipped_bytes = recvreq->req_send_offset - recvreq->req_rdma_offset;
418419
opal_output_verbose(mca_pml_ob1_output, 1, "pml:ob1: %s: operation failed with code %d", __func__, status);
@@ -435,12 +436,12 @@ static void mca_pml_ob1_rget_completion (mca_btl_base_module_t* btl, struct mca_
435436
mca_pml_ob1_send_fin (recvreq->req_recv.req_base.req_proc,
436437
bml_btl, frag->rdma_hdr.hdr_rget.hdr_frag,
437438
frag->rdma_length, 0, 0);
439+
440+
MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
438441
}
439442

440443
recv_request_pml_complete_check(recvreq);
441444

442-
MCA_PML_OB1_RDMA_FRAG_RETURN(frag);
443-
444445
MCA_PML_OB1_PROGRESS_PENDING(bml_btl);
445446
}
446447

ompi/mca/pml/ob1/pml_ob1_sendreq.c

Lines changed: 26 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
* Copyright (c) 2018-2019 Triad National Security, LLC. All rights
2323
* reserved.
2424
* Copyright (c) 2022 IBM Corporation. All rights reserved.
25+
* Copyright (c) 2024 Google, LLC. All rights reserved.
2526
* $COPYRIGHT$
2627
*
2728
* Additional copyrights may follow
@@ -1110,6 +1111,12 @@ mca_pml_ob1_send_request_schedule_once(mca_pml_ob1_send_request_t* sendreq)
11101111

11111112
range = get_send_range(sendreq);
11121113

1114+
if (NULL != sendreq->rdma_frag) {
1115+
/* this request was first attempted with RDMA but is now using send/recv */
1116+
MCA_PML_OB1_RDMA_FRAG_RETURN(sendreq->rdma_frag);
1117+
sendreq->rdma_frag = NULL;
1118+
}
1119+
11131120
while(range && (false == sendreq->req_throttle_sends ||
11141121
sendreq->req_pipeline_depth < mca_pml_ob1.send_pipeline_depth)) {
11151122
mca_pml_ob1_frag_hdr_t* hdr;
@@ -1268,30 +1275,31 @@ static void mca_pml_ob1_send_request_put_frag_failed (mca_pml_ob1_rdma_frag_t *f
12681275
mca_pml_ob1_send_request_t* sendreq = (mca_pml_ob1_send_request_t *) frag->rdma_req;
12691276
mca_bml_base_btl_t *bml_btl = frag->rdma_bml;
12701277

1271-
if (++frag->retries < mca_pml_ob1.rdma_retries_limit && OMPI_ERR_OUT_OF_RESOURCE == rc) {
1278+
if (frag->retries < mca_pml_ob1.rdma_retries_limit && OMPI_ERR_OUT_OF_RESOURCE == rc) {
12721279
/* queue the frag for later if there was a resource error */
12731280
OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
12741281
opal_list_append(&mca_pml_ob1.rdma_pending, (opal_list_item_t*)frag);
12751282
OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
1276-
} else {
1283+
return;
1284+
}
1285+
12771286
#if OPAL_ENABLE_FT
1278-
if(!ompi_proc_is_active(sendreq->req_send.req_base.req_proc)) {
1279-
return;
1280-
}
1281-
#endif /* OPAL_ENABLE_FT */
1282-
/* tell receiver to deregister memory */
1283-
mca_pml_ob1_send_fin (sendreq->req_send.req_base.req_proc, bml_btl,
1284-
frag->rdma_hdr.hdr_rdma.hdr_frag, 0, MCA_BTL_NO_ORDER,
1285-
OPAL_ERR_TEMP_OUT_OF_RESOURCE);
1286-
1287-
/* send fragment by copy in/out */
1288-
mca_pml_ob1_send_request_copy_in_out(sendreq, frag->rdma_hdr.hdr_rdma.hdr_rdma_offset,
1289-
frag->rdma_length);
1290-
/* if a pointer to a receive request is not set it means that
1291-
* ACK was not yet received. Don't schedule sends before ACK */
1292-
if (NULL != sendreq->req_recv.pval)
1293-
mca_pml_ob1_send_request_schedule (sendreq);
1287+
if(!ompi_proc_is_active(sendreq->req_send.req_base.req_proc)) {
1288+
return;
12941289
}
1290+
#endif /* OPAL_ENABLE_FT */
1291+
/* tell receiver to deregister memory */
1292+
mca_pml_ob1_send_fin (sendreq->req_send.req_base.req_proc, bml_btl,
1293+
frag->rdma_hdr.hdr_rdma.hdr_frag, 0, MCA_BTL_NO_ORDER,
1294+
OPAL_ERR_TEMP_OUT_OF_RESOURCE);
1295+
1296+
/* send fragment by copy in/out */
1297+
mca_pml_ob1_send_request_copy_in_out(sendreq, frag->rdma_hdr.hdr_rdma.hdr_rdma_offset,
1298+
frag->rdma_length);
1299+
/* if a pointer to a receive request is not set it means that
1300+
* ACK was not yet received. Don't schedule sends before ACK */
1301+
if (NULL != sendreq->req_recv.pval)
1302+
mca_pml_ob1_send_request_schedule (sendreq);
12951303
}
12961304

12971305
/**

opal/datatype/opal_datatype_internal.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -539,7 +539,7 @@ struct opal_datatype_t;
539539
# define OPAL_DATATYPE_SAFEGUARD_POINTER(ACTPTR, LENGTH, INITPTR, PDATA, COUNT) \
540540
{ \
541541
unsigned char *__lower_bound = (INITPTR), *__upper_bound; \
542-
assert(((LENGTH) != 0) && ((COUNT) != 0)); \
542+
assert( (COUNT) != 0 ); \
543543
__lower_bound += (PDATA)->true_lb; \
544544
__upper_bound = (INITPTR) + (PDATA)->true_ub + \
545545
((PDATA)->ub - (PDATA)->lb) * ((COUNT) -1); \

opal/datatype/opal_datatype_position.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,8 +66,8 @@ static inline void position_single_block(opal_convertor_t *CONVERTOR, unsigned c
6666
}
6767

6868
/**
69-
* Advance the convertors' position according. Update the pointer and the remaining space
70-
* accordingly.
69+
* Advance the convertor's position to account for *COUNT elements. Update
70+
* the pointer and the remaining space accordingly.
7171
*/
7272
static inline void position_predefined_data(opal_convertor_t *CONVERTOR, dt_elem_desc_t *ELEM,
7373
size_t *COUNT, unsigned char **POINTER, size_t *SPACE)
@@ -82,7 +82,8 @@ static inline void position_predefined_data(opal_convertor_t *CONVERTOR, dt_elem
8282

8383
if (cando_count > *(COUNT)) {
8484
cando_count = *(COUNT);
85-
}
85+
} else if( 0 == cando_count )
86+
return;
8687

8788
if (1 == _elem->blocklen) {
8889
DO_DEBUG(opal_output(0,

opal/mca/btl/sm/btl_sm_send.c

Lines changed: 0 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -73,18 +73,4 @@ int mca_btl_sm_send(struct mca_btl_base_module_t *btl, struct mca_btl_base_endpo
7373
}
7474

7575
return OPAL_SUCCESS;
76-
77-
#if 0
78-
if (((frag->hdr->flags & MCA_BTL_SM_FLAG_SINGLE_COPY) ||
79-
!(frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP)) &&
80-
frag->base.des_cbfunc) {
81-
frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
82-
83-
return OPAL_SUCCESS;
84-
}
85-
86-
/* data is gone (from the pml's perspective). frag callback/release will
87-
happen later */
88-
return 1;
89-
#endif
9076
}

opal/mca/btl/uct/btl_uct_am.c

Lines changed: 20 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ mca_btl_base_descriptor_t *mca_btl_uct_alloc(mca_btl_base_module_t *btl,
5151
}
5252

5353
static inline void _mca_btl_uct_send_pack(void *data, void *header, size_t header_size,
54-
opal_convertor_t *convertor, size_t payload_size)
54+
opal_convertor_t *convertor, size_t* payload_size)
5555
{
5656
uint32_t iov_count = 1;
5757
struct iovec iov;
@@ -64,11 +64,9 @@ static inline void _mca_btl_uct_send_pack(void *data, void *header, size_t heade
6464

6565
/* pack the data into the supplied buffer */
6666
iov.iov_base = (IOVBASE_TYPE *) ((intptr_t) data + header_size);
67-
iov.iov_len = length = payload_size;
67+
iov.iov_len = *payload_size;
6868

69-
(void) opal_convertor_pack(convertor, &iov, &iov_count, &length);
70-
71-
assert(length == payload_size);
69+
(void) opal_convertor_pack(convertor, &iov, &iov_count, payload_size);
7270
}
7371

7472
struct mca_btl_base_descriptor_t *mca_btl_uct_prepare_src(mca_btl_base_module_t *btl,
@@ -92,7 +90,10 @@ struct mca_btl_base_descriptor_t *mca_btl_uct_prepare_src(mca_btl_base_module_t
9290
}
9391

9492
_mca_btl_uct_send_pack((void *) ((intptr_t) frag->uct_iov.buffer + reserve), NULL, 0,
95-
convertor, *size);
93+
convertor, size);
94+
/* update the length of the fragment according to the convertor packed data */
95+
frag->segments[0].seg_len = reserve + *size;
96+
frag->uct_iov.length = frag->segments[0].seg_len;
9697
} else {
9798
opal_convertor_get_current_pointer(convertor, &data_ptr);
9899
assert(NULL != data_ptr);
@@ -286,7 +287,7 @@ static size_t mca_btl_uct_sendi_pack(void *data, void *arg)
286287

287288
am_header->value = args->am_header;
288289
_mca_btl_uct_send_pack((void *) ((intptr_t) data + 8), args->header, args->header_size,
289-
args->convertor, args->payload_size);
290+
args->convertor, &args->payload_size);
290291
return args->header_size + args->payload_size + 8;
291292
}
292293

@@ -329,9 +330,18 @@ int mca_btl_uct_sendi(mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpo
329330
} else if (msg_size < (size_t) MCA_BTL_UCT_TL_ATTR(uct_btl->am_tl, context->context_id)
330331
.cap.am.max_short) {
331332
int8_t *data = alloca(total_size);
332-
_mca_btl_uct_send_pack(data, header, header_size, convertor, payload_size);
333-
ucs_status = uct_ep_am_short(ep_handle, MCA_BTL_UCT_FRAG, am_header.value, data,
334-
total_size);
333+
size_t packed_payload_size = payload_size;
334+
_mca_btl_uct_send_pack(data, header, header_size, convertor, &packed_payload_size);
335+
if (packed_payload_size != payload_size) {
336+
/* This should never happen as the packed data should go in a single pack. But
337+
in case it does, fall back to a descriptor allocation and let the caller
338+
send the data.
339+
*/
340+
ucs_status = UCS_ERR_NO_RESOURCE;
341+
} else {
342+
ucs_status = uct_ep_am_short(ep_handle, MCA_BTL_UCT_FRAG, am_header.value, data,
343+
total_size);
344+
}
335345
} else {
336346
ssize_t size;
337347

opal/mca/btl/uct/btl_uct_module.c

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -337,7 +337,8 @@ mca_btl_uct_module_t mca_btl_uct_module_template = {
337337

338338
/* set the default flags for this btl. uct provides us with rdma and both
339339
* fetching and non-fetching atomics (though limited to add and cswap) */
340-
.btl_flags = MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_ATOMIC_FOPS | MCA_BTL_FLAGS_ATOMIC_OPS,
340+
.btl_flags = MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_ATOMIC_FOPS | MCA_BTL_FLAGS_ATOMIC_OPS
341+
| MCA_BTL_FLAGS_RDMA_REMOTE_COMPLETION,
341342
.btl_atomic_flags = MCA_BTL_ATOMIC_SUPPORTS_ADD | MCA_BTL_ATOMIC_SUPPORTS_CSWAP
342343
| MCA_BTL_ATOMIC_SUPPORTS_SWAP | MCA_BTL_ATOMIC_SUPPORTS_32BIT,
343344

opal/mca/btl/uct/btl_uct_tl.c

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -78,11 +78,10 @@ static void mca_btl_uct_module_set_atomic_flags(mca_btl_uct_module_t *module, mc
7878
uint64_t atomic_flags32 = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.atomic32.fop_flags;
7979
uint64_t atomic_flags64 = MCA_BTL_UCT_TL_ATTR(tl, 0).cap.atomic64.fop_flags;
8080

81-
/* NTH: don't really have a way to separate 32-bit and 64-bit right now */
82-
uint64_t all_flags = atomic_flags32 & atomic_flags64;
83-
84-
module->super.btl_atomic_flags = 0;
81+
uint64_t all_flags = atomic_flags64 | atomic_flags32;
8582

83+
module->super.btl_atomic_flags = (0 != atomic_flags32) ? MCA_BTL_ATOMIC_SUPPORTS_32BIT : 0;
84+
8685
if (cap_flags & UCT_IFACE_FLAG_ATOMIC_CPU) {
8786
module->super.btl_atomic_flags |= MCA_BTL_ATOMIC_SUPPORTS_GLOB;
8887
}

0 commit comments

Comments
 (0)