Commit 87fbe5f

Author: Ralph Castain
Merge pull request #4680 from rhc54/topic/addhost

Continue resolving add_host behavior

2 parents: cb5dfbe + 75eb565

4 files changed: +121 −77 lines


orte/mca/odls/base/odls_base_default_fns.c
67 additions, 64 deletions
@@ -109,9 +109,9 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer,
     int rc, v;
     orte_job_t *jdata=NULL, *jptr;
     orte_job_map_t *map=NULL;
-    opal_buffer_t *wireup, jobdata;
+    opal_buffer_t *wireup, jobdata, priorjob;
     opal_byte_object_t bo, *boptr;
-    int32_t numbytes, numjobs;
+    int32_t numbytes;
     int8_t flag;
     void *nptr;
     uint32_t key;
@@ -270,49 +270,51 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer,
         flag = 1;
         opal_dss.pack(buffer, &flag, 1, OPAL_INT8);
         OBJ_CONSTRUCT(&jobdata, opal_buffer_t);
-        numjobs = 0;
         rc = opal_hash_table_get_first_key_uint32(orte_job_data, &key, (void **)&jptr, &nptr);
         while (OPAL_SUCCESS == rc) {
             /* skip the one we are launching now */
             if (NULL != jptr && jptr != jdata &&
                 ORTE_PROC_MY_NAME->jobid != jptr->jobid) {
+                OBJ_CONSTRUCT(&priorjob, opal_buffer_t);
                 /* pack the job struct */
-                if (ORTE_SUCCESS != (rc = opal_dss.pack(&jobdata, &jptr, 1, ORTE_JOB))) {
+                if (ORTE_SUCCESS != (rc = opal_dss.pack(&priorjob, &jptr, 1, ORTE_JOB))) {
                     ORTE_ERROR_LOG(rc);
                     OBJ_DESTRUCT(&jobdata);
+                    OBJ_DESTRUCT(&priorjob);
                     return rc;
                 }
                 /* pack the location of each proc */
                 for (n=0; n < jptr->procs->size; n++) {
                     if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jptr->procs, n))) {
                         continue;
                     }
-                    if (ORTE_SUCCESS != (rc = opal_dss.pack(&jobdata, &proc->parent, 1, ORTE_VPID))) {
+                    if (ORTE_SUCCESS != (rc = opal_dss.pack(&priorjob, &proc->parent, 1, ORTE_VPID))) {
                         ORTE_ERROR_LOG(rc);
                         OBJ_DESTRUCT(&jobdata);
+                        OBJ_DESTRUCT(&priorjob);
                         return rc;
                     }
                 }
-                ++numjobs;
+                /* pack the jobdata buffer */
+                wireup = &priorjob;
+                if (ORTE_SUCCESS != (rc = opal_dss.pack(&jobdata, &wireup, 1, OPAL_BUFFER))) {
+                    ORTE_ERROR_LOG(rc);
+                    OBJ_DESTRUCT(&jobdata);
+                    OBJ_DESTRUCT(&priorjob);
+                    return rc;
+                }
+                OBJ_DESTRUCT(&priorjob);
             }
             rc = opal_hash_table_get_next_key_uint32(orte_job_data, &key, (void **)&jptr, nptr, &nptr);
         }
-        /* pack the number of jobs */
-        if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &numjobs, 1, OPAL_INT32))) {
+        /* pack the jobdata buffer */
+        wireup = &jobdata;
+        if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &wireup, 1, OPAL_BUFFER))) {
             ORTE_ERROR_LOG(rc);
             OBJ_DESTRUCT(&jobdata);
             return rc;
         }
-        if (0 < numjobs) {
-            /* pack the jobdata buffer */
-            wireup = &jobdata;
-            if (ORTE_SUCCESS != (rc = opal_dss.pack(buffer, &wireup, 1, OPAL_BUFFER))) {
-                ORTE_ERROR_LOG(rc);
-                OBJ_DESTRUCT(&jobdata);
-                return rc;
-            }
-            OBJ_DESTRUCT(&jobdata);
-        }
+        OBJ_DESTRUCT(&jobdata);
     } else {
         flag = 0;
         opal_dss.pack(buffer, &flag, 1, OPAL_INT8);
@@ -367,8 +369,8 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
     orte_job_t *jdata=NULL, *daemons;
     orte_node_t *node;
     orte_vpid_t dmnvpid, v;
-    int32_t n, k;
-    opal_buffer_t *bptr;
+    int32_t n;
+    opal_buffer_t *bptr, *jptr;
     orte_proc_t *pptr, *dmn;
     orte_app_context_t *app;
     int8_t flag;
@@ -391,68 +393,69 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
     }
 
     if (0 != flag) {
-        /* see if additional jobs are included in the data */
+        /* unpack the buffer containing the info */
         cnt=1;
-        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &n, &cnt, OPAL_INT32))) {
+        if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &bptr, &cnt, OPAL_BUFFER))) {
             *job = ORTE_JOBID_INVALID;
             ORTE_ERROR_LOG(rc);
+            OBJ_RELEASE(bptr);
             goto REPORT_ERROR;
         }
-
-        if (0 < n) {
-            /* unpack the buffer containing the info */
+        cnt=1;
+        while (ORTE_SUCCESS == (rc = opal_dss.unpack(bptr, &jptr, &cnt, OPAL_BUFFER))) {
+            /* unpack each job and add it to the local orte_job_data array */
             cnt=1;
-            if (ORTE_SUCCESS != (rc = opal_dss.unpack(buffer, &bptr, &cnt, OPAL_BUFFER))) {
+            if (ORTE_SUCCESS != (rc = opal_dss.unpack(jptr, &jdata, &cnt, ORTE_JOB))) {
                 *job = ORTE_JOBID_INVALID;
                 ORTE_ERROR_LOG(rc);
+                OBJ_RELEASE(bptr);
+                OBJ_RELEASE(jptr);
                 goto REPORT_ERROR;
             }
-            for (k=0; k < n; k++) {
-                /* unpack each job and add it to the local orte_job_data array */
+            /* check to see if we already have this one */
+            if (NULL == orte_get_job_data_object(jdata->jobid)) {
+                /* nope - add it */
+                opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, jdata);
+            } else {
+                /* yep - so we can drop this copy */
+                jdata->jobid = ORTE_JOBID_INVALID;
+                OBJ_RELEASE(jdata);
+                OBJ_RELEASE(jptr);
+                cnt=1;
+                continue;
+            }
+            /* unpack the location of each proc in this job */
+            for (v=0; v < jdata->num_procs; v++) {
+                if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, v))) {
+                    pptr = OBJ_NEW(orte_proc_t);
+                    pptr->name.jobid = jdata->jobid;
+                    pptr->name.vpid = v;
+                    opal_pointer_array_set_item(jdata->procs, v, pptr);
+                }
                 cnt=1;
-                if (ORTE_SUCCESS != (rc = opal_dss.unpack(bptr, &jdata, &cnt, ORTE_JOB))) {
-                    *job = ORTE_JOBID_INVALID;
+                if (ORTE_SUCCESS != (rc = opal_dss.unpack(jptr, &dmnvpid, &cnt, ORTE_VPID))) {
                     ORTE_ERROR_LOG(rc);
+                    OBJ_RELEASE(jptr);
+                    OBJ_RELEASE(bptr);
                     goto REPORT_ERROR;
                 }
-                /* check to see if we already have this one */
-                if (NULL == orte_get_job_data_object(jdata->jobid)) {
-                    /* nope - add it */
-                    opal_hash_table_set_value_uint32(orte_job_data, jdata->jobid, jdata);
-                } else {
-                    /* yep - so we can drop this copy */
-                    jdata->jobid = ORTE_JOBID_INVALID;
-                    OBJ_RELEASE(jdata);
-                    continue;
-                }
-                /* unpack the location of each proc in this job */
-                for (v=0; v < jdata->num_procs; v++) {
-                    if (NULL == (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, v))) {
-                        pptr = OBJ_NEW(orte_proc_t);
-                        pptr->name.jobid = jdata->jobid;
-                        pptr->name.vpid = v;
-                        opal_pointer_array_set_item(jdata->procs, v, pptr);
-                    }
-                    cnt=1;
-                    if (ORTE_SUCCESS != (rc = opal_dss.unpack(bptr, &dmnvpid, &cnt, ORTE_VPID))) {
-                        ORTE_ERROR_LOG(rc);
-                        OBJ_RELEASE(jdata);
-                        goto REPORT_ERROR;
-                    }
-                    /* lookup the daemon */
-                    if (NULL == (dmn = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, dmnvpid))) {
-                        ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
-                        rc = ORTE_ERR_NOT_FOUND;
-                        goto REPORT_ERROR;
-                    }
-                    /* connect the two */
-                    OBJ_RETAIN(dmn->node);
-                    pptr->node = dmn->node;
+                /* lookup the daemon */
+                if (NULL == (dmn = (orte_proc_t*)opal_pointer_array_get_item(daemons->procs, dmnvpid))) {
+                    ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
+                    rc = ORTE_ERR_NOT_FOUND;
+                    OBJ_RELEASE(jptr);
+                    OBJ_RELEASE(bptr);
+                    goto REPORT_ERROR;
                 }
+                /* connect the two */
+                OBJ_RETAIN(dmn->node);
+                pptr->node = dmn->node;
             }
             /* release the buffer */
-            OBJ_RELEASE(bptr);
+            OBJ_RELEASE(jptr);
+            cnt = 1;
         }
+        OBJ_RELEASE(bptr);
     }
 
     /* unpack the job we are to launch */
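
The substance of this change is a wire-format simplification: instead of packing a job count and then streaming the job data inline, the sender now packs each prior job into its own opal_buffer_t and nests those sub-buffers inside a single jobdata buffer; the receiver unpacks OPAL_BUFFER objects until the stream is exhausted, so no count is needed. A minimal sketch of the pattern, using only the opal_dss calls visible in the diff (pack_one_job/unpack_one_job and njobs are hypothetical stand-ins for the ORTE_JOB/ORTE_VPID packing done above; error handling elided):

    /* Sender: nest one sub-buffer per job inside an envelope buffer. */
    opal_buffer_t envelope, item, *bptr;
    int32_t cnt;
    int i;

    OBJ_CONSTRUCT(&envelope, opal_buffer_t);
    for (i = 0; i < njobs; i++) {          /* njobs: however many prior jobs exist */
        OBJ_CONSTRUCT(&item, opal_buffer_t);
        pack_one_job(&item, i);            /* hypothetical helper */
        bptr = &item;
        opal_dss.pack(&envelope, &bptr, 1, OPAL_BUFFER);
        OBJ_DESTRUCT(&item);
    }

    /* Receiver: no count required - keep unpacking sub-buffers until
     * the envelope runs dry, resetting cnt on each iteration. */
    cnt = 1;
    while (ORTE_SUCCESS == opal_dss.unpack(&envelope, &bptr, &cnt, OPAL_BUFFER)) {
        unpack_one_job(bptr);              /* hypothetical helper */
        OBJ_RELEASE(bptr);
        cnt = 1;
    }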

orte/mca/plm/rsh/plm_rsh_module.c
9 additions, 1 deletion
@@ -15,7 +15,7 @@
  * Copyright (c) 2008-2009 Sun Microsystems, Inc. All rights reserved.
  * Copyright (c) 2011-2017 IBM Corporation. All rights reserved.
  * Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
- * Copyright (c) 2015-2017 Research Organization for Information Science
+ * Copyright (c) 2015-2018 Research Organization for Information Science
  *                         and Technology (RIST). All rights reserved.
  * $COPYRIGHT$
  *
@@ -887,6 +887,10 @@ static int remote_spawn(void)
         opal_list_append(&launch_list, &caddy->super);
     }
     OPAL_LIST_DESTRUCT(&coll);
+    /* we NEVER use tree-spawn for secondary launches - e.g.,
+     * due to a dynamic launch requesting add_hosts - so be
+     * sure to turn it off here */
+    mca_plm_rsh_component.no_tree_spawn = true;
 
     /* trigger the event to start processing the launch list */
     OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
@@ -1280,6 +1284,10 @@ static void launch_daemons(int fd, short args, void *cbdata)
         OBJ_RETAIN(caddy->daemon);
         opal_list_append(&launch_list, &caddy->super);
     }
+    /* we NEVER use tree-spawn for secondary launches - e.g.,
+     * due to a dynamic launch requesting add_hosts - so be
+     * sure to turn it off here */
+    mca_plm_rsh_component.no_tree_spawn = true;
 
     /* set the job state to indicate the daemons are launched */
     state->jdata->state = ORTE_JOB_STATE_DAEMONS_LAUNCHED;
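
For context, the rsh/ssh launcher can either start every remote daemon directly or fan the launch out through already-running daemons (tree spawn). Both hunks above force the direct mode once an initial launch has completed, because daemons added later (e.g., by an add_host request) are unknown to the existing tree. A schematic of the intended effect, assuming the component consults the flag when composing a subsequent launch (that check is not part of this diff):

    /* Hypothetical gating sketch - the real check lives elsewhere in
     * plm_rsh_module.c and is not shown here. */
    if (mca_plm_rsh_component.no_tree_spawn) {
        /* secondary launch: start each new daemon directly */
    } else {
        /* initial launch: may route through intermediate daemons */
    }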

orte/mca/ras/base/ras_base_allocate.c
30 additions, 12 deletions
@@ -11,7 +11,9 @@
  *                         All rights reserved.
  * Copyright (c) 2011-2012 Los Alamos National Security, LLC. All rights
  *                         reserved.
- * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
+ * Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
+ * Copyright (c) 2018      Research Organization for Information Science
+ *                         and Technology (RIST). All rights reserved.
  * $COPYRIGHT$
  *
  * Additional copyrights may follow
@@ -456,9 +458,9 @@ int orte_ras_base_add_hosts(orte_job_t *jdata)
 {
     int rc;
     opal_list_t nodes;
-    int i;
+    int i, n;
     orte_app_context_t *app;
-    orte_node_t *node;
+    orte_node_t *node, *next, *nptr;
     char *hosts;
 
     /* construct a list to hold the results */
@@ -532,19 +534,35 @@ int orte_ras_base_add_hosts(orte_job_t *jdata)
 
     /* if something was found, we add that to our global pool */
     if (!opal_list_is_empty(&nodes)) {
-        /* mark all the nodes as "added" */
-        OPAL_LIST_FOREACH(node, &nodes, orte_node_t) {
+        /* the node insert code doesn't check for uniqueness, so we will
+         * do so here - yes, this is an ugly, non-scalable loop, but this
+         * is the exception case and so we can do it here */
+        OPAL_LIST_FOREACH_SAFE(node, next, &nodes, orte_node_t) {
             node->state = ORTE_NODE_STATE_ADDED;
+            for (n=0; n < orte_node_pool->size; n++) {
+                if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) {
+                    continue;
+                }
+                if (0 == strcmp(node->name, nptr->name)) {
+                    opal_list_remove_item(&nodes, &node->super);
+                    OBJ_RELEASE(node);
+                    break;
+                }
+            }
         }
-        /* store the results in the global resource pool - this removes the
-         * list items
-         */
-        if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) {
-            ORTE_ERROR_LOG(rc);
+        if (!opal_list_is_empty(&nodes)) {
+            /* store the results in the global resource pool - this removes the
+             * list items
+             */
+            if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) {
+                ORTE_ERROR_LOG(rc);
+            }
+            /* mark that an updated nidmap must be communicated to existing daemons */
+            orte_nidmap_communicated = false;
         }
-        /* cleanup */
-        OBJ_DESTRUCT(&nodes);
     }
+    /* cleanup */
+    OPAL_LIST_DESTRUCT(&nodes);
 
     /* shall we display the results? */
     if (0 < opal_output_get_verbosity(orte_ras_base_framework.framework_output)) {
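
One detail worth noting: the loop above switches from OPAL_LIST_FOREACH to OPAL_LIST_FOREACH_SAFE because it now removes items mid-iteration; the _SAFE variant caches the next element before the body runs, so unlinking and releasing the current node does not break traversal. The idiom in isolation (already_in_pool is a hypothetical stand-in for the strcmp scan over orte_node_pool):

    orte_node_t *node, *next;
    OPAL_LIST_FOREACH_SAFE(node, next, &nodes, orte_node_t) {
        if (already_in_pool(node)) {           /* hypothetical predicate */
            /* safe: 'next' was captured before we unlink the current item */
            opal_list_remove_item(&nodes, &node->super);
            OBJ_RELEASE(node);
        }
    }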

orte/test/mpi/add_host.c
15 additions, 0 deletions
@@ -47,6 +47,21 @@ int main(int argc, char* argv[])
         }
         MPI_Comm_disconnect(&child);
         printf("Parent disconnected\n");
+        /* do it again */
+        MPI_Info_set(info, "add-host", "rhc003:24");
+        if (MPI_SUCCESS != (rc = MPI_Comm_spawn(argv[0], MPI_ARGV_NULL, 3, info,
+                                                0, MPI_COMM_WORLD, &child, MPI_ERRCODES_IGNORE))) {
+            printf("Child failed to spawn\n");
+            return rc;
+        }
+        printf("Parent done with second spawn\n");
+        if (0 == rank) {
+            msg = 38;
+            printf("Parent sending message to second children\n");
+            MPI_Send(&msg, 1, MPI_INT, 0, 1, child);
+        }
+        MPI_Comm_disconnect(&child);
+        printf("Parent disconnected again\n");
     }
     /* Otherwise, we're the child */
     else {
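
The pattern this test exercises is MPI_Comm_spawn with Open MPI's "add-host" info key, which asks the runtime to extend its allocation with the named host before spawning; the second spawn added above checks that the extension works repeatedly. A stripped-down, self-contained version of the same parent/child logic follows (the hostname:slots value is a placeholder, and only one spawn is shown):

    #include <stdio.h>
    #include <mpi.h>

    int main(int argc, char *argv[])
    {
        int rank, rc, msg = 38;
        MPI_Comm child, parent;
        MPI_Info info;

        MPI_Init(&argc, &argv);
        MPI_Comm_rank(MPI_COMM_WORLD, &rank);
        MPI_Comm_get_parent(&parent);

        if (MPI_COMM_NULL != parent) {
            /* we are a spawned child: rank 0 receives, then all disconnect */
            if (0 == rank) {
                MPI_Recv(&msg, 1, MPI_INT, 0, 1, parent, MPI_STATUS_IGNORE);
            }
            MPI_Comm_disconnect(&parent);
            MPI_Finalize();
            return 0;
        }

        /* we are the parent: ask the runtime to grow the allocation, then spawn */
        MPI_Info_create(&info);
        MPI_Info_set(info, "add-host", "somehost:24");   /* placeholder host */
        rc = MPI_Comm_spawn(argv[0], MPI_ARGV_NULL, 3, info, 0,
                            MPI_COMM_WORLD, &child, MPI_ERRCODES_IGNORE);
        if (MPI_SUCCESS != rc) {
            printf("Child failed to spawn\n");
            MPI_Abort(MPI_COMM_WORLD, rc);
        }
        if (0 == rank) {
            MPI_Send(&msg, 1, MPI_INT, 0, 1, child);
        }
        MPI_Comm_disconnect(&child);

        MPI_Info_free(&info);
        MPI_Finalize();
        return 0;
    }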
