@@ -109,9 +109,9 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer,
109
109
int rc , v ;
110
110
orte_job_t * jdata = NULL , * jptr ;
111
111
orte_job_map_t * map = NULL ;
112
- opal_buffer_t * wireup , jobdata ;
112
+ opal_buffer_t * wireup , jobdata , priorjob ;
113
113
opal_byte_object_t bo , * boptr ;
114
- int32_t numbytes , numjobs ;
114
+ int32_t numbytes ;
115
115
int8_t flag ;
116
116
void * nptr ;
117
117
uint32_t key ;
@@ -270,49 +270,51 @@ int orte_odls_base_default_get_add_procs_data(opal_buffer_t *buffer,
270
270
flag = 1 ;
271
271
opal_dss .pack (buffer , & flag , 1 , OPAL_INT8 );
272
272
OBJ_CONSTRUCT (& jobdata , opal_buffer_t );
273
- numjobs = 0 ;
274
273
rc = opal_hash_table_get_first_key_uint32 (orte_job_data , & key , (void * * )& jptr , & nptr );
275
274
while (OPAL_SUCCESS == rc ) {
276
275
/* skip the one we are launching now */
277
276
if (NULL != jptr && jptr != jdata &&
278
277
ORTE_PROC_MY_NAME -> jobid != jptr -> jobid ) {
278
+ OBJ_CONSTRUCT (& priorjob , opal_buffer_t );
279
279
/* pack the job struct */
280
- if (ORTE_SUCCESS != (rc = opal_dss .pack (& jobdata , & jptr , 1 , ORTE_JOB ))) {
280
+ if (ORTE_SUCCESS != (rc = opal_dss .pack (& priorjob , & jptr , 1 , ORTE_JOB ))) {
281
281
ORTE_ERROR_LOG (rc );
282
282
OBJ_DESTRUCT (& jobdata );
283
+ OBJ_DESTRUCT (& priorjob );
283
284
return rc ;
284
285
}
285
286
/* pack the location of each proc */
286
287
for (n = 0 ; n < jptr -> procs -> size ; n ++ ) {
287
288
if (NULL == (proc = (orte_proc_t * )opal_pointer_array_get_item (jptr -> procs , n ))) {
288
289
continue ;
289
290
}
290
- if (ORTE_SUCCESS != (rc = opal_dss .pack (& jobdata , & proc -> parent , 1 , ORTE_VPID ))) {
291
+ if (ORTE_SUCCESS != (rc = opal_dss .pack (& priorjob , & proc -> parent , 1 , ORTE_VPID ))) {
291
292
ORTE_ERROR_LOG (rc );
292
293
OBJ_DESTRUCT (& jobdata );
294
+ OBJ_DESTRUCT (& priorjob );
293
295
return rc ;
294
296
}
295
297
}
296
- ++ numjobs ;
298
+ /* pack this prior job's buffer into the jobdata buffer */
299
+ wireup = & priorjob ;
300
+ if (ORTE_SUCCESS != (rc = opal_dss .pack (& jobdata , & wireup , 1 , OPAL_BUFFER ))) {
301
+ ORTE_ERROR_LOG (rc );
302
+ OBJ_DESTRUCT (& jobdata );
303
+ OBJ_DESTRUCT (& priorjob );
304
+ return rc ;
305
+ }
306
+ OBJ_DESTRUCT (& priorjob );
297
307
}
298
308
rc = opal_hash_table_get_next_key_uint32 (orte_job_data , & key , (void * * )& jptr , nptr , & nptr );
299
309
}
300
- /* pack the number of jobs */
301
- if (ORTE_SUCCESS != (rc = opal_dss .pack (buffer , & numjobs , 1 , OPAL_INT32 ))) {
310
+ /* pack the jobdata buffer */
311
+ wireup = & jobdata ;
312
+ if (ORTE_SUCCESS != (rc = opal_dss .pack (buffer , & wireup , 1 , OPAL_BUFFER ))) {
302
313
ORTE_ERROR_LOG (rc );
303
314
OBJ_DESTRUCT (& jobdata );
304
315
return rc ;
305
316
}
306
- if (0 < numjobs ) {
307
- /* pack the jobdata buffer */
308
- wireup = & jobdata ;
309
- if (ORTE_SUCCESS != (rc = opal_dss .pack (buffer , & wireup , 1 , OPAL_BUFFER ))) {
310
- ORTE_ERROR_LOG (rc );
311
- OBJ_DESTRUCT (& jobdata );
312
- return rc ;
313
- }
314
- OBJ_DESTRUCT (& jobdata );
315
- }
317
+ OBJ_DESTRUCT (& jobdata );
316
318
} else {
317
319
flag = 0 ;
318
320
opal_dss .pack (buffer , & flag , 1 , OPAL_INT8 );
@@ -367,8 +369,8 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
367
369
orte_job_t * jdata = NULL , * daemons ;
368
370
orte_node_t * node ;
369
371
orte_vpid_t dmnvpid , v ;
370
- int32_t n , k ;
371
- opal_buffer_t * bptr ;
372
+ int32_t n ;
373
+ opal_buffer_t * bptr , * jptr ;
372
374
orte_proc_t * pptr , * dmn ;
373
375
orte_app_context_t * app ;
374
376
int8_t flag ;
@@ -391,68 +393,69 @@ int orte_odls_base_default_construct_child_list(opal_buffer_t *buffer,
391
393
}
392
394
393
395
if (0 != flag ) {
394
- /* see if additional jobs are included in the data */
396
+ /* unpack the buffer containing the info */
395
397
cnt = 1 ;
396
- if (ORTE_SUCCESS != (rc = opal_dss .unpack (buffer , & n , & cnt , OPAL_INT32 ))) {
398
+ if (ORTE_SUCCESS != (rc = opal_dss .unpack (buffer , & bptr , & cnt , OPAL_BUFFER ))) {
397
399
* job = ORTE_JOBID_INVALID ;
398
400
ORTE_ERROR_LOG (rc );
401
+ OBJ_RELEASE (bptr );
399
402
goto REPORT_ERROR ;
400
403
}
401
-
402
- if ( 0 < n ) {
403
- /* unpack the buffer containing the info */
404
+ cnt = 1 ;
405
+ while ( ORTE_SUCCESS == ( rc = opal_dss . unpack ( bptr , & jptr , & cnt , OPAL_BUFFER )) ) {
406
+ /* unpack each job and add it to the local orte_job_data array */
404
407
cnt = 1 ;
405
- if (ORTE_SUCCESS != (rc = opal_dss .unpack (buffer , & bptr , & cnt , OPAL_BUFFER ))) {
408
+ if (ORTE_SUCCESS != (rc = opal_dss .unpack (jptr , & jdata , & cnt , ORTE_JOB ))) {
406
409
* job = ORTE_JOBID_INVALID ;
407
410
ORTE_ERROR_LOG (rc );
411
+ OBJ_RELEASE (bptr );
412
+ OBJ_RELEASE (jptr );
408
413
goto REPORT_ERROR ;
409
414
}
410
- for (k = 0 ; k < n ; k ++ ) {
411
- /* unpack each job and add it to the local orte_job_data array */
415
+ /* check to see if we already have this one */
416
+ if (NULL == orte_get_job_data_object (jdata -> jobid )) {
417
+ /* nope - add it */
418
+ opal_hash_table_set_value_uint32 (orte_job_data , jdata -> jobid , jdata );
419
+ } else {
420
+ /* yep - so we can drop this copy */
421
+ jdata -> jobid = ORTE_JOBID_INVALID ;
422
+ OBJ_RELEASE (jdata );
423
+ OBJ_RELEASE (jptr );
424
+ cnt = 1 ;
425
+ continue ;
426
+ }
427
+ /* unpack the location of each proc in this job */
428
+ for (v = 0 ; v < jdata -> num_procs ; v ++ ) {
429
+ if (NULL == (pptr = (orte_proc_t * )opal_pointer_array_get_item (jdata -> procs , v ))) {
430
+ pptr = OBJ_NEW (orte_proc_t );
431
+ pptr -> name .jobid = jdata -> jobid ;
432
+ pptr -> name .vpid = v ;
433
+ opal_pointer_array_set_item (jdata -> procs , v , pptr );
434
+ }
412
435
cnt = 1 ;
413
- if (ORTE_SUCCESS != (rc = opal_dss .unpack (bptr , & jdata , & cnt , ORTE_JOB ))) {
414
- * job = ORTE_JOBID_INVALID ;
436
+ if (ORTE_SUCCESS != (rc = opal_dss .unpack (jptr , & dmnvpid , & cnt , ORTE_VPID ))) {
415
437
ORTE_ERROR_LOG (rc );
438
+ OBJ_RELEASE (jptr );
439
+ OBJ_RELEASE (bptr );
416
440
goto REPORT_ERROR ;
417
441
}
418
- /* check to see if we already have this one */
419
- if (NULL == orte_get_job_data_object (jdata -> jobid )) {
420
- /* nope - add it */
421
- opal_hash_table_set_value_uint32 (orte_job_data , jdata -> jobid , jdata );
422
- } else {
423
- /* yep - so we can drop this copy */
424
- jdata -> jobid = ORTE_JOBID_INVALID ;
425
- OBJ_RELEASE (jdata );
426
- continue ;
427
- }
428
- /* unpack the location of each proc in this job */
429
- for (v = 0 ; v < jdata -> num_procs ; v ++ ) {
430
- if (NULL == (pptr = (orte_proc_t * )opal_pointer_array_get_item (jdata -> procs , v ))) {
431
- pptr = OBJ_NEW (orte_proc_t );
432
- pptr -> name .jobid = jdata -> jobid ;
433
- pptr -> name .vpid = v ;
434
- opal_pointer_array_set_item (jdata -> procs , v , pptr );
435
- }
436
- cnt = 1 ;
437
- if (ORTE_SUCCESS != (rc = opal_dss .unpack (bptr , & dmnvpid , & cnt , ORTE_VPID ))) {
438
- ORTE_ERROR_LOG (rc );
439
- OBJ_RELEASE (jdata );
440
- goto REPORT_ERROR ;
441
- }
442
- /* lookup the daemon */
443
- if (NULL == (dmn = (orte_proc_t * )opal_pointer_array_get_item (daemons -> procs , dmnvpid ))) {
444
- ORTE_ERROR_LOG (ORTE_ERR_NOT_FOUND );
445
- rc = ORTE_ERR_NOT_FOUND ;
446
- goto REPORT_ERROR ;
447
- }
448
- /* connect the two */
449
- OBJ_RETAIN (dmn -> node );
450
- pptr -> node = dmn -> node ;
442
+ /* lookup the daemon */
443
+ if (NULL == (dmn = (orte_proc_t * )opal_pointer_array_get_item (daemons -> procs , dmnvpid ))) {
444
+ ORTE_ERROR_LOG (ORTE_ERR_NOT_FOUND );
445
+ rc = ORTE_ERR_NOT_FOUND ;
446
+ OBJ_RELEASE (jptr );
447
+ OBJ_RELEASE (bptr );
448
+ goto REPORT_ERROR ;
451
449
}
450
+ /* connect the two */
451
+ OBJ_RETAIN (dmn -> node );
452
+ pptr -> node = dmn -> node ;
452
453
}
453
454
/* release the buffer */
454
- OBJ_RELEASE (bptr );
455
+ OBJ_RELEASE (jptr );
456
+ cnt = 1 ;
455
457
}
458
+ OBJ_RELEASE (bptr );
456
459
}
457
460
458
461
/* unpack the job we are to launch */
0 commit comments