Skip to content

Commit 0bcf2e5

Browse files
committed
Fix typo and protect the autotuning code.
Remove outdated autotuning code in HAN. Signed-off-by: Xi Luo <[email protected]> Signed-off-by: George Bosilca <[email protected]>
1 parent 0794048 commit 0bcf2e5

File tree

9 files changed

+83
-284
lines changed

9 files changed

+83
-284
lines changed

ompi/mca/coll/han/coll_han.h

Lines changed: 42 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,6 @@ BEGIN_C_DECLS
3333
#define COLL_HAN_LOW_MODULES 2
3434
#define COLL_HAN_UP_MODULES 2
3535

36-
typedef struct {
37-
uint32_t umod;
38-
uint32_t lmod;
39-
uint32_t fs;
40-
uint32_t ualg;
41-
uint32_t us;
42-
} selection;
43-
4436
struct mca_bcast_argu_s {
4537
mca_coll_task_t *cur_task;
4638
void *buff;
@@ -203,21 +195,10 @@ typedef struct mca_coll_han_component_t {
203195
uint32_t han_scatter_up_module;
204196
/* low level module for scatter */
205197
uint32_t han_scatter_low_module;
206-
/* whether enable auto tune */
207-
uint32_t han_auto_tune;
208198
/* whether we need reproducible results
209199
* (but disables topological optimisations)
210200
*/
211201
uint32_t han_reproducible;
212-
/* create a 3D array
213-
* num_processes (n): 2 4 8 16 32 64 (6)
214-
* num_core (c): 2 4 8 12 (4)
215-
* message size (m): 1 - 4194304 (23)
216-
*/
217-
uint32_t han_auto_tune_n;
218-
uint32_t han_auto_tune_c;
219-
uint32_t han_auto_tune_m;
220-
selection *han_auto_tuned;
221202
bool use_simple_algorithm[COLLCOUNT];
222203

223204
/* Dynamic configuration rules */
@@ -342,9 +323,6 @@ int *mca_coll_han_topo_init(struct ompi_communicator_t *comm, mca_coll_han_modul
342323
/* Utils */
343324
void mca_coll_han_get_ranks(int *vranks, int root, int low_size, int *root_low_rank,
344325
int *root_up_rank);
345-
uint32_t han_auto_tuned_get_n(uint32_t n);
346-
uint32_t han_auto_tuned_get_c(uint32_t c);
347-
uint32_t han_auto_tuned_get_m(uint32_t m);
348326

349327
const char* mca_coll_han_colltype_to_str(COLLTYPE_T coll);
350328
const char* mca_coll_han_topo_lvl_to_str(TOPO_LVL_T topo_lvl);
@@ -382,11 +360,11 @@ mca_coll_han_scatter_intra_dynamic(SCATTER_BASE_ARGS,
382360

383361
/* Bcast */
384362
int mca_coll_han_bcast_intra_simple(void *buff,
385-
int count,
386-
struct ompi_datatype_t *dtype,
387-
int root,
388-
struct ompi_communicator_t *comm,
389-
mca_coll_base_module_t *module);
363+
int count,
364+
struct ompi_datatype_t *dtype,
365+
int root,
366+
struct ompi_communicator_t *comm,
367+
mca_coll_base_module_t *module);
390368
void mac_coll_han_set_bcast_argu(mca_bcast_argu_t * argu, mca_coll_task_t * cur_task, void *buff,
391369
int seg_count, struct ompi_datatype_t *dtype,
392370
int root_up_rank, int root_low_rank,
@@ -449,23 +427,23 @@ int mca_coll_han_reduce_t1_task(void *task_argu);
449427
/* Allreduce */
450428
int
451429
mca_coll_han_allreduce_intra_simple(const void *sbuf,
452-
void *rbuf,
453-
int count,
454-
struct ompi_datatype_t *dtype,
455-
struct ompi_op_t *op,
456-
struct ompi_communicator_t *comm,
457-
mca_coll_base_module_t *module);
430+
void *rbuf,
431+
int count,
432+
struct ompi_datatype_t *dtype,
433+
struct ompi_op_t *op,
434+
struct ompi_communicator_t *comm,
435+
mca_coll_base_module_t *module);
458436
int
459437
mca_coll_han_allreduce_reproducible_decision(struct ompi_communicator_t *comm,
460438
mca_coll_base_module_t *module);
461439
int
462440
mca_coll_han_allreduce_reproducible(const void *sbuf,
463441
void *rbuf,
464-
int count,
465-
struct ompi_datatype_t *dtype,
466-
struct ompi_op_t *op,
467-
struct ompi_communicator_t *comm,
468-
mca_coll_base_module_t *module);
442+
int count,
443+
struct ompi_datatype_t *dtype,
444+
struct ompi_op_t *op,
445+
struct ompi_communicator_t *comm,
446+
mca_coll_base_module_t *module);
469447

470448
void mac_coll_han_set_allreduce_argu(mca_allreduce_argu_t * argu,
471449
mca_coll_task_t * cur_task,
@@ -497,11 +475,11 @@ int mca_coll_han_allreduce_t3_task(void *task_argu);
497475
/* Scatter */
498476
int
499477
mca_coll_han_scatter_intra(const void *sbuf, int scount,
500-
struct ompi_datatype_t *sdtype,
501-
void *rbuf, int rcount,
502-
struct ompi_datatype_t *rdtype,
503-
int root,
504-
struct ompi_communicator_t *comm, mca_coll_base_module_t * module);
478+
struct ompi_datatype_t *sdtype,
479+
void *rbuf, int rcount,
480+
struct ompi_datatype_t *rdtype,
481+
int root,
482+
struct ompi_communicator_t *comm, mca_coll_base_module_t * module);
505483
int mca_coll_han_scatter_us_task(void *task_argu);
506484
int mca_coll_han_scatter_ls_task(void *task_argu);
507485
void mac_coll_han_set_scatter_argu(mca_scatter_argu_t * argu,
@@ -524,11 +502,11 @@ void mac_coll_han_set_scatter_argu(mca_scatter_argu_t * argu,
524502
/* Gather */
525503
int
526504
mca_coll_han_gather_intra(const void *sbuf, int scount,
527-
struct ompi_datatype_t *sdtype,
528-
void *rbuf, int rcount,
529-
struct ompi_datatype_t *rdtype,
530-
int root,
531-
struct ompi_communicator_t *comm, mca_coll_base_module_t * module);
505+
struct ompi_datatype_t *sdtype,
506+
void *rbuf, int rcount,
507+
struct ompi_datatype_t *rdtype,
508+
int root,
509+
struct ompi_communicator_t *comm, mca_coll_base_module_t * module);
532510
int mca_coll_han_gather_lg_task(void *task_argu);
533511
int mca_coll_han_gather_ug_task(void *task_argu);
534512
void mac_coll_han_set_gather_argu(mca_gather_argu_t * argu,
@@ -548,19 +526,19 @@ void mac_coll_han_set_gather_argu(mca_gather_argu_t * argu,
548526
int w_rank, bool noop, ompi_request_t * req);
549527
int
550528
mca_coll_han_gather_intra_simple(const void *sbuf, int scount,
551-
struct ompi_datatype_t *sdtype,
552-
void *rbuf, int rcount,
553-
struct ompi_datatype_t *rdtype,
554-
int root,
555-
struct ompi_communicator_t *comm,
556-
mca_coll_base_module_t *module);
529+
struct ompi_datatype_t *sdtype,
530+
void *rbuf, int rcount,
531+
struct ompi_datatype_t *rdtype,
532+
int root,
533+
struct ompi_communicator_t *comm,
534+
mca_coll_base_module_t *module);
557535
/* reordering after gather, for unordered ranks */
558536
void
559537
ompi_coll_han_reorder_gather(const void *sbuf,
560-
void *rbuf, int rcount,
561-
struct ompi_datatype_t *rdtype,
562-
struct ompi_communicator_t *comm,
563-
int * topo);
538+
void *rbuf, int rcount,
539+
struct ompi_datatype_t *rdtype,
540+
struct ompi_communicator_t *comm,
541+
int * topo);
564542

565543

566544

@@ -590,11 +568,12 @@ void mac_coll_han_set_allgather_argu(mca_allgather_argu_t * argu,
590568
bool noop, bool is_mapbycore, int *topo, ompi_request_t * req);
591569
int
592570
mca_coll_han_allgather_intra_simple(const void *sbuf, int scount,
593-
struct ompi_datatype_t *sdtype,
594-
void* rbuf, int rcount,
595-
struct ompi_datatype_t *rdtype,
596-
struct ompi_communicator_t *comm,
597-
mca_coll_base_module_t *module);
571+
struct ompi_datatype_t *sdtype,
572+
void* rbuf, int rcount,
573+
struct ompi_datatype_t *rdtype,
574+
struct ompi_communicator_t *comm,
575+
mca_coll_base_module_t *module);
598576

599577
END_C_DECLS
578+
600579
#endif /* MCA_COLL_HAN_EXPORT_H */

ompi/mca/coll/han/coll_han_allgather.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ mca_coll_han_allgather_intra_simple(const void *sbuf, int scount,
285285
ptrdiff_t rsize, rgap = 0;
286286
/* Compute the size to receive all the local data, including datatypes empty gaps */
287287
rsize = opal_datatype_span(&rdtype->super, (int64_t)rcount * low_size, &rgap);
288-
// intermediary buffer on node leaders to gather on low comm
288+
/* intermediary buffer on node leaders to gather on low comm */
289289
tmp_buf = (char *) malloc(rsize);
290290
tmp_buf_start = tmp_buf - rgap;
291291
}
@@ -297,17 +297,18 @@ mca_coll_han_allgather_intra_simple(const void *sbuf, int scount,
297297
/* 2. allgather between node leaders, from tmp_buf to reorder_buf */
298298
if (low_rank == root_low_rank) {
299299
/* allocate buffer to store unordered result on node leaders
300-
* * if the processes are mapped-by core, no need to reorder:
301-
* * distribution of ranks on core first and node next,
302-
* * in a increasing order for both patterns */
300+
* if the processes are mapped-by core, no need to reorder:
301+
* distribution of ranks on core first and node next,
302+
* in an increasing order for both patterns.
303+
*/
303304
char *reorder_buf = NULL;
304305
char *reorder_buf_start = NULL;
305306
if (han_module->is_mapbycore) {
306307
reorder_buf_start = rbuf;
307308
} else {
308309
if (0 == low_rank && 0 == up_rank) { // first rank displays message
309310
OPAL_OUTPUT_VERBOSE((30, mca_coll_han_component.han_output,
310-
"[%d]: Future Allgather needs reordering: ", w_rank));
311+
"[%d]: Future Allgather needs reordering: ", up_rank));
311312
}
312313
ptrdiff_t rsize, rgap = 0;
313314
rsize = opal_datatype_span(&rdtype->super, (int64_t)rcount * low_size * up_size, &rgap);

ompi/mca/coll/han/coll_han_allreduce.c

Lines changed: 6 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -103,49 +103,12 @@ mca_coll_han_allreduce_intra(const void *sbuf,
103103
mca_coll_han_comm_create(comm, han_module);
104104
ompi_communicator_t *low_comm;
105105
ompi_communicator_t *up_comm;
106-
/* Auto tune is enabled */
107-
if (mca_coll_han_component.han_auto_tune && mca_coll_han_component.han_auto_tuned != NULL) {
108-
uint32_t n = han_auto_tuned_get_n(ompi_comm_size(han_module->cached_up_comms[0]));
109-
uint32_t c = han_auto_tuned_get_c(ompi_comm_size(han_module->cached_low_comms[0]));
110-
uint32_t m = han_auto_tuned_get_m(typelng * count);
111-
uint32_t id =
112-
n * mca_coll_han_component.han_auto_tune_c * mca_coll_han_component.han_auto_tune_m +
113-
c * mca_coll_han_component.han_auto_tune_m + m +
114-
mca_coll_han_component.han_auto_tune_n * mca_coll_han_component.han_auto_tune_c *
115-
mca_coll_han_component.han_auto_tune_m;
116-
uint32_t umod = mca_coll_han_component.han_auto_tuned[id].umod;
117-
uint32_t lmod = mca_coll_han_component.han_auto_tuned[id].lmod;
118-
uint32_t fs = mca_coll_han_component.han_auto_tuned[id].fs;
119-
/* ualg and us are only available when using ADAPT */
120-
/*
121-
uint32_t ualg = mca_coll_han_component.han_auto_tuned[id].ualg;
122-
uint32_t us = mca_coll_han_component.han_auto_tuned[id].us;
123-
*/
124-
/* Set up umod */
125-
up_comm = han_module->cached_up_comms[umod];
126-
/* Set up lmod */
127-
low_comm = han_module->cached_low_comms[lmod];
128-
/* Set up fs */
129-
COLL_BASE_COMPUTED_SEGCOUNT((size_t) fs, typelng, seg_count);
130-
/* Set up ualg and us, which is only available when using ADAPT */
131-
/*
132-
if (umod == 1) {
133-
((mca_coll_adapt_module_t *) (up_comm->c_coll->coll_ibcast_module))->adapt_component->
134-
adapt_ibcast_algorithm = ualg;
135-
((mca_coll_adapt_module_t *) (up_comm->c_coll->coll_ibcast_module))->adapt_component->
136-
adapt_ibcast_algorithm = ualg;
137-
((mca_coll_adapt_module_t *) (up_comm->c_coll->coll_ibcast_module))->adapt_component->
138-
adapt_ibcast_segment_size = us;
139-
((mca_coll_adapt_module_t *) (up_comm->c_coll->coll_ibcast_module))->adapt_component->
140-
adapt_ibcast_segment_size = us;
141-
}
142-
*/
143-
} else {
144-
low_comm = han_module->cached_low_comms[mca_coll_han_component.han_bcast_low_module];
145-
up_comm = han_module->cached_up_comms[mca_coll_han_component.han_bcast_up_module];
146-
COLL_BASE_COMPUTED_SEGCOUNT(mca_coll_han_component.han_allreduce_segsize, typelng,
147-
seg_count);
148-
}
106+
107+
/* use MCA parameters for now */
108+
low_comm = han_module->cached_low_comms[mca_coll_han_component.han_bcast_low_module];
109+
up_comm = han_module->cached_up_comms[mca_coll_han_component.han_bcast_up_module];
110+
COLL_BASE_COMPUTED_SEGCOUNT(mca_coll_han_component.han_allreduce_segsize, typelng,
111+
seg_count);
149112

150113
/* Determine number of elements sent per task. */
151114
OPAL_OUTPUT_VERBOSE((10, mca_coll_han_component.han_output,

ompi/mca/coll/han/coll_han_bcast.c

Lines changed: 5 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -83,45 +83,12 @@ mca_coll_han_bcast_intra(void *buff,
8383
mca_coll_han_comm_create(comm, han_module);
8484
ompi_communicator_t *low_comm;
8585
ompi_communicator_t *up_comm;
86-
/* Auto tune is enabled */
87-
if (mca_coll_han_component.han_auto_tune && mca_coll_han_component.han_auto_tuned != NULL) {
88-
uint32_t n = han_auto_tuned_get_n(ompi_comm_size(han_module->cached_up_comms[0]));
89-
uint32_t c = han_auto_tuned_get_c(ompi_comm_size(han_module->cached_low_comms[0]));
90-
uint32_t m = han_auto_tuned_get_m(typelng * count);
91-
uint32_t id =
92-
n * mca_coll_han_component.han_auto_tune_c * mca_coll_han_component.han_auto_tune_m +
93-
c * mca_coll_han_component.han_auto_tune_m + m;
94-
uint32_t umod = mca_coll_han_component.han_auto_tuned[id].umod;
95-
uint32_t lmod = mca_coll_han_component.han_auto_tuned[id].lmod;
96-
uint32_t fs = mca_coll_han_component.han_auto_tuned[id].fs;
97-
/* ualg and us are only available when using ADAPT */
98-
/*
99-
uint32_t ualg = mca_coll_han_component.han_auto_tuned[id].ualg;
100-
uint32_t us = mca_coll_han_component.han_auto_tuned[id].us;
101-
*/
102-
/* Set up umod */
103-
up_comm = han_module->cached_up_comms[umod];
104-
/* Set up lmod */
105-
low_comm = han_module->cached_low_comms[lmod];
106-
/* Set up fs */
107-
COLL_BASE_COMPUTED_SEGCOUNT((size_t) fs, typelng, seg_count);
108-
/* Set up ualg and us, which is only available when using ADAPT */
109-
/*
110-
if (umod == 1) {
111-
((mca_coll_adapt_module_t *) (up_comm->c_coll->coll_ibcast_module))->adapt_component->
112-
adapt_ibcast_algorithm = ualg;
113-
((mca_coll_adapt_module_t *) (up_comm->c_coll->coll_ibcast_module))->adapt_component->
114-
adapt_ibcast_segment_size = us;
115-
}
116-
*/
11786

118-
} else {
119-
/* If auto tune is disabled, use MCA parameters */
120-
low_comm = han_module->cached_low_comms[mca_coll_han_component.han_bcast_low_module];
121-
up_comm = han_module->cached_up_comms[mca_coll_han_component.han_bcast_up_module];
122-
COLL_BASE_COMPUTED_SEGCOUNT(mca_coll_han_component.han_bcast_segsize, typelng,
123-
seg_count);
124-
}
87+
/* use MCA parameters for now */
88+
low_comm = han_module->cached_low_comms[mca_coll_han_component.han_bcast_low_module];
89+
up_comm = han_module->cached_up_comms[mca_coll_han_component.han_bcast_up_module];
90+
COLL_BASE_COMPUTED_SEGCOUNT(mca_coll_han_component.han_bcast_segsize, typelng,
91+
seg_count);
12592

12693
int num_segments = (count + seg_count - 1) / seg_count;
12794
OPAL_OUTPUT_VERBOSE((20, mca_coll_han_component.han_output,

0 commit comments

Comments
 (0)