Skip to content

Commit 7bcc504

Browse files
dycz0fx authored and bosilca committed
Remove outdated autotuning codes in HAN.
Signed-off-by: Xi Luo <[email protected]>
Signed-off-by: George Bosilca <[email protected]>
1 parent 7a15cfa commit 7bcc504

File tree

5 files changed

+12
-203
lines changed

5 files changed

+12
-203
lines changed

ompi/mca/coll/han/coll_han.h

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,6 @@
2424

2525
BEGIN_C_DECLS
2626

27-
/**
28-
* Auto-tuning is disabled for now.
29-
*/
30-
#define OMPI_MCA_COLL_HAN_AUTO_TUNE 0
31-
3227
/*
3328
* Today;
3429
* . only 2 modules available for intranode (low) level
@@ -38,14 +33,6 @@ BEGIN_C_DECLS
3833
#define COLL_HAN_LOW_MODULES 2
3934
#define COLL_HAN_UP_MODULES 2
4035

41-
typedef struct {
42-
uint32_t umod;
43-
uint32_t lmod;
44-
uint32_t fs;
45-
uint32_t ualg;
46-
uint32_t us;
47-
} selection;
48-
4936
struct mca_bcast_argu_s {
5037
mca_coll_task_t *cur_task;
5138
void *buff;
@@ -212,20 +199,6 @@ typedef struct mca_coll_han_component_t {
212199
* (but disables topological optimisations)
213200
*/
214201
uint32_t han_reproducible;
215-
#if OMPI_MCA_COLL_HAN_AUTO_TUNE
216-
/* whether enable auto tune */
217-
uint32_t han_auto_tune;
218-
/* create a 3D array
219-
* num_processes (n): 2 4 8 16 32 64 (6)
220-
* num_core (c): 2 4 8 12 (4)
221-
* message size (m): 1 - 4194304 (23)
222-
*/
223-
uint32_t han_auto_tune_n;
224-
uint32_t han_auto_tune_c;
225-
uint32_t han_auto_tune_m;
226-
char* han_auto_tune_filename;
227-
selection* han_auto_tuned;
228-
#endif /* OMPI_MCA_COLL_HAN_AUTO_TUNE */
229202
bool use_simple_algorithm[COLLCOUNT];
230203

231204
/* Dynamic configuration rules */
@@ -350,9 +323,6 @@ int *mca_coll_han_topo_init(struct ompi_communicator_t *comm, mca_coll_han_modul
350323
/* Utils */
351324
void mca_coll_han_get_ranks(int *vranks, int root, int low_size, int *root_low_rank,
352325
int *root_up_rank);
353-
uint32_t han_auto_tuned_get_n(uint32_t n);
354-
uint32_t han_auto_tuned_get_c(uint32_t c);
355-
uint32_t han_auto_tuned_get_m(uint32_t m);
356326

357327
const char* mca_coll_han_colltype_to_str(COLLTYPE_T coll);
358328
const char* mca_coll_han_topo_lvl_to_str(TOPO_LVL_T topo_lvl);

ompi/mca/coll/han/coll_han_allreduce.c

Lines changed: 6 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -103,52 +103,12 @@ mca_coll_han_allreduce_intra(const void *sbuf,
103103
mca_coll_han_comm_create(comm, han_module);
104104
ompi_communicator_t *low_comm;
105105
ompi_communicator_t *up_comm;
106-
#if OMPI_MCA_COLL_HAN_AUTO_TUNE
107-
/* Auto tune is enabled */
108-
if (mca_coll_han_component.han_auto_tune && mca_coll_han_component.han_auto_tuned != NULL) {
109-
uint32_t n = han_auto_tuned_get_n(ompi_comm_size(han_module->cached_up_comms[0]));
110-
uint32_t c = han_auto_tuned_get_c(ompi_comm_size(han_module->cached_low_comms[0]));
111-
uint32_t m = han_auto_tuned_get_m(typelng * count);
112-
uint32_t id =
113-
n * mca_coll_han_component.han_auto_tune_c * mca_coll_han_component.han_auto_tune_m +
114-
c * mca_coll_han_component.han_auto_tune_m + m +
115-
mca_coll_han_component.han_auto_tune_n * mca_coll_han_component.han_auto_tune_c *
116-
mca_coll_han_component.han_auto_tune_m;
117-
uint32_t umod = mca_coll_han_component.han_auto_tuned[id].umod;
118-
uint32_t lmod = mca_coll_han_component.han_auto_tuned[id].lmod;
119-
uint32_t fs = mca_coll_han_component.han_auto_tuned[id].fs;
120-
/* ualg and us are only available when using ADAPT */
121-
/*
122-
uint32_t ualg = mca_coll_han_component.han_auto_tuned[id].ualg;
123-
uint32_t us = mca_coll_han_component.han_auto_tuned[id].us;
124-
*/
125-
/* Set up umod */
126-
up_comm = han_module->cached_up_comms[umod];
127-
/* Set up lmod */
128-
low_comm = han_module->cached_low_comms[lmod];
129-
/* Set up fs */
130-
COLL_BASE_COMPUTED_SEGCOUNT((size_t) fs, typelng, seg_count);
131-
/* Set up ualg and us, which is only available when using ADAPT */
132-
/*
133-
if (umod == 1) {
134-
((mca_coll_adapt_module_t *) (up_comm->c_coll->coll_ibcast_module))->adapt_component->
135-
adapt_ibcast_algorithm = ualg;
136-
((mca_coll_adapt_module_t *) (up_comm->c_coll->coll_ibcast_module))->adapt_component->
137-
adapt_ibcast_algorithm = ualg;
138-
((mca_coll_adapt_module_t *) (up_comm->c_coll->coll_ibcast_module))->adapt_component->
139-
adapt_ibcast_segment_size = us;
140-
((mca_coll_adapt_module_t *) (up_comm->c_coll->coll_ibcast_module))->adapt_component->
141-
adapt_ibcast_segment_size = us;
142-
}
143-
*/
144-
} else
145-
#endif /* OMPI_MCA_COLL_HAN_AUTO_TUNE */
146-
{
147-
low_comm = han_module->cached_low_comms[mca_coll_han_component.han_bcast_low_module];
148-
up_comm = han_module->cached_up_comms[mca_coll_han_component.han_bcast_up_module];
149-
COLL_BASE_COMPUTED_SEGCOUNT(mca_coll_han_component.han_allreduce_segsize, typelng,
150-
seg_count);
151-
}
106+
107+
/* use MCA parameters for now */
108+
low_comm = han_module->cached_low_comms[mca_coll_han_component.han_bcast_low_module];
109+
up_comm = han_module->cached_up_comms[mca_coll_han_component.han_bcast_up_module];
110+
COLL_BASE_COMPUTED_SEGCOUNT(mca_coll_han_component.han_allreduce_segsize, typelng,
111+
seg_count);
152112

153113
/* Determine number of elements sent per task. */
154114
OPAL_OUTPUT_VERBOSE((10, mca_coll_han_component.han_output,

ompi/mca/coll/han/coll_han_bcast.c

Lines changed: 6 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -83,38 +83,12 @@ mca_coll_han_bcast_intra(void *buff,
8383
mca_coll_han_comm_create(comm, han_module);
8484
ompi_communicator_t *low_comm;
8585
ompi_communicator_t *up_comm;
86-
/* Auto tune is enabled */
87-
#if OMPI_MCA_COLL_HAN_AUTO_TUNE
88-
if (mca_coll_han_component.han_auto_tune && mca_coll_han_component.han_auto_tuned != NULL) {
89-
uint32_t n = han_auto_tuned_get_n(ompi_comm_size(han_module->cached_up_comms[0]));
90-
uint32_t c = han_auto_tuned_get_c(ompi_comm_size(han_module->cached_low_comms[0]));
91-
uint32_t m = han_auto_tuned_get_m(typelng * count);
92-
uint32_t id =
93-
n * mca_coll_han_component.han_auto_tune_c * mca_coll_han_component.han_auto_tune_m +
94-
c * mca_coll_han_component.han_auto_tune_m + m;
95-
uint32_t umod = mca_coll_han_component.han_auto_tuned[id].umod;
96-
uint32_t lmod = mca_coll_han_component.han_auto_tuned[id].lmod;
97-
uint32_t fs = mca_coll_han_component.han_auto_tuned[id].fs;
98-
/* ualg and us are only available when using ADAPT */
99-
/*
100-
uint32_t ualg = mca_coll_han_component.han_auto_tuned[id].ualg;
101-
uint32_t us = mca_coll_han_component.han_auto_tuned[id].us;
102-
*/
103-
/* Set up umod */
104-
up_comm = han_module->cached_up_comms[umod];
105-
/* Set up lmod */
106-
low_comm = han_module->cached_low_comms[lmod];
107-
/* Set up fs */
108-
COLL_BASE_COMPUTED_SEGCOUNT((size_t) fs, typelng, seg_count);
109-
} else
110-
#endif /* OMPI_MCA_COLL_HAN_AUTO_TUNE */
111-
{
112-
/* If auto tune is disabled, use MCA parameters */
113-
low_comm = han_module->cached_low_comms[mca_coll_han_component.han_bcast_low_module];
114-
up_comm = han_module->cached_up_comms[mca_coll_han_component.han_bcast_up_module];
115-
COLL_BASE_COMPUTED_SEGCOUNT(mca_coll_han_component.han_bcast_segsize, typelng,
116-
seg_count);
117-
}
86+
87+
/* use MCA parameters for now */
88+
low_comm = han_module->cached_low_comms[mca_coll_han_component.han_bcast_low_module];
89+
up_comm = han_module->cached_up_comms[mca_coll_han_component.han_bcast_up_module];
90+
COLL_BASE_COMPUTED_SEGCOUNT(mca_coll_han_component.han_bcast_segsize, typelng,
91+
seg_count);
11892

11993
int num_segments = (count + seg_count - 1) / seg_count;
12094
OPAL_OUTPUT_VERBOSE((20, mca_coll_han_component.han_output,

ompi/mca/coll/han/coll_han_component.c

Lines changed: 0 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -90,19 +90,6 @@ mca_coll_han_component_t mca_coll_han_component = {
9090
*/
9191
static int han_open(void)
9292
{
93-
#if OMPI_MCA_COLL_HAN_AUTO_TUNE
94-
if( mca_coll_han_component.han_auto_tune &&
95-
(NULL != mca_coll_han_component.han_auto_tune_filename) ) {
96-
mca_coll_han_component.han_auto_tuned =
97-
(selection *) malloc(2 * mca_coll_han_component.han_auto_tune_n * mca_coll_han_component.han_auto_tune_c *
98-
mca_coll_han_component.han_auto_tune_m * sizeof(selection));
99-
FILE *file = fopen(mca_coll_han_component.han_auto_tune_filename, "r");
100-
fread(mca_coll_han_component.han_auto_tuned, sizeof(selection),
101-
2 * mca_coll_han_component.han_auto_tune_n * mca_coll_han_component.han_auto_tune_c * mca_coll_han_component.han_auto_tune_m, file);
102-
fclose(file);
103-
}
104-
#endif /* OMPI_MCA_COLL_HAN_AUTO_TUNE */
105-
10693
/* Get the global coll verbosity: it will be ours */
10794
mca_coll_han_component.han_output = ompi_coll_base_framework.framework_output;
10895

@@ -116,12 +103,6 @@ static int han_open(void)
116103
*/
117104
static int han_close(void)
118105
{
119-
#if OMPI_MCA_COLL_HAN_AUTO_TUNE
120-
if( NULL != mca_coll_han_component.han_auto_tuned ) {
121-
free(mca_coll_han_component.han_auto_tuned);
122-
mca_coll_han_component.han_auto_tuned = NULL;
123-
}
124-
#endif /* OMPI_MCA_COLL_HAN_AUTO_TUNE */
125106
mca_coll_han_free_dynamic_rules();
126107
return OMPI_SUCCESS;
127108
}
@@ -414,44 +395,6 @@ static int han_register(void)
414395
}
415396
}
416397

417-
#if OMPI_MCA_COLL_HAN_AUTO_TUNE
418-
cs->han_auto_tune = 0;
419-
(void) mca_base_component_var_register(c, "auto_tune",
420-
"whether enable auto tune, 0 disable, 1 enable, default 0",
421-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
422-
OPAL_INFO_LVL_9,
423-
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_auto_tune);
424-
425-
cs->han_auto_tune_n = 5;
426-
(void) mca_base_component_var_register(c, "auto_tune_n",
427-
"auto tune n",
428-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
429-
OPAL_INFO_LVL_9,
430-
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_auto_tune_n);
431-
432-
cs->han_auto_tune_c = 3;
433-
(void) mca_base_component_var_register(c, "auto_tune_c",
434-
"auto tune c",
435-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
436-
OPAL_INFO_LVL_9,
437-
MCA_BASE_VAR_SCOPE_READONLY, &cs->han_auto_tune_c);
438-
439-
cs->han_auto_tune_m = 21;
440-
(void) mca_base_component_var_register(c, "auto_tune_m",
441-
"auto tune n",
442-
MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
443-
OPAL_INFO_LVL_9,
444-
MCA_BASE_VAR_SCOPE_READONLY,
445-
&cs->han_auto_tune_m);
446-
cs->han_auto_tune_filename = NULL;
447-
(void) mca_base_component_var_register(c, "auto_tune_file",
448-
"Autotuning file name",
449-
MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
450-
OPAL_INFO_LVL_9,
451-
MCA_BASE_VAR_SCOPE_READONLY,
452-
&cs->han_auto_tune_filename);
453-
#endif /* OMPI_MCA_COLL_HAN_AUTO_TUNE */
454-
455398
/* Dynamic rules */
456399
cs->use_dynamic_file_rules = false;
457400
(void) mca_base_component_var_register(&mca_coll_han_component.super.collm_version,

ompi/mca/coll/han/coll_han_utils.c

Lines changed: 0 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -18,41 +18,3 @@ void mca_coll_han_get_ranks(int *vranks, int root, int low_size, int *root_low_r
1818
*root_up_rank = vranks[root] / low_size;
1919
*root_low_rank = vranks[root] % low_size;
2020
}
21-
22-
uint32_t han_auto_tuned_get_n(uint32_t n)
23-
{
24-
uint32_t avail[5] = { 4, 8, 16, 32, 64 };
25-
uint32_t i;
26-
for (i = 0; i < 5; i++) {
27-
if (avail[i] >= n) {
28-
return i;
29-
}
30-
}
31-
return i - 1;
32-
}
33-
34-
uint32_t han_auto_tuned_get_c(uint32_t c)
35-
{
36-
uint32_t avail[3] = { 4, 8, 12 };
37-
uint32_t i;
38-
for (i = 0; i < 3; i++) {
39-
if (avail[i] >= c) {
40-
return i;
41-
}
42-
}
43-
return i - 1;
44-
}
45-
46-
uint32_t han_auto_tuned_get_m(uint32_t m)
47-
{
48-
uint32_t avail[21] =
49-
{ 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072,
50-
262144, 524288, 1048576, 2097152, 4194304 };
51-
uint32_t i;
52-
for (i = 0; i < 21; i++) {
53-
if (avail[i] >= m) {
54-
return i;
55-
}
56-
}
57-
return i - 1;
58-
}

0 commit comments

Comments
 (0)