Skip to content

Commit fe3c3c5

Browse files
author
Luke Robison
committed
coll/tuned: New dynamic rules file format
This commit provides a new format for the coll/tuned dynamic rules file using json. It also modifies matching rules to use a first-valid-match strategy, which is intended to be simpler in cases where a new matching rule may be added. A converter written in python is available in the contrib directory, however we still accept files in either format. Signed-off-by: Luke Robison <[email protected]>
1 parent c8dfb09 commit fe3c3c5

23 files changed

+1014
-123
lines changed
Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
import re
2+
import json
3+
from collections import OrderedDict
4+
5+
# Collective names listed in identifier order: a name's position in the tuple
# is the integer ID that load_rulefile() reads from a classic rules file.
# Do not reorder or insert in the middle -- that would renumber every
# collective that follows.
coll_dict = {
    coll_name: coll_id
    for coll_id, coll_name in enumerate((
        'allgather',
        'allgatherv',
        'allreduce',
        'alltoall',
        'alltoallv',
        'alltoallw',
        'barrier',
        'bcast',
        'exscan',
        'gather',
        'gatherv',
        'reduce',
        'reducescatter',
        'reducescatterblock',
        'scan',
        'scatter',
        'scatterv',
        'neighbor_allgather',
        'neighbor_allgatherv',
        'neighbor_alltoall',
        'neighbor_alltoallv',
        'neighbor_alltoallw',
    ))
}
# Reverse lookup: integer collective ID -> collective name.
coll_dict_rev = dict(zip(coll_dict.values(), coll_dict.keys()))
29+
30+
# Component name -> integer identifier (the position in the list is the ID).
# Not referenced by the rest of the code visible in this file.
han_component_dict = {
    component: ident
    for ident, component in enumerate(
        ["self", "basic", "libnbc", "tuned", "sm", "adapt", "han"]
    )
}

# Topology level name -> integer identifier (the position in the list is the ID).
# Not referenced by the rest of the code visible in this file.
han_topo_level_dict = {
    level: ident
    for ident, level in enumerate(
        ['intra_node', 'inter_node', 'global_communicator']
    )
}
45+
46+
47+
def strip_comments(line):
    """Drop ``#`` comments from *line* and trim surrounding whitespace.

    Everything from a ``#`` up to the end of its line is removed; the result
    is then stripped, so a blank or comment-only line yields ``""``.
    """
    without_comment = re.sub(r"#.*", "", line)
    return without_comment.strip()
49+
50+
class GenericOpenMPIRuleReader():
    """Tokenizer for classic rule files made of integers and ``#`` comments.

    The reader pulls integers one at a time from a seekable text stream,
    skipping blank lines and comments, and repositions the stream just after
    each integer so that several values may share one line.
    """

    def __init__(self, fp, fname_for_prints=""):
        """Wrap *fp*, a text stream supporting ``tell``/``seek``/``readline``.

        *fname_for_prints* is only stored (for use in diagnostics); it is not
        otherwise interpreted here.
        """
        self.fp = fp
        self.fname_for_prints = fname_for_prints
        # The 1-indexed line number which corresponds to the next byte of fp read.
        self.jline = 1
        # Stream offset at which the most recent readline() started.
        self.line_start = 0

    def get_next_line(self):
        """Return the next line with non-comment content, or None at EOF.

        The line is returned unmodified (comments and newline included).
        ``line_start`` is left at the offset where that line began.
        """
        while True:
            self.line_start = self.fp.tell()
            line = self.fp.readline()
            if not line:
                return None
            self.jline += 1
            if strip_comments(line):
                return line

    def isnext_digit(self):
        """Report whether the next non-blank character is a decimal digit.

        Spaces and tabs are consumed; the character that is examined is not.
        Returns False at end of file.
        """
        # ompi_coll_base_file_peek_next_char_isdigit
        while True:
            pos = self.fp.tell()
            char = self.fp.read(1)
            # read() returns '' at EOF, and '' is "in" every string, so the
            # emptiness check is what stops this loop from spinning forever.
            if char and char in ' \t':
                continue
            # Un-read the character we just peeked at.
            self.fp.seek(pos)
            return bool(char) and char in '0123456789'

    def get_next(self):
        """Return the next unsigned integer in the stream, or None at EOF.

        Text after a ``#`` is ignored, as are lines containing no digits.
        The stream is left positioned immediately after the returned number.
        """
        # (ompi_coll_base_file_getnext_long)
        while True:
            line = self.get_next_line()
            if not line:
                return None
            number = re.search(r"[0-9]+", line.split("#", 1)[0])
            if number is None:
                # No digits before the comment: move on to the following line.
                continue
            # Offsets are treated as one unit per character, which holds for
            # the plain ASCII content these files contain.
            self.fp.seek(self.line_start + number.end())
            # decrement the line number, the next read will continue on this line.
            self.jline -= 1
            return int(number.group())

    def read_header(self):
        """Return the version from a ``rule-file-version-N`` header line.

        When the first meaningful line is not such a header (or there is no
        such line at all) the stream is rewound to the start of that line and
        version 1 is returned.
        """
        line = self.get_next_line()
        if line is None:
            # Empty or comment-only input: there is no header to parse.
            return 1
        match = re.match("rule-file-version-([0-9])", line)
        if match:
            return int(match.group(1))
        # Not a header: give the line back so it is parsed as data.
        self.jline -= 1
        self.fp.seek(self.line_start)
        return 1
109+
110+
class TunedRuleReader(GenericOpenMPIRuleReader):
    """Convert a classic coll/tuned rules file into the JSON rule layout."""

    def _next_int(self, what):
        """Return the next integer, raising ValueError if the file ends first.

        *what* names the field being read and is used only in the message.
        """
        value = self.get_next()
        if value is None:
            raise ValueError(
                "unexpected end of rules file near line %d while reading %s"
                % (self.jline, what))
        return value

    def load_rulefile(self):
        """Parse the whole classic file and return the equivalent JSON tree.

        Returns an OrderedDict with ``rule_file_version`` (always 3),
        ``module`` (``'tuned'``) and ``collectives``, which maps each
        collective name to a list of communicator-size rules; each of those
        carries a ``rules`` list of message-size rules.

        Raises ValueError when the file ends before all announced entries
        were read, and KeyError for a collective ID missing from
        ``coll_dict_rev``.
        """
        json_root = OrderedDict()
        file_ver = self.read_header()
        json_root['rule_file_version'] = 3
        json_root['module'] = 'tuned'
        json_root['collectives'] = OrderedDict()

        ncollectives = self._next_int("the number of collectives")
        for _ in range(ncollectives):
            coll_id = self._next_int("a collective id")
            coll_name = coll_dict_rev[coll_id]
            comm_rules = []
            ncomm_sizes = self._next_int("the number of comm sizes")
            for jcomm_size in range(ncomm_sizes):
                comm_size = self._next_int("a comm size")
                nmsg_sizes = self._next_int("the number of message sizes")
                comm_rule = OrderedDict()
                comm_rule['comm_size_min'] = 0
                # NOTE(review): each entry ends at its own comm_size - 1 and
                # starts after the previous entry's end, i.e. entry j covers
                # [size[j-1], size[j] - 1]. The message-size handling below
                # instead gives entry j the window [size[j], size[j+1] - 1].
                # Confirm this asymmetry matches how the classic format is
                # interpreted by the C reader.
                if jcomm_size + 1 < ncomm_sizes:
                    comm_rule['comm_size_max'] = max(comm_size - 1, 0)
                if jcomm_size > 0:
                    comm_rule['comm_size_min'] = comm_rules[jcomm_size - 1]['comm_size_max'] + 1
                msg_rules = []
                for jmsg in range(nmsg_sizes):
                    msg_size = self._next_int("a message size")
                    result_alg = self._next_int("an algorithm")
                    result_topo_faninout = self._next_int("a fan in/out value")
                    result_segsize = self._next_int("a segment size")
                    rule = OrderedDict()
                    rule['msg_size_min'] = msg_size
                    if jmsg < nmsg_sizes - 1:
                        # Placeholder that reserves the key's position right
                        # after msg_size_min; the next iteration overwrites it.
                        rule['msg_size_max'] = 'Inf'
                    if jmsg > 0:
                        # The previous rule ends just below this rule's start.
                        msg_rules[jmsg - 1]['msg_size_max'] = msg_size - 1
                    rule['alg'] = result_alg
                    # Zero-valued tuning parameters are omitted from the output.
                    if result_topo_faninout != 0:
                        rule['faninout'] = result_topo_faninout
                    if result_segsize != 0:
                        rule['segsize'] = result_segsize
                    result_maxreq = 0
                    # Files newer than version 1 may carry an optional fifth
                    # number on the rule's line.
                    if file_ver > 1 and self.isnext_digit():
                        result_maxreq = self._next_int("a max-requests value")
                    if result_maxreq != 0:
                        rule['reqs'] = result_maxreq
                    msg_rules.append(rule)
                comm_rule['rules'] = msg_rules
                comm_rules.append(comm_rule)
            json_root['collectives'][coll_name] = comm_rules
        return json_root
160+
161+
class TunedRuleWriter():
    """Placeholder for emitting rules; no output is produced yet."""

    def __init__(self):
        pass

    @staticmethod
    def to_file(json_rules):
        """Visit each known collective that has an entry in *json_rules*.

        *json_rules* must provide a ``'collectives'`` mapping. This is an
        unfinished stub: matching collectives are found but nothing is
        written. It is a static method so that it can be called both on the
        class and on an instance.
        """
        for coll in coll_dict:
            if coll in json_rules['collectives']:
                pass
168+
169+
if __name__ == '__main__':
    # Command-line entry point: read a classic rules file given with
    # --input/-i and print the converted JSON document to standard output.
    import argparse

    arg_parser = argparse.ArgumentParser()
    arg_parser.add_argument("--input","-i", type=argparse.FileType('r'), required=True)
    # An "--output"/"-o" option is not enabled; output always goes to stdout.
    # arg_parser.add_argument("--output","-o",type=argparse.FileType('w'), required=True)
    cli_args = arg_parser.parse_args()

    converted_rules = TunedRuleReader(cli_args.input).load_rulefile()
    print(json.dumps(converted_rules, indent=4))

docs/tuning-apps/coll-tuned.rst

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -92,14 +92,77 @@ after.
9292
.. code-block:: sh
9393
9494
shell$ mpirun ... --mca coll_tuned_use_dynamic_rules 1 \
95-
--mca coll_tuned_dynamic_rules_filename /path/to/my_rules.conf ...
95+
--mca coll_tuned_dynamic_rules_filename /path/to/my_rules.json ...
9696
9797
The loaded set of rules then are used to select the algorithm
9898
to use based on the collective, the communicator size, and the message size.
9999
Collectives for which rules have not been specified in the file will make use of
100100
the *fixed decision* rules as usual.
101101

102-
Dynamic tuning files are organized in this format:
102+
Starting with Open MPI 6.0, dynamic tuning files can be specified in JSON
103+
format, although the classic format will still be accepted. A converter script
104+
is also available to transfer classic format files into JSON.
105+
106+
The JSON format can be checked using the schema in
107+
`docs/tuning-apps/tuned_dynamic_file_schema.json`. If your editor supports it,
108+
this schema may provide validation of your file along with helpful tooltips for
109+
each variable.
110+
111+
An example file is shown here:
112+
113+
.. code-block:: json
114+
115+
{
116+
"$schema": "tuned_schema.json",
117+
"rule_file_version" : 3,
118+
"module" : "tuned",
119+
"collectives" : {
120+
"allreduce" :
121+
[
122+
{
123+
"comm_size_min" : 64,
124+
"comm_size_max" : 128,
125+
"rules" : [
126+
{
127+
"msg_size_min" : 512,
128+
"msg_size_max" : 511999,
129+
"alg" : 2
130+
},
131+
{
132+
"msg_size_min" : 512000,
133+
"msg_size_max" : "inf",
134+
"alg" : "recursive_doubling",
135+
"reqs" : 8
136+
}
137+
]
138+
}
139+
]
140+
}
141+
}
142+
143+
In this toy example the MPI_Allreduce collective (indicated by the `allreduce`
144+
field) has two algorithms that will only be used on communicators with between
145+
64 and 128 ranks. Additionally, those rules only apply to certain message
146+
sizes. All other communicator sizes or message sizes fall back to the default
147+
set of rules, and collectives other than MPI_Allreduce are not affected.
148+
149+
Unlike in the classic file format, there is no need to specify a default rule or
150+
specify rules in increasing order. Overlapping message sizes or communicator
151+
sizes are allowed, and won't emit warnings.
152+
153+
The process for selecting the matching rule is a simple first-match principle.
154+
During communicator creation, the first set of communicator-rules which
155+
satisfies the requirements (`comm_size_min`/`comm_size_max`) is selected. Then,
156+
during each collective call, the message size is used to find the first matching
157+
entry in the "rules" list.
158+
159+
The algorithm selected is indicated by the `alg` field. It may be either an
160+
integer mapping to the classic file format, or a string. In both cases, the
161+
value is checked against the appropriate coll_tuned_<collective>_algorithm MCA
162+
parameter, and un-recognized values will cause the rule to be ignored.
163+
164+
165+
Classic file format:
103166

104167
.. code-block:: sh
105168
:linenos:
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
{
2+
"$schema": "https://json-schema.org/draft/2019-09/schema#",
3+
"title": "OpenMPITunedRules",
4+
"description": "Defines configuration for the Open MPI Tuned module to select which collective algorithms will be used depending on comm size, message size, etc.",
5+
"type": "object",
6+
"required": ["rule_file_version","module","collectives"],
7+
"additionalProperties" : false,
8+
"properties": {
9+
"rule_file_version": {
10+
"description": "The version of this configuration file",
11+
"type": "number"
12+
},
13+
"module": {
14+
"description": "The collective module intended to use these rules (tuned)",
15+
"type": "string"
16+
},
17+
"$schema": {
18+
"description": "The schema used for validation",
19+
"type": "string"
20+
},
21+
"collectives" : {
22+
"description": "The collectives, each with their own rules. Each collective is indicated by a lowercase property such as \"allgather\"",
23+
"type": "object",
24+
"additionalProperties" : false,
25+
"patternProperties": {
26+
"^(allgather|allgatherv|allreduce|alltoall|alltoallv|alltoallw|barrier)$": {
27+
"type" : "array",
28+
"items": { "$ref" : "#/$defs/comm_size_rule" }
29+
},
30+
"^(bcast|exscan|gather|gatherv|reduce|reducescatter|reducescatterblock)$": {
31+
"type" : "array",
32+
"items": { "$ref" : "#/$defs/comm_size_rule" }
33+
},
34+
"^(scan|scatter|scatterv|neighbor_allgather|neighbor_allgatherv)$": {
35+
"type" : "array",
36+
"items": { "$ref" : "#/$defs/comm_size_rule" }
37+
},
38+
"^(neighbor_alltoall|neighbor_alltoallv|neighbor_alltoallw)$": {
39+
"type" : "array",
40+
"items": { "$ref" : "#/$defs/comm_size_rule" }
41+
}
42+
}
43+
}
44+
},
45+
46+
"$defs": {
47+
"msg_size_rule": {
48+
"type": "object",
49+
"required": ["alg"],
50+
"additionalProperties" : false,
51+
"properties" : {
52+
"msg_size_min" : {
53+
"description" : "The smallest message size in bytes this rule applies to",
54+
"anyOf" : [ { "type" : "integer" }, { "enum": ["inf","INF","Inf"] } ]
55+
},
56+
"msg_size_max" : {
57+
"description" : "The largest message size (inclusive) in bytes this rule applies to",
58+
"anyOf" : [ { "type" : "integer" }, { "enum": ["inf","INF","Inf"] } ]
59+
},
60+
"alg" : {
61+
"description" : "The algorithm to use for this collective. Integer or name, see coll_tuned_<collective>_algorithm for options.",
62+
"type" : [ "string", "integer"]
63+
},
64+
"reqs" : {
65+
"description" : "Algorithm parameter: Use this many requests. Some algorithms may ignore this option.",
66+
"type" : [ "integer"]
67+
},
68+
"faninout" : {
69+
"description" : "Algorithm parameter: Fan in and/or out by this much. Some algorithms may ignore this option.",
70+
"type" : [ "integer"]
71+
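},
"segsize" : {
"description" : "Algorithm parameter: Segment size. Some algorithms may ignore this option.",
"type" : [ "integer"]
}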
72+
}
73+
},
74+
75+
"comm_size_rule": {
76+
"type": "object",
77+
"required": ["rules"],
78+
"additionalProperties" : false,
79+
"properties" : {
80+
"comm_size_min" : {
81+
"description" : "The smallest size communicator these rules apply to",
82+
"anyOf" : [ { "type" : "integer" }, { "enum": ["inf","INF","Inf"] } ]
83+
},
84+
"comm_size_max" : {
85+
"description" : "The largest (inclusive) size communicator these rules apply to",
86+
"anyOf" : [ { "type" : "integer" }, { "enum": ["inf","INF","Inf"] } ]
87+
},
88+
"comm_rank_distribution" : {
89+
"description" : "A description of how the ranks are distributed within the communicator",
90+
"enum" : ["any", "one-per-node", "single-node"]
91+
},
92+
93+
"rules" : {
94+
"description" : "A list of rules. The first matching rule is selected. If no match is found, defaults are used.",
95+
"type" : "array",
96+
"items": { "$ref" : "#/$defs/msg_size_rule" }
97+
}
98+
}
99+
},
100+
"collective_identifier": {
101+
"enum" : [
102+
"allgather",
"allgatherv",
103+
"allreduce",
104+
"alltoall",
105+
"alltoallv",
106+
"alltoallw",
107+
"barrier",
108+
"bcast",
109+
"exscan",
110+
"gather",
111+
"gatherv",
112+
"reduce",
113+
"reducescatter",
114+
"reducescatterblock",
115+
"scan",
116+
"scatter",
117+
"scatterv",
118+
"neighbor_allgather",
119+
"neighbor_allgatherv",
120+
"neighbor_alltoall",
121+
"neighbor_alltoallv",
122+
"neighbor_alltoallw"
123+
]
124+
},
125+
"int_or_inf": {
"anyOf": [
{ "type" : "integer" },
{ "enum": ["inf","INF","Inf"] }
]
}
129+
}
130+
}

ompi/mca/coll/tuned/coll_tuned.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,4 +216,9 @@ struct mca_coll_tuned_module_t {
216216
typedef struct mca_coll_tuned_module_t mca_coll_tuned_module_t;
217217
OBJ_CLASS_DECLARATION(mca_coll_tuned_module_t);
218218

219+
/* NOTE(review): the comments on these three declarations are inferred from
 * the names and signatures only; confirm them against the definitions. */
/* Presumably maps the algorithm name alg_name of the given collective to its
 * integer value, stored through alg_index. */
int coll_tuned_alg_from_str(int collective_id, const char *alg_name, int *alg_index);
220+
/* Presumably the inverse lookup: yields the name of algorithm alg_value for
 * the given collective through alg_string. Ownership of the returned string
 * is not visible here -- TODO confirm. */
int coll_tuned_alg_to_str(int collective_id, int alg_value, char **alg_string);
221+
/* Presumably associates an enumerator of valid algorithm choices with the
 * given collective so the two lookups above can consult it -- TODO confirm. */
int coll_tuned_alg_register_options(int collective_id, mca_base_var_enum_t *options);
222+
223+
219224
#endif /* MCA_COLL_TUNED_EXPORT_H */

0 commit comments

Comments
 (0)