Skip to content

Commit ba0d28e

Browse files
committed
OSHMEM/MCA/SPML/UCX: removed RC checks where not needed, fixed bugs
Signed-off-by: Roie Danino <[email protected]>
1 parent e62b7c7 commit ba0d28e

File tree

2 files changed

+50
-74
lines changed

2 files changed

+50
-74
lines changed

oshmem/mca/spml/ucx/spml_ucx.c

Lines changed: 47 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -292,17 +292,6 @@ static void mca_spml_ucx_rkey_store_put(mca_spml_ucx_rkey_store_t *store,
292292
ucp_rkey_destroy(rkey);
293293
}
294294

295-
static void mca_spml_ucx_team_world_init()
296-
{
297-
int rc = mca_spml_ucx_team_split_strided(NULL, 0, 1, oshmem_num_procs(), NULL, 0,
298-
&SHMEM_TEAM_WORLD);
299-
300-
if (rc != OSHMEM_SUCCESS) {
301-
SPML_UCX_ERROR("mca_spml_ucx_team_split_strided failed (SHMEM_TEAM_WORLD creation)");
302-
oshmem_shmem_abort(-1);
303-
}
304-
}
305-
306295
int mca_spml_ucx_enable(bool enable)
307296
{
308297
SPML_UCX_VERBOSE(50, "*** ucx ENABLED ****");
@@ -461,14 +450,6 @@ int mca_spml_ucx_ctx_mkey_del(mca_spml_ucx_ctx_t *ucx_ctx, int pe, uint32_t segn
461450
return OSHMEM_SUCCESS;
462451
}
463452

464-
static void mca_spml_ucx_team_world_destroy()
465-
{
466-
if (SHMEM_TEAM_WORLD != NULL) {
467-
mca_spml_ucx_team_destroy(SHMEM_TEAM_WORLD);
468-
SHMEM_TEAM_WORLD = NULL;
469-
}
470-
}
471-
472453
int mca_spml_ucx_del_procs(oshmem_group_t* group, size_t nprocs)
473454
{
474455
size_t ucp_workers = mca_spml_ucx.ucp_workers;
@@ -478,8 +459,6 @@ int mca_spml_ucx_del_procs(oshmem_group_t* group, size_t nprocs)
478459

479460
oshmem_shmem_barrier();
480461

481-
mca_spml_ucx_team_world_destroy();
482-
483462
if (!mca_spml_ucx_ctx_default.ucp_peers) {
484463
return OSHMEM_SUCCESS;
485464
}
@@ -1779,15 +1758,21 @@ int mca_spml_ucx_team_sync(shmem_team_t team)
17791758
int mca_spml_ucx_team_my_pe(shmem_team_t team)
17801759
{
17811760
mca_spml_ucx_team_t *ucx_team = (mca_spml_ucx_team_t *)team;
1782-
SPML_UCX_VALIDATE_TEAM(team);
1761+
1762+
if (team == SHMEM_TEAM_WORLD) {
1763+
return shmem_my_pe();
1764+
}
17831765

17841766
return ucx_team->my_pe;
17851767
}
17861768

17871769
int mca_spml_ucx_team_n_pes(shmem_team_t team)
17881770
{
17891771
mca_spml_ucx_team_t *ucx_team = (mca_spml_ucx_team_t *)team;
1790-
SPML_UCX_VALIDATE_TEAM(team);
1772+
1773+
if (team == SHMEM_TEAM_WORLD) {
1774+
return shmem_n_pes();
1775+
}
17911776

17921777
return ucx_team->n_pes;
17931778
}
@@ -1823,10 +1808,6 @@ int mca_spml_ucx_team_translate_pe(shmem_team_t src_team, int src_pe,
18231808

18241809
global_pe = ucx_src_team->start + src_pe * ucx_src_team->stride;
18251810

1826-
SPML_UCX_WARN("team_translate_pe(src_team=%p, src_pe=%d, dest_team=%p), global pe: %d, "
1827-
"src_team->start: %d, src pe: %d, src_team->stride: %d",
1828-
src_team, src_pe, dest_team, global_pe, ucx_src_team->start, src_pe, ucx_src_team->stride);
1829-
18301811
if (dest_team == SHMEM_TEAM_WORLD) {
18311812
return global_pe;
18321813
}
@@ -1845,42 +1826,38 @@ int mca_spml_ucx_team_split_strided(shmem_team_t parent_team, int start, int
18451826
{
18461827
mca_spml_ucx_team_t *ucx_parent_team;
18471828
mca_spml_ucx_team_t *ucx_new_team;
1829+
int base_pe;
1830+
int base_start;
1831+
int base_stride;
18481832
int my_pe;
18491833

18501834
SPML_UCX_ASSERT(((start + size * stride) <= oshmem_num_procs()) && (start < size) && (stride > 0) && (size > 0));
18511835

1852-
SPML_UCX_WARN("team_split_strided(parent_team=%p, start=%d, stride=%d, size=%d, config=%p, "
1853-
"config_mask=%ld, new_team=%p)",
1854-
parent_team, start, stride, size, config, config_mask, new_team);
1855-
1856-
ucx_new_team = (mca_spml_ucx_team_t *)malloc(sizeof(mca_spml_ucx_team_t));
1857-
ucx_new_team->start = start;
1858-
ucx_new_team->stride = stride;
1859-
1860-
if (parent_team == NULL) {
1861-
my_pe = shmem_my_pe();
1836+
if (parent_team == SHMEM_TEAM_WORLD) {
1837+
base_pe = shmem_my_pe();
1838+
base_start = 0;
1839+
base_stride = 1;
18621840
} else {
18631841
ucx_parent_team = (mca_spml_ucx_team_t*) parent_team;
1842+
base_pe = ucx_parent_team->my_pe;
1843+
base_start = ucx_parent_team->start;
1844+
base_stride = ucx_parent_team->stride;
1845+
}
18641846

1865-
SPML_UCX_VALIDATE_TEAM(parent_team);
1866-
if (mca_spml_ucx_is_pe_in_strided_team(ucx_parent_team->my_pe, start, stride, size)) {
1867-
my_pe = (ucx_parent_team->my_pe - start) / stride;
1868-
SPML_UCX_WARN("split: my_pe at parent team: %d, start: %d, stride: %d, size: %d, "
1869-
"my_pe at new team: %d", ucx_parent_team->my_pe, start, stride, size, my_pe);
1870-
} else {
1871-
/* not in team, according to spec it should be SHMEM_TEAM_INVALID but its value is NULL which
1872-
can be also interpreted as 0 (first pe), therefore -1 is used */
1873-
1874-
SPML_UCX_WARN("pe #%d is not part of the new team", ucx_parent_team->my_pe);
1875-
my_pe = SPML_UCX_PE_NOT_IN_TEAM;
1876-
}
1877-
1878-
/* In order to simplify pe translations start and stride are calculated with respect to
1879-
* world_team */
1880-
ucx_new_team->start += ucx_parent_team->start;
1881-
ucx_new_team->stride *= ucx_parent_team->stride;
1847+
if (mca_spml_ucx_is_pe_in_strided_team(base_pe, start, stride, size)) {
1848+
my_pe = (base_pe - start) / stride;
1849+
} else {
1850+
/* Not in the team. According to the spec this should be SHMEM_TEAM_INVALID, but its value is NULL, which
1851+
can also be interpreted as 0 (the first PE); therefore -1 is used. */
1852+
my_pe = SPML_UCX_PE_NOT_IN_TEAM;
18821853
}
18831854

1855+
/* In order to simplify pe translations start and stride are calculated with respect to
1856+
* world_team */
1857+
ucx_new_team = (mca_spml_ucx_team_t *)malloc(sizeof(mca_spml_ucx_team_t));
1858+
ucx_new_team->start = base_start + start;
1859+
ucx_new_team->stride = base_stride * stride;
1860+
18841861
ucx_new_team->n_pes = size;
18851862
ucx_new_team->my_pe = my_pe;
18861863

@@ -1890,10 +1867,10 @@ int mca_spml_ucx_team_split_strided(shmem_team_t parent_team, int start, int
18901867
memcpy(&ucx_new_team->config->super, config, sizeof(shmem_team_config_t));
18911868
}
18921869

1893-
ucx_new_team->config = config;
1894-
ucx_new_team->parent_team = parent_team;
1870+
ucx_new_team->config = (mca_spml_ucx_team_config_t*)config;
1871+
ucx_new_team->parent_team = (mca_spml_ucx_team_t*)parent_team;
18951872

1896-
*new_team = ucx_new_team;
1873+
*new_team = (shmem_team_t)ucx_new_team;
18971874

18981875
return OSHMEM_SUCCESS;
18991876
}
@@ -1904,21 +1881,29 @@ int mca_spml_ucx_team_split_2d(shmem_team_t parent_team, int xrange, const
19041881
shmem_team_t *yaxis_team)
19051882
{
19061883
mca_spml_ucx_team_t *ucx_parent_team = (mca_spml_ucx_team_t*) parent_team;
1907-
int yrange = ucx_parent_team->n_pes / xrange;
1908-
int pe_x = ucx_parent_team->my_pe % xrange;
1909-
int pe_y = ucx_parent_team->my_pe / xrange;
1884+
int parent_n_pes = (parent_team == SHMEM_TEAM_WORLD) ?
1885+
oshmem_num_procs() :
1886+
ucx_parent_team->n_pes;
1887+
int parent_my_pe = (parent_team == SHMEM_TEAM_WORLD) ?
1888+
shmem_my_pe() :
1889+
ucx_parent_team->my_pe;
1890+
int yrange = parent_n_pes / xrange;
1891+
int pe_x = parent_my_pe % xrange;
1892+
int pe_y = parent_my_pe / xrange;
19101893
int rc;
19111894

19121895
/* Create x-team of my_pe */
1913-
rc = mca_spml_ucx_team_split_strided(parent_team, pe_y * xrange, 1, xrange, xaxis_config, xaxis_mask, xaxis_team);
1896+
rc = mca_spml_ucx_team_split_strided(parent_team, pe_y * xrange, 1, xrange,
1897+
xaxis_config, xaxis_mask, xaxis_team);
19141898

19151899
if (rc != OSHMEM_SUCCESS) {
19161900
SPML_UCX_ERROR("mca_spml_ucx_team_split_strided failed (x-axis team creation)");
19171901
return rc;
19181902
}
19191903

19201904
/* Create y-team of my_pe */
1921-
rc = mca_spml_ucx_team_split_strided(parent_team, pe_x, xrange, yrange, yaxis_config, yaxis_mask, yaxis_team);
1905+
rc = mca_spml_ucx_team_split_strided(parent_team, pe_x, xrange, yrange,
1906+
yaxis_config, yaxis_mask, yaxis_team);
19221907
if (rc != OSHMEM_SUCCESS) {
19231908
SPML_UCX_ERROR("mca_spml_ucx_team_split_strided failed (y-axis team creation)");
19241909
goto out_free_xaxis;

oshmem/shmem/c/shmem_team.c

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -51,14 +51,9 @@ void shmem_team_sync(shmem_team_t team)
5151

5252
int shmem_team_my_pe(shmem_team_t team)
5353
{
54-
int rc = 0;
55-
5654
RUNTIME_CHECK_INIT();
5755

58-
rc = MCA_SPML_CALL(team_my_pe(team));
59-
RUNTIME_CHECK_IMPL_RC(rc);
60-
61-
return rc;
56+
return MCA_SPML_CALL(team_my_pe(team));
6257
}
6358

6459
int shmem_team_n_pes(shmem_team_t team)
@@ -85,15 +80,11 @@ int shmem_team_get_config(shmem_team_t team, long config_mask, shmem_team_config
8580
}
8681
int shmem_team_translate_pe(shmem_team_t src_team, int src_pe, shmem_team_t dest_team)
8782
{
88-
int rc = 0;
89-
9083
RUNTIME_CHECK_INIT();
9184

92-
rc = MCA_SPML_CALL(team_translate_pe(src_team, src_pe, dest_team));
93-
RUNTIME_CHECK_IMPL_RC(rc);
94-
95-
return rc;
85+
return MCA_SPML_CALL(team_translate_pe(src_team, src_pe, dest_team));
9686
}
87+
9788
int shmem_team_split_strided (shmem_team_t parent_team, int start, int stride,
9889
int size, const shmem_team_config_t *config, long config_mask,
9990
shmem_team_t *new_team)

0 commit comments

Comments
 (0)