Skip to content

Commit 071169b

Browse files
committed
oshmem/shmem: Allocate and exchange base segment address beforehand
Signed-off-by: Thomas Vegas <[email protected]>
1 parent c393881 commit 071169b

File tree

7 files changed

+180
-24
lines changed

7 files changed

+180
-24
lines changed

oshmem/mca/memheap/base/memheap_base_select.c

Lines changed: 133 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
#include "oshmem/mca/sshmem/base/base.h"
2626
#include "ompi/util/timings.h"
2727

28+
#include <sys/mman.h>
29+
2830
mca_memheap_base_config_t mca_memheap_base_config = {
2931
.device_nic_mem_seg_size = 0
3032
};
@@ -106,6 +108,127 @@ static size_t _memheap_size(void)
106108
return (size_t) memheap_align(oshmem_shmem_info_env.symmetric_heap_size);
107109
}
108110

111+
static void *memheap_mmap_get(void *hint, size_t size)
112+
{
113+
void *addr;
114+
115+
addr = mmap(hint, size,
116+
PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
117+
if (addr == MAP_FAILED) {
118+
return NULL;
119+
}
120+
121+
return addr;
122+
}
123+
124+
static int memheap_exchange_base_address(size_t size, void **address)
125+
{
126+
int nprocs = oshmem_num_procs();
127+
void *base = NULL;
128+
void *ptr = NULL;
129+
int rc, i;
130+
void **bases;
131+
132+
bases = calloc(nprocs, sizeof(*bases));
133+
if (NULL == bases) {
134+
return OSHMEM_ERROR;
135+
}
136+
137+
if (oshmem_my_proc_id() == 0) {
138+
ptr = memheap_mmap_get(NULL, size);
139+
base = ptr;
140+
}
141+
142+
rc = oshmem_shmem_bcast(&base, sizeof(base), 0);
143+
if (OSHMEM_SUCCESS != rc) {
144+
MEMHEAP_ERROR("Failed to exchange allocated vma for base segment "
145+
"(error %d)", rc);
146+
goto out;
147+
}
148+
149+
if (oshmem_my_proc_id() != 0) {
150+
ptr = memheap_mmap_get(base, size);
151+
}
152+
153+
MEMHEAP_VERBOSE(100, "#%d: exchange base address: base %p: %s",
154+
oshmem_my_proc_id(), base,
155+
(NULL != ptr)? "ok" : "unavailable");
156+
157+
rc = oshmem_shmem_allgather(&ptr, bases, sizeof(ptr));
158+
if (OSHMEM_SUCCESS != rc) {
159+
MEMHEAP_ERROR("Failed to exchange selected vma for base segment "
160+
"(error %d)", rc);
161+
goto out;
162+
}
163+
164+
*address = base;
165+
for (i = 0; i < nprocs; i++) {
166+
if ((NULL == bases[i]) || (bases[i] != base)) {
167+
*address = NULL;
168+
break;
169+
}
170+
}
171+
172+
out:
173+
if (((OSHMEM_SUCCESS != rc) || (*address == NULL)) && (NULL != ptr)) {
174+
(void)munmap(ptr, size);
175+
}
176+
177+
free(bases);
178+
return rc;
179+
}
180+
181+
182+
/*
183+
* The returned mca_sshmem_base_start_address value is reserved by using
184+
* mmap() for the expected size.
185+
*/
186+
static int memheap_base_segment_setup(size_t size)
187+
{
188+
int rc;
189+
190+
if (mca_sshmem_base_start_address == (void *)UINTPTR_MAX) {
191+
if (UINTPTR_MAX == 0xFFFFFFFF) {
192+
/**
193+
* if 32 bit we set sshmem_base_start_adress to 0
194+
* to let OS allocate segment automatically
195+
*/
196+
mca_sshmem_base_start_address = NULL;
197+
return OSHMEM_SUCCESS;
198+
}
199+
200+
rc = memheap_exchange_base_address(size, &mca_sshmem_base_start_address);
201+
if (OSHMEM_SUCCESS != rc) {
202+
MEMHEAP_ERROR("Failed to setup base segment address (error %d)", rc);
203+
return rc;
204+
}
205+
206+
if (NULL != mca_sshmem_base_start_address) {
207+
goto done; /* Region is reserved */
208+
}
209+
210+
#if defined(__aarch64__)
211+
mca_sshmem_base_start_address = (void*)0xAB0000000000;
212+
#else
213+
mca_sshmem_base_start_address = (void*)0xFF000000;
214+
#endif
215+
}
216+
217+
if (NULL == memheap_mmap_get(mca_sshmem_base_start_address, size)) {
218+
MEMHEAP_ERROR("Failed to create segment address %p/%zu",
219+
mca_sshmem_base_start_address, size);
220+
return OSHMEM_ERROR;
221+
}
222+
223+
done:
224+
if (oshmem_my_proc_id() == 0) {
225+
MEMHEAP_VERBOSE(10, "Using symmetric segment address %p/%zu",
226+
mca_sshmem_base_start_address, size);
227+
}
228+
229+
return OSHMEM_SUCCESS;
230+
}
231+
109232
static memheap_context_t* _memheap_create(void)
110233
{
111234
int rc = OSHMEM_SUCCESS;
@@ -123,13 +246,18 @@ static memheap_context_t* _memheap_create(void)
123246

124247
OPAL_TIMING_ENV_NEXT(timing, "_memheap_size()");
125248

126-
/* Inititialize symmetric area */
127-
if (OSHMEM_SUCCESS == rc) {
128-
rc = mca_memheap_base_alloc_init(&mca_memheap_base_map,
129-
user_size + MEMHEAP_BASE_PRIVATE_SIZE, 0,
130-
"regular_mem");
249+
/* Locate and reserve symmetric area */
250+
rc = memheap_base_segment_setup(user_size + MEMHEAP_BASE_PRIVATE_SIZE);
251+
if (OSHMEM_SUCCESS != rc) {
252+
MEMHEAP_ERROR("Failed to negotiate base segment addres");
253+
return NULL;
131254
}
132255

256+
/* Inititialize symmetric area */
257+
rc = mca_memheap_base_alloc_init(&mca_memheap_base_map,
258+
user_size + MEMHEAP_BASE_PRIVATE_SIZE, 0,
259+
"regular_mem");
260+
133261
OPAL_TIMING_ENV_NEXT(timing, "mca_memheap_base_alloc_init()");
134262

135263
/* Initialize atomic symmetric area */

oshmem/mca/memheap/base/memheap_base_static.c

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,17 @@
2525
#include <pthread.h>
2626

2727
static int _check_perms(const char *perm);
28+
static int _check_non_static_segment(const map_segment_t *mem_segs,
29+
int n_segment,
30+
const void *start, const void *end);
2831
static int _check_address(void *start, void **end);
2932
static int _check_pathname(uint64_t inode, const char *pathname);
3033

3134
int mca_memheap_base_static_init(mca_memheap_map_t *map)
3235
{
3336
/* read and parse segments from /proc/self/maps */
3437
int ret = OSHMEM_SUCCESS;
38+
int n_segments = map->n_segments;
3539
uint64_t total_mem = 0;
3640
void* start;
3741
void* end;
@@ -54,14 +58,6 @@ int mca_memheap_base_static_init(mca_memheap_map_t *map)
5458
return OSHMEM_ERROR;
5559
}
5660

57-
#ifdef __linux__
58-
extern unsigned _end;
59-
if (mca_sshmem_base_start_address < (uintptr_t)&_end) {
60-
MEMHEAP_WARN("sshmem base start address is inside data region"
61-
" (%p < %p)", mca_sshmem_base_start_address, &_end);
62-
}
63-
#endif
64-
6561
while (NULL != fgets(line, sizeof(line), fp)) {
6662
if (3 > sscanf(line,
6763
"%llx-%llx %s %llx %s %llx %s",
@@ -77,6 +73,12 @@ int mca_memheap_base_static_init(mca_memheap_map_t *map)
7773
goto out;
7874
}
7975

76+
if (OSHMEM_ERROR == _check_non_static_segment(
77+
map->mem_segs, n_segments,
78+
start, end)) {
79+
continue;
80+
}
81+
8082
if (OSHMEM_ERROR == _check_address(start, &end))
8183
continue;
8284

@@ -138,6 +140,26 @@ static int _check_perms(const char *perms)
138140
return OSHMEM_ERROR;
139141
}
140142

143+
static int _check_non_static_segment(const map_segment_t *mem_segs,
144+
int n_segment,
145+
const void *start, const void *end)
146+
{
147+
int i;
148+
149+
for (i = 0; i < n_segment; i++) {
150+
if ((start <= mem_segs[i].super.va_base) &&
151+
(mem_segs[i].super.va_base < end)) {
152+
MEMHEAP_VERBOSE(100,
153+
"non static segment: %p-%p already exists as %p-%p",
154+
start, end, mem_segs[i].super.va_base,
155+
mem_segs[i].super.va_end);
156+
return OSHMEM_ERROR;
157+
}
158+
}
159+
160+
return OSHMEM_SUCCESS;
161+
}
162+
141163
static int _check_address(void *start, void **end)
142164
{
143165
/* FIXME Linux specific code */

oshmem/mca/sshmem/base/sshmem_base_open.c

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -31,17 +31,7 @@
3131
* globals
3232
*/
3333

34-
/**
35-
* if 32 bit we set sshmem_base_start_address to 0
36-
* to let OS allocate segment automatically
37-
*/
38-
#if UINTPTR_MAX == 0xFFFFFFFF
39-
void *mca_sshmem_base_start_address = (void*)0;
40-
#elif defined(__aarch64__)
41-
void* mca_sshmem_base_start_address = (void*)0xAB0000000000;
42-
#else
43-
void* mca_sshmem_base_start_address = (void*)0xFF000000;
44-
#endif
34+
void *mca_sshmem_base_start_address = UINTPTR_MAX;
4535

4636
char * mca_sshmem_base_backing_file_dir = NULL;
4737

oshmem/mca/sshmem/mmap/sshmem_mmap_module.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ segment_create(map_segment_t *ds_buf,
126126
/* init the contents of map_segment_t */
127127
shmem_ds_reset(ds_buf);
128128

129+
(void)munmap(mca_sshmem_base_start_address, size);
129130
addr = mmap((void *)mca_sshmem_base_start_address,
130131
size,
131132
PROT_READ | PROT_WRITE,

oshmem/mca/sshmem/sysv/sshmem_sysv_module.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ segment_create(map_segment_t *ds_buf,
170170
}
171171

172172
/* Attach to the segment */
173+
(void)munmap(mca_sshmem_base_start_address, size);
173174
addr = shmat(shmid, (void *) mca_sshmem_base_start_address, 0);
174175
if (addr == (void *) -1L) {
175176
opal_show_help("help-oshmem-sshmem.txt",

oshmem/runtime/oshmem_shmem_exchange.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,15 @@
1616
#include "oshmem/runtime/runtime.h"
1717
#include "oshmem/runtime/params.h"
1818

19+
int oshmem_shmem_bcast(void *buf, int elem_size, int root)
20+
{
21+
int rc;
22+
23+
rc = PMPI_Bcast(buf, elem_size, MPI_BYTE, root, oshmem_comm_world);
24+
25+
return rc;
26+
}
27+
1928
int oshmem_shmem_allgather(void *send_buf, void *rcv_buf, int elem_size)
2029
{
2130
int rc;

oshmem/runtime/runtime.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,11 @@ int oshmem_shmem_finalize(void);
128128
*/
129129
OSHMEM_DECLSPEC int oshmem_shmem_abort(int errcode);
130130

131+
/**
132+
* Broadcast between all PEs
133+
*/
134+
OSHMEM_DECLSPEC int oshmem_shmem_bcast(void *buf, int elem_size, int root);
135+
131136
/**
132137
* Allgather between all PEs
133138
*/

0 commit comments

Comments
 (0)