Skip to content

Commit 49fb30c

Browse files
committed
Move yield capability to opal thread component
This adds three new MCA parameters: threads_pthreads_yield_strategy to choose the strategy (sched_yield, usleep, or nanosleep), threads_pthreads_usleep_time (time passed to usleep), and threads_pthreads_nanosleep_time (time passed to nanosleep). A thread component may also signal that yield-when-idle should be the default. Signed-off-by: Joseph Schuchart <[email protected]>
1 parent 560ebc5 commit 49fb30c

File tree

10 files changed

+162
-15
lines changed

10 files changed

+162
-15
lines changed

ompi/runtime/ompi_mpi_params.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@
4343
#include "opal/util/show_help.h"
4444
#include "opal/runtime/opal.h"
4545
#include "opal/runtime/opal_params.h"
46+
#include "opal/mca/threads/threads.h"
47+
4648
/*
4749
* Global variables
4850
*
@@ -62,7 +64,7 @@ bool ompi_mpi_keep_fqdn_hostnames = false;
6264
bool ompi_have_sparse_group_storage = OPAL_INT_TO_BOOL(OMPI_GROUP_SPARSE);
6365
bool ompi_use_sparse_group_storage = OPAL_INT_TO_BOOL(OMPI_GROUP_SPARSE);
6466

65-
bool ompi_mpi_yield_when_idle = false;
67+
bool ompi_mpi_yield_when_idle;
6668
int ompi_mpi_event_tick_rate = -1;
6769
char *ompi_mpi_show_mca_params_string = NULL;
6870
bool ompi_mpi_have_sparse_group_storage = !!(OMPI_GROUP_SPARSE);
@@ -118,7 +120,9 @@ int ompi_mpi_register_params(void)
118120
OPAL_INFO_LVL_9,
119121
MCA_BASE_VAR_SCOPE_READONLY,
120122
&ompi_mpi_oversubscribe);
121-
ompi_mpi_yield_when_idle = ompi_mpi_oversubscribe;
123+
124+
/* if the threads module requires yielding we use that as default but allow it to be overridden */
125+
ompi_mpi_yield_when_idle = (OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT) ? OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT : ompi_mpi_oversubscribe;
122126
(void) mca_base_var_register("ompi", "mpi", NULL, "yield_when_idle",
123127
"Yield the processor when waiting for MPI communication (for MPI processes, will default to 1 when oversubscribing nodes)",
124128
MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,

opal/mca/threads/argobots/threads_argobots_threads.h

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Copyright (c) 2004-2005 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
9-
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9+
* Copyright (c) 2004-2020 High Performance Computing Center Stuttgart,
1010
* University of Stuttgart. All rights reserved.
1111
* Copyright (c) 2004-2005 The Regents of the University of California.
1212
* All rights reserved.
@@ -37,4 +37,14 @@ struct opal_thread_t {
3737
void *t_ret;
3838
};
3939

40+
41+
/* Argobots are cooperatively scheduled so yield when idle */
42+
#define OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT true
43+
44+
/*
 * Yield the calling execution context by delegating to ABT_thread_yield().
 * Takes no arguments and returns nothing.
 * NOTE(review): threads.h also declares a non-static opal_thread_yield();
 * confirm the two declarations never conflict within one translation unit.
 */
static inline
45+
void opal_thread_yield(void)
46+
{
47+
ABT_thread_yield();
48+
}
49+
4050
#endif /* OPAL_MCA_THREADS_ARGOBOTS_THREADS_ARGOBOTS_THREADS_H */

opal/mca/threads/pthreads/Makefile.am

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,6 @@ libmca_threads_pthreads_la_SOURCES = \
2929
threads_pthreads_threads.h \
3030
threads_pthreads_tsd.h \
3131
threads_pthreads_wait_sync.c \
32-
threads_pthreads_wait_sync.h
32+
threads_pthreads_wait_sync.h \
33+
threads_pthreads_yield.c \
34+
threads_pthreads.h
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2+
/*
3+
* Copyright (c) 2020 High Performance Computing Center Stuttgart,
4+
* University of Stuttgart. All rights reserved.
5+
*
6+
* $COPYRIGHT$
7+
*
8+
* Additional copyrights may follow
9+
*
10+
* $HEADER$
11+
*/
12+
13+
14+
#ifndef OPAL_MCA_THREADS_PTHREADS_THREADS_PTHREADS_H
15+
#define OPAL_MCA_THREADS_PTHREADS_THREADS_PTHREADS_H
16+
17+
#include "opal_config.h"
18+
#include <stdint.h>
19+
20+
/*
 * Strategies the pthreads component can use to yield the processor.
 * The numeric values are the ones advertised in the description of the
 * "yield_strategy" MCA variable:
 *   0 - call sched_yield()
 *   1 - call usleep() with opal_threads_pthreads_yield_usleep
 *   2 - call nanosleep() with opal_threads_pthreads_yield_nsleep
 */
typedef enum {
21+
OPAL_PTHREADS_YIELD_SCHED_YIELD = 0,
22+
OPAL_PTHREADS_YIELD_USLEEP = 1,
23+
OPAL_PTHREADS_YIELD_NANOSLEEP = 2
24+
} opal_threads_pthreads_yield_strategy_t;
25+
26+
/* The yield strategy to use */
27+
OPAL_DECLSPEC extern opal_threads_pthreads_yield_strategy_t opal_threads_pthreads_yield_strategy;
28+
29+
/* The length to usleep, if enabled */
30+
OPAL_DECLSPEC extern uint64_t opal_threads_pthreads_yield_usleep;
31+
32+
/* The length to nanosleep, if enabled */
33+
OPAL_DECLSPEC extern uint64_t opal_threads_pthreads_yield_nsleep;
34+
35+
#endif /* OPAL_MCA_THREADS_PTHREADS_THREADS_PTHREADS_H */

opal/mca/threads/pthreads/threads_pthreads_component.c

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,15 @@
2323

2424
#include "opal_config.h"
2525

26+
#include "opal/mca/threads/pthreads/threads_pthreads.h"
2627
#include "opal/mca/threads/thread.h"
2728
#include "opal/mca/threads/threads.h"
2829
#include "opal/constants.h"
30+
#include "opal/util/printf.h"
31+
2932

3033
static int opal_threads_pthreads_open(void);
34+
static int opal_threads_pthreads_register(void);
3135

3236
const opal_threads_base_component_1_0_0_t mca_threads_pthreads_component = {
3337
/* First, the mca_component_t struct containing meta information
@@ -41,13 +45,41 @@ const opal_threads_base_component_1_0_0_t mca_threads_pthreads_component = {
4145
OPAL_RELEASE_VERSION),
4246

4347
.mca_open_component = opal_threads_pthreads_open,
48+
.mca_register_component_params = opal_threads_pthreads_register
4449
},
4550
.threadsc_data = {
4651
/* The component is checkpoint ready */
4752
MCA_BASE_METADATA_PARAM_CHECKPOINT
4853
},
4954
};
5055

56+
int opal_threads_pthreads_register(void)
57+
{
58+
char * description_str;
59+
opal_asprintf(&description_str, "Pthread yield strategy to use (0: sched_yield, 1: usleep, 2: nanosleep (default: %d)",
60+
opal_threads_pthreads_yield_strategy);
61+
(void) mca_base_component_var_register(&mca_threads_pthreads_component.threadsc_version, "yield_strategy", description_str,
62+
MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, 0, OPAL_INFO_LVL_3,
63+
MCA_BASE_VAR_SCOPE_CONSTANT, &opal_threads_pthreads_yield_strategy);
64+
free(description_str);
65+
66+
opal_asprintf(&description_str, "Number of microseconds to sleep when using usleep as the pthread yield strategy (default: %ld)",
67+
opal_threads_pthreads_yield_usleep);
68+
(void) mca_base_component_var_register(&mca_threads_pthreads_component.threadsc_version, "usleep_time", description_str,
69+
MCA_BASE_VAR_TYPE_UINT64_T, NULL, 0, 0, OPAL_INFO_LVL_3,
70+
MCA_BASE_VAR_SCOPE_CONSTANT, &opal_threads_pthreads_yield_usleep);
71+
free(description_str);
72+
73+
opal_asprintf(&description_str, "Number of nanoseconds to sleep when using nanosleep as the pthread yield strategy (default: %ld)",
74+
opal_threads_pthreads_yield_nsleep);
75+
(void) mca_base_component_var_register(&mca_threads_pthreads_component.threadsc_version, "nanosleep_time", description_str,
76+
MCA_BASE_VAR_TYPE_UINT64_T, NULL, 0, 0, OPAL_INFO_LVL_3,
77+
MCA_BASE_VAR_SCOPE_CONSTANT, &opal_threads_pthreads_yield_nsleep);
78+
free(description_str);
79+
80+
return OPAL_SUCCESS;
81+
}
82+
5183
int opal_threads_pthreads_open(void)
5284
{
5385
return OPAL_SUCCESS;

opal/mca/threads/pthreads/threads_pthreads_threads.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Copyright (c) 2004-2006 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
9-
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9+
* Copyright (c) 2004-2020 High Performance Computing Center Stuttgart,
1010
* University of Stuttgart. All rights reserved.
1111
* Copyright (c) 2004-2005 The Regents of the University of California.
1212
* All rights reserved.
@@ -37,4 +37,7 @@ struct opal_thread_t {
3737
pthread_t t_handle;
3838
};
3939

40+
/* Pthreads do not need to yield when idle */
41+
#define OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT false
42+
4043
#endif /* OPAL_MCA_THREADS_PTHREADS_THREADS_PTHREADS_THREADS_H */
Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2+
/*
3+
* Copyright (c) 2020 High Performance Computing Center Stuttgart,
4+
* University of Stuttgart. All rights reserved.
5+
*
6+
* $COPYRIGHT$
7+
*
8+
* Additional copyrights may follow
9+
*
10+
* $HEADER$
11+
*/
12+
13+
#include <unistd.h>
14+
#include <time.h>
15+
#ifdef HAVE_SCHED_H
16+
#include <sched.h>
17+
#endif
18+
19+
#include "opal/mca/threads/threads.h"
20+
#include "opal/mca/threads/pthreads/threads_pthreads.h"
21+
#include "opal/sys/atomic.h"
22+
23+
24+
opal_threads_pthreads_yield_strategy_t opal_threads_pthreads_yield_strategy = OPAL_PTHREADS_YIELD_SCHED_YIELD;
25+
uint64_t opal_threads_pthreads_yield_usleep = 1;
26+
uint64_t opal_threads_pthreads_yield_nsleep = 100;
27+
28+
29+
void opal_thread_yield(void)
30+
{
31+
switch (opal_threads_pthreads_yield_strategy) {
32+
case OPAL_PTHREADS_YIELD_SCHED_YIELD:
33+
#ifdef HAVE_SCHED_H
34+
sched_yield();
35+
break;
36+
#endif
37+
/* fall-through if sched_yield is not available */
38+
case OPAL_PTHREADS_YIELD_USLEEP:
39+
usleep(opal_threads_pthreads_yield_usleep);
40+
break;
41+
case OPAL_PTHREADS_YIELD_NANOSLEEP:
42+
{
43+
static struct timespec sleeptime = {.tv_sec = -1, .tv_nsec = -1};
44+
if (OPAL_UNLIKELY(-1 == sleeptime.tv_nsec)) {
45+
sleeptime.tv_sec = opal_threads_pthreads_yield_nsleep / 1E9;
46+
opal_atomic_wmb();
47+
sleeptime.tv_nsec = opal_threads_pthreads_yield_nsleep - (sleeptime.tv_sec*(uint64_t)1E9);
48+
}
49+
nanosleep(&sleeptime, NULL);
50+
break;
51+
}
52+
}
53+
}
54+

opal/mca/threads/qthreads/threads_qthreads_threads.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Copyright (c) 2004-2005 The University of Tennessee and The University
77
* of Tennessee Research Foundation. All rights
88
* reserved.
9-
* Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9+
* Copyright (c) 2004-2020 High Performance Computing Center Stuttgart,
1010
* University of Stuttgart. All rights reserved.
1111
* Copyright (c) 2004-2005 The Regents of the University of California.
1212
* All rights reserved.
@@ -37,4 +37,13 @@ struct opal_thread_t {
3737
void *t_ret;
3838
};
3939

40+
/* Qthreads are cooperatively scheduled so yield when idle */
41+
#define OPAL_THREAD_YIELD_WHEN_IDLE_DEFAULT true
42+
43+
/*
 * Yield the calling execution context by delegating to qthread_yield().
 * Takes no arguments and returns nothing.
 * NOTE(review): threads.h also declares a non-static opal_thread_yield();
 * confirm the two declarations never conflict within one translation unit.
 */
static inline
44+
void opal_thread_yield(void)
45+
{
46+
qthread_yield();
47+
}
48+
4049
#endif /* OPAL_MCA_THREADS_QTHREADS_THREADS_QTHREADS_THREADS_H */

opal/mca/threads/threads.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,6 +132,7 @@ OPAL_DECLSPEC bool opal_thread_self_compare(opal_thread_t *);
132132
OPAL_DECLSPEC opal_thread_t *opal_thread_get_self(void);
133133
OPAL_DECLSPEC void opal_thread_kill(opal_thread_t *, int sig);
134134
OPAL_DECLSPEC void opal_thread_set_main(void);
135+
OPAL_DECLSPEC void opal_thread_yield(void);
135136

136137
END_C_DECLS
137138

opal/runtime/opal_progress.c

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,6 @@
2525

2626
#include "opal_config.h"
2727

28-
#ifdef HAVE_SCHED_H
29-
#include <sched.h>
30-
#endif
31-
3228
#include "opal/runtime/opal_progress.h"
3329
#include "opal/mca/event/event.h"
3430
#include "opal/mca/base/mca_base_var.h"
@@ -37,6 +33,7 @@
3733
#include "opal/util/output.h"
3834
#include "opal/runtime/opal_params.h"
3935
#include "opal/runtime/opal.h"
36+
#include "opal/mca/threads/threads.h"
4037

4138
#define OPAL_PROGRESS_USE_TIMERS (OPAL_TIMER_CYCLE_SUPPORTED || OPAL_TIMER_USEC_SUPPORTED)
4239
#define OPAL_PROGRESS_ONLY_USEC_NATIVE (OPAL_TIMER_USEC_NATIVE && !OPAL_TIMER_CYCLE_NATIVE)
@@ -66,7 +63,7 @@ static volatile opal_progress_callback_t *callbacks_lp = NULL;
6663
static size_t callbacks_lp_len = 0;
6764
static size_t callbacks_lp_size = 0;
6865

69-
/* do we want to call sched_yield() if nothing happened */
66+
/* do we want to yield() if nothing happened */
7067
bool opal_progress_yield_when_idle = false;
7168

7269
#if OPAL_PROGRESS_USE_TIMERS
@@ -213,7 +210,7 @@ static int opal_progress_events(void)
213210
* be called. We don't propagate errors from the progress functions,
214211
* so no action is taken if they return failures. The functions are
215212
* expected to return the number of events progressed, to determine
216-
* whether or not we should call sched_yield() during MPI progress.
213+
* whether or not we should yield the CPU during MPI progress.
217214
* This is only loosely tracked, as an error return can cause the number
218215
* of progressed events to appear lower than it actually is. We don't
219216
* care, as the cost of that happening is far outweighed by the cost
@@ -247,16 +244,16 @@ opal_progress(void)
247244
opal_progress_events();
248245
}
249246

250-
#if OPAL_HAVE_SCHED_YIELD
251247
if (opal_progress_yield_when_idle && events <= 0) {
252248
/* If there is nothing to do - yield the processor - otherwise
253249
* we could consume the processor for the entire time slice. If
254250
* the processor is oversubscribed - this will result in a best-case
255251
* latency equivalent to the time-slice.
252+
* With some thread implementations, yielding might be required
253+
* to ensure correct scheduling of all communicating threads.
256254
*/
257-
sched_yield();
255+
opal_thread_yield();
258256
}
259-
#endif /* defined(HAVE_SCHED_YIELD) */
260257
}
261258

262259

0 commit comments

Comments
 (0)