Skip to content

Commit a81b319

Browse files
committed
backport improvements on GC scheduler shutdown
1 parent 8490f73 commit a81b319

File tree

3 files changed

+152
-73
lines changed

3 files changed

+152
-73
lines changed

src/gc.c

Lines changed: 139 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -2856,13 +2856,16 @@ JL_EXTENSION NOINLINE void gc_mark_loop_serial(jl_ptls_t ptls)
28562856
gc_drain_own_chunkqueue(ptls, &ptls->mark_queue);
28572857
}
28582858

2859-
void gc_mark_and_steal(jl_ptls_t ptls)
2859+
int gc_mark_and_steal(jl_ptls_t ptls)
28602860
{
28612861
jl_gc_markqueue_t *mq = &ptls->mark_queue;
28622862
jl_gc_markqueue_t *mq_master = NULL;
28632863
int master_tid = jl_atomic_load(&gc_master_tid);
2864-
if (master_tid != -1)
2865-
mq_master = &gc_all_tls_states[master_tid]->mark_queue;
2864+
if (master_tid == -1) {
2865+
return 0;
2866+
}
2867+
mq_master = &gc_all_tls_states[master_tid]->mark_queue;
2868+
int marked = 0;
28662869
void *new_obj;
28672870
jl_gc_chunk_t c;
28682871
pop : {
@@ -2878,6 +2881,7 @@ void gc_mark_and_steal(jl_ptls_t ptls)
28782881
goto steal;
28792882
}
28802883
mark : {
2884+
marked = 1;
28812885
gc_mark_outrefs(ptls, mq, new_obj, 0);
28822886
goto pop;
28832887
}
@@ -2906,12 +2910,10 @@ void gc_mark_and_steal(jl_ptls_t ptls)
29062910
}
29072911
}
29082912
// Try to steal chunk from master thread
2909-
if (mq_master != NULL) {
2910-
c = gc_chunkqueue_steal_from(mq_master);
2911-
if (c.cid != GC_empty_chunk) {
2912-
gc_mark_chunk(ptls, mq, &c);
2913-
goto pop;
2914-
}
2913+
c = gc_chunkqueue_steal_from(mq_master);
2914+
if (c.cid != GC_empty_chunk) {
2915+
gc_mark_chunk(ptls, mq, &c);
2916+
goto pop;
29152917
}
29162918
// Try to steal pointer from random GC thread
29172919
for (int i = 0; i < 4 * jl_n_gcthreads; i++) {
@@ -2928,37 +2930,141 @@ void gc_mark_and_steal(jl_ptls_t ptls)
29282930
if (new_obj != NULL)
29292931
goto mark;
29302932
}
2931-
// Try to steal pointer from master thread
2932-
if (mq_master != NULL) {
2933-
new_obj = gc_ptr_queue_steal_from(mq_master);
2934-
if (new_obj != NULL)
2935-
goto mark;
2933+
new_obj = gc_ptr_queue_steal_from(mq_master);
2934+
if (new_obj != NULL)
2935+
goto mark;
2936+
}
2937+
return marked;
2938+
}
2939+
2940+
#define GC_BACKOFF_MIN_LG2 (10)
2941+
#define GC_BACKOFF_MAX_LG2 (18)
2942+
2943+
STATIC_INLINE void gc_backoff_reset_state(gc_backoff_state_t *s) JL_NOTSAFEPOINT
2944+
{
2945+
s->backoff_phase = GC_SPINNING;
2946+
s->backoff_lg2 = GC_BACKOFF_MIN_LG2;
2947+
s->n_spins_at_max = 0;
2948+
}
2949+
2950+
STATIC_INLINE void gc_backoff(gc_backoff_state_t *s) JL_NOTSAFEPOINT
2951+
{
2952+
if (s->backoff_phase == GC_SPINNING) {
2953+
// spin for 2^backoff_lg2 cycles
2954+
uint64_t c0 = cycleclock();
2955+
do {
2956+
jl_cpu_pause();
2957+
} while (cycleclock() - c0 < (1 << s->backoff_lg2));
2958+
if (s->backoff_lg2 == GC_BACKOFF_MAX_LG2) {
2959+
s->n_spins_at_max++;
2960+
// has been spinning for a while... should
2961+
// just sleep in the next failed steal attempt
2962+
if (s->n_spins_at_max >= 8) {
2963+
s->backoff_phase = GC_SLEEPING;
2964+
}
2965+
}
2966+
else {
2967+
s->backoff_lg2++;
29362968
}
29372969
}
2970+
else {
2971+
// sleep for 1ms
2972+
uv_sleep(1);
2973+
}
29382974
}
29392975

2940-
void gc_mark_loop_parallel(jl_ptls_t ptls, int master)
2976+
int gc_some_work_left_in_queue(jl_ptls_t ptls) JL_NOTSAFEPOINT
2977+
{
2978+
if (jl_atomic_load_relaxed(&ptls->mark_queue.ptr_queue.bottom) !=
2979+
jl_atomic_load_relaxed(&ptls->mark_queue.ptr_queue.top)) {
2980+
return 1;
2981+
}
2982+
if (jl_atomic_load_relaxed(&ptls->mark_queue.chunk_queue.bottom) !=
2983+
jl_atomic_load_relaxed(&ptls->mark_queue.chunk_queue.top)) {
2984+
return 1;
2985+
}
2986+
return 0;
2987+
}
2988+
2989+
int gc_some_work_left(void) JL_NOTSAFEPOINT
2990+
{
2991+
for (int i = gc_first_tid; i < gc_first_tid + jl_n_gcthreads; i++) {
2992+
jl_ptls_t ptls2 = gc_all_tls_states[i];
2993+
if (gc_some_work_left_in_queue(ptls2)) {
2994+
return 1;
2995+
}
2996+
}
2997+
int master_tid = jl_atomic_load(&gc_master_tid);
2998+
if (master_tid != -1) {
2999+
jl_ptls_t ptls2 = gc_all_tls_states[master_tid];
3000+
if (gc_some_work_left_in_queue(ptls2)) {
3001+
return 1;
3002+
}
3003+
}
3004+
return 0;
3005+
}
3006+
3007+
void gc_mark_loop_master_init(jl_ptls_t ptls)
3008+
{
3009+
jl_atomic_store(&gc_master_tid, ptls->tid);
3010+
// Wake threads up and try to do some work
3011+
uv_mutex_lock(&gc_threads_lock);
3012+
jl_atomic_fetch_add(&gc_n_threads_marking, 1);
3013+
uv_cond_broadcast(&gc_threads_cond);
3014+
uv_mutex_unlock(&gc_threads_lock);
3015+
gc_mark_and_steal(ptls);
3016+
jl_atomic_fetch_add(&gc_n_threads_marking, -1);
3017+
}
3018+
3019+
void gc_mark_loop_parallel(jl_ptls_t ptls)
29413020
{
2942-
int backoff = GC_BACKOFF_MIN;
2943-
if (master) {
2944-
jl_atomic_store(&gc_master_tid, ptls->tid);
2945-
// Wake threads up and try to do some work
3021+
gc_backoff_state_t s;
3022+
gc_backoff_reset_state(&s);
3023+
while (jl_atomic_load(&gc_n_threads_marking) > 0) {
3024+
if (gc_some_work_left()) {
3025+
// Try to become a thief while other threads are marking
3026+
jl_atomic_fetch_add(&gc_n_threads_marking, 1);
3027+
int marked = gc_mark_and_steal(ptls);
3028+
jl_atomic_fetch_add(&gc_n_threads_marking, -1);
3029+
if (marked) {
3030+
gc_backoff_reset_state(&s);
3031+
}
3032+
}
3033+
gc_backoff(&s);
3034+
}
3035+
}
3036+
3037+
void gc_mark_loop_master(jl_ptls_t ptls)
3038+
{
3039+
gc_mark_loop_master_init(ptls);
3040+
gc_mark_loop_parallel(ptls);
3041+
}
3042+
3043+
STATIC_INLINE int gc_may_mark(void) JL_NOTSAFEPOINT
3044+
{
3045+
return jl_atomic_load(&gc_n_threads_marking) > 0;
3046+
}
3047+
3048+
STATIC_INLINE int gc_may_sweep(jl_ptls_t ptls) JL_NOTSAFEPOINT
3049+
{
3050+
return jl_atomic_load(&ptls->gc_sweeps_requested) > 0;
3051+
}
3052+
3053+
void gc_worker_loop(jl_ptls_t ptls)
3054+
{
3055+
while (1) {
29463056
uv_mutex_lock(&gc_threads_lock);
2947-
jl_atomic_fetch_add(&gc_n_threads_marking, 1);
2948-
uv_cond_broadcast(&gc_threads_cond);
3057+
while (!gc_may_mark() && !gc_may_sweep(ptls)) {
3058+
uv_cond_wait(&gc_threads_cond, &gc_threads_lock);
3059+
}
29493060
uv_mutex_unlock(&gc_threads_lock);
2950-
gc_mark_and_steal(ptls);
2951-
jl_atomic_fetch_add(&gc_n_threads_marking, -1);
2952-
}
2953-
while (jl_atomic_load(&gc_n_threads_marking) > 0) {
2954-
// Try to become a thief while other threads are marking
2955-
jl_atomic_fetch_add(&gc_n_threads_marking, 1);
2956-
if (jl_atomic_load(&gc_master_tid) != -1) {
2957-
gc_mark_and_steal(ptls);
3061+
if (gc_may_mark()) {
3062+
gc_mark_loop_parallel(ptls);
3063+
}
3064+
if (gc_may_sweep(ptls)) { // not an else!
3065+
gc_sweep_pool_parallel();
3066+
jl_atomic_fetch_add(&ptls->gc_sweeps_requested, -1);
29583067
}
2959-
jl_atomic_fetch_add(&gc_n_threads_marking, -1);
2960-
// Failed to steal
2961-
gc_backoff(&backoff);
29623068
}
29633069
}
29643070

@@ -2968,7 +3074,7 @@ void gc_mark_loop(jl_ptls_t ptls)
29683074
gc_mark_loop_serial(ptls);
29693075
}
29703076
else {
2971-
gc_mark_loop_parallel(ptls, 1);
3077+
gc_mark_loop_master(ptls);
29723078
}
29733079
}
29743080

src/gc.h

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,16 @@ typedef struct _jl_gc_chunk_t {
113113
#define GC_PTR_QUEUE_INIT_SIZE (1 << 18) // initial size of queue of `jl_value_t *`
114114
#define GC_CHUNK_QUEUE_INIT_SIZE (1 << 14) // initial size of chunk-queue
115115

116+
// State used for GC scheduling
117+
typedef struct {
118+
#define GC_SPINNING 0
119+
#define GC_SLEEPING 1
120+
uint8_t backoff_phase; // whether the thread is spinning or sleeping
121+
// between failed steal attempts
122+
size_t backoff_lg2; // exponential backoff log counter
123+
size_t n_spins_at_max; // number of times it spun at the maximum backoff
124+
} gc_backoff_state_t;
125+
116126
// layout for big (>2k) objects
117127

118128
JL_EXTENSION typedef struct _bigval_t {
@@ -189,19 +199,6 @@ extern jl_gc_page_stack_t global_page_pool_lazily_freed;
189199
extern jl_gc_page_stack_t global_page_pool_clean;
190200
extern jl_gc_page_stack_t global_page_pool_freed;
191201

192-
#define GC_BACKOFF_MIN 4
193-
#define GC_BACKOFF_MAX 12
194-
195-
STATIC_INLINE void gc_backoff(int *i) JL_NOTSAFEPOINT
196-
{
197-
if (*i < GC_BACKOFF_MAX) {
198-
(*i)++;
199-
}
200-
for (int j = 0; j < (1 << *i); j++) {
201-
jl_cpu_pause();
202-
}
203-
}
204-
205202
// Lock-free stack implementation taken
206203
// from Herlihy's "The Art of Multiprocessor Programming"
207204
// XXX: this is not a general-purpose lock-free stack. We can
@@ -452,8 +449,7 @@ void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t *
452449
void gc_mark_finlist(jl_gc_markqueue_t *mq, arraylist_t *list, size_t start) JL_NOTSAFEPOINT;
453450
void gc_mark_loop_serial_(jl_ptls_t ptls, jl_gc_markqueue_t *mq);
454451
void gc_mark_loop_serial(jl_ptls_t ptls);
455-
void gc_mark_loop_parallel(jl_ptls_t ptls, int master);
456-
void gc_sweep_pool_parallel(void);
452+
void gc_worker_loop(jl_ptls_t ptls);
457453
void gc_free_pages(void);
458454
void sweep_stack_pools(void);
459455
void jl_gc_debug_init(void);

src/partr.c

Lines changed: 2 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -108,17 +108,7 @@ void jl_init_threadinginfra(void)
108108

109109
void JL_NORETURN jl_finish_task(jl_task_t *t);
110110

111-
static inline int may_mark(void) JL_NOTSAFEPOINT
112-
{
113-
return (jl_atomic_load(&gc_n_threads_marking) > 0);
114-
}
115-
116-
static inline int may_sweep(jl_ptls_t ptls) JL_NOTSAFEPOINT
117-
{
118-
return (jl_atomic_load(&ptls->gc_sweeps_requested) > 0);
119-
}
120-
121-
// gc thread function
111+
// parallel gc thread function
122112
void jl_gc_threadfun(void *arg)
123113
{
124114
jl_threadarg_t *targ = (jl_threadarg_t*)arg;
@@ -133,20 +123,7 @@ void jl_gc_threadfun(void *arg)
133123
// free the thread argument here
134124
free(targ);
135125

136-
while (1) {
137-
uv_mutex_lock(&gc_threads_lock);
138-
while (!may_mark() && !may_sweep(ptls)) {
139-
uv_cond_wait(&gc_threads_cond, &gc_threads_lock);
140-
}
141-
uv_mutex_unlock(&gc_threads_lock);
142-
if (may_mark()) {
143-
gc_mark_loop_parallel(ptls, 0);
144-
}
145-
if (may_sweep(ptls)) { // not an else!
146-
gc_sweep_pool_parallel();
147-
jl_atomic_fetch_add(&ptls->gc_sweeps_requested, -1);
148-
}
149-
}
126+
gc_worker_loop(ptls);
150127
}
151128

152129
// thread function: used by all mutator threads except the main thread

0 commit comments

Comments
 (0)