
Commit bab2f63

improvements on GC scheduler shutdown

Parent: 3c0d480

File tree: 3 files changed (+151, -72 lines)

src/gc.c: 139 additions, 33 deletions
@@ -2814,13 +2814,16 @@ JL_EXTENSION NOINLINE void gc_mark_loop_serial(jl_ptls_t ptls)
     gc_drain_own_chunkqueue(ptls, &ptls->mark_queue);
 }
 
-void gc_mark_and_steal(jl_ptls_t ptls)
+int gc_mark_and_steal(jl_ptls_t ptls)
 {
     jl_gc_markqueue_t *mq = &ptls->mark_queue;
     jl_gc_markqueue_t *mq_master = NULL;
     int master_tid = jl_atomic_load(&gc_master_tid);
-    if (master_tid != -1)
-        mq_master = &gc_all_tls_states[master_tid]->mark_queue;
+    if (master_tid == -1) {
+        return 0;
+    }
+    mq_master = &gc_all_tls_states[master_tid]->mark_queue;
+    int marked = 0;
     void *new_obj;
     jl_gc_chunk_t c;
     pop : {
@@ -2836,6 +2839,7 @@ void gc_mark_and_steal(jl_ptls_t ptls)
         goto steal;
     }
     mark : {
+        marked = 1;
         gc_mark_outrefs(ptls, mq, new_obj, 0);
         goto pop;
     }
@@ -2864,12 +2868,10 @@ void gc_mark_and_steal(jl_ptls_t ptls)
             }
         }
         // Try to steal chunk from master thread
-        if (mq_master != NULL) {
-            c = gc_chunkqueue_steal_from(mq_master);
-            if (c.cid != GC_empty_chunk) {
-                gc_mark_chunk(ptls, mq, &c);
-                goto pop;
-            }
+        c = gc_chunkqueue_steal_from(mq_master);
+        if (c.cid != GC_empty_chunk) {
+            gc_mark_chunk(ptls, mq, &c);
+            goto pop;
         }
         // Try to steal pointer from random GC thread
         for (int i = 0; i < 4 * jl_n_markthreads; i++) {
@@ -2886,37 +2888,141 @@ void gc_mark_and_steal(jl_ptls_t ptls)
             if (new_obj != NULL)
                 goto mark;
         }
-        // Try to steal pointer from master thread
-        if (mq_master != NULL) {
-            new_obj = gc_ptr_queue_steal_from(mq_master);
-            if (new_obj != NULL)
-                goto mark;
+        new_obj = gc_ptr_queue_steal_from(mq_master);
+        if (new_obj != NULL)
+            goto mark;
+    }
+    return marked;
+}
+
+#define GC_BACKOFF_MIN_LG2 (10)
+#define GC_BACKOFF_MAX_LG2 (18)
+
+STATIC_INLINE void gc_backoff_reset_state(gc_backoff_state_t *s) JL_NOTSAFEPOINT
+{
+    s->backoff_phase = GC_SPINNING;
+    s->backoff_lg2 = GC_BACKOFF_MIN_LG2;
+    s->n_spins_at_max = 0;
+}
+
+STATIC_INLINE void gc_backoff(gc_backoff_state_t *s) JL_NOTSAFEPOINT
+{
+    if (s->backoff_phase == GC_SPINNING) {
+        // spin for 2^backoff_lg2 cycles
+        uint64_t c0 = cycleclock();
+        do {
+            jl_cpu_pause();
+        } while (cycleclock() - c0 < (1 << s->backoff_lg2));
+        if (s->backoff_lg2 == GC_BACKOFF_MAX_LG2) {
+            s->n_spins_at_max++;
+            // has been spinning for a while... should
+            // just sleep in the next failed steal attempt
+            if (s->n_spins_at_max >= 8) {
+                s->backoff_phase = GC_SLEEPING;
+            }
+        }
+        else {
+            s->backoff_lg2++;
         }
     }
+    else {
+        // sleep for 1ms
+        uv_sleep(1);
+    }
 }
 
-void gc_mark_loop_parallel(jl_ptls_t ptls, int master)
+int gc_some_work_left_in_queue(jl_ptls_t ptls) JL_NOTSAFEPOINT
+{
+    if (jl_atomic_load_relaxed(&ptls->mark_queue.ptr_queue.bottom) !=
+        jl_atomic_load_relaxed(&ptls->mark_queue.ptr_queue.top)) {
+        return 1;
+    }
+    if (jl_atomic_load_relaxed(&ptls->mark_queue.chunk_queue.bottom) !=
+        jl_atomic_load_relaxed(&ptls->mark_queue.chunk_queue.top)) {
+        return 1;
+    }
+    return 0;
+}
+
+int gc_some_work_left(void) JL_NOTSAFEPOINT
+{
+    for (int i = gc_first_tid; i < gc_first_tid + jl_n_markthreads; i++) {
+        jl_ptls_t ptls2 = gc_all_tls_states[i];
+        if (gc_some_work_left_in_queue(ptls2)) {
+            return 1;
+        }
+    }
+    int master_tid = jl_atomic_load(&gc_master_tid);
+    if (master_tid != -1) {
+        jl_ptls_t ptls2 = gc_all_tls_states[master_tid];
+        if (gc_some_work_left_in_queue(ptls2)) {
+            return 1;
+        }
+    }
+    return 0;
+}
+
+void gc_mark_loop_master_init(jl_ptls_t ptls)
+{
+    jl_atomic_store(&gc_master_tid, ptls->tid);
+    // Wake threads up and try to do some work
+    uv_mutex_lock(&gc_threads_lock);
+    jl_atomic_fetch_add(&gc_n_threads_marking, 1);
+    uv_cond_broadcast(&gc_threads_cond);
+    uv_mutex_unlock(&gc_threads_lock);
+    gc_mark_and_steal(ptls);
+    jl_atomic_fetch_add(&gc_n_threads_marking, -1);
+}
+
+void gc_mark_loop_parallel(jl_ptls_t ptls)
 {
-    int backoff = GC_BACKOFF_MIN;
-    if (master) {
-        jl_atomic_store(&gc_master_tid, ptls->tid);
-        // Wake threads up and try to do some work
+    gc_backoff_state_t s;
+    gc_backoff_reset_state(&s);
+    while (jl_atomic_load(&gc_n_threads_marking) > 0) {
+        if (gc_some_work_left()) {
+            // Try to become a thief while other threads are marking
+            jl_atomic_fetch_add(&gc_n_threads_marking, 1);
+            int marked = gc_mark_and_steal(ptls);
+            jl_atomic_fetch_add(&gc_n_threads_marking, -1);
+            if (marked) {
+                gc_backoff_reset_state(&s);
+            }
+        }
+        gc_backoff(&s);
+    }
+}
+
+void gc_mark_loop_master(jl_ptls_t ptls)
+{
+    gc_mark_loop_master_init(ptls);
+    gc_mark_loop_parallel(ptls);
+}
+
+STATIC_INLINE int gc_may_mark(void) JL_NOTSAFEPOINT
+{
+    return jl_atomic_load(&gc_n_threads_marking) > 0;
+}
+
+STATIC_INLINE int gc_may_sweep(jl_ptls_t ptls) JL_NOTSAFEPOINT
+{
+    return jl_atomic_load(&ptls->gc_sweeps_requested) > 0;
+}
+
+void gc_worker_loop(jl_ptls_t ptls)
+{
+    while (1) {
         uv_mutex_lock(&gc_threads_lock);
-        jl_atomic_fetch_add(&gc_n_threads_marking, 1);
-        uv_cond_broadcast(&gc_threads_cond);
+        while (!gc_may_mark() && !gc_may_sweep(ptls)) {
+            uv_cond_wait(&gc_threads_cond, &gc_threads_lock);
+        }
         uv_mutex_unlock(&gc_threads_lock);
-        gc_mark_and_steal(ptls);
-        jl_atomic_fetch_add(&gc_n_threads_marking, -1);
-    }
-    while (jl_atomic_load(&gc_n_threads_marking) > 0) {
-        // Try to become a thief while other threads are marking
-        jl_atomic_fetch_add(&gc_n_threads_marking, 1);
-        if (jl_atomic_load(&gc_master_tid) != -1) {
-            gc_mark_and_steal(ptls);
+        if (gc_may_mark()) {
+            gc_mark_loop_parallel(ptls);
+        }
+        if (gc_may_sweep(ptls)) { // not an else!
+            gc_sweep_pool_parallel();
+            jl_atomic_fetch_add(&ptls->gc_sweeps_requested, -1);
         }
-        jl_atomic_fetch_add(&gc_n_threads_marking, -1);
-        // Failed to steal
-        gc_backoff(&backoff);
     }
 }
 
@@ -2926,7 +3032,7 @@ void gc_mark_loop(jl_ptls_t ptls)
         gc_mark_loop_serial(ptls);
     }
     else {
-        gc_mark_loop_parallel(ptls, 1);
+        gc_mark_loop_master(ptls);
     }
 }
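The scheduler-shutdown change in this file is concentrated in gc_mark_loop_parallel(): a worker only advertises itself as marking (incrementing gc_n_threads_marking) when gc_some_work_left() suggests there is something to steal, and the loop exits once gc_n_threads_marking drops back to zero. The work-availability check itself just compares the bottom and top indices of each work-stealing mark queue with relaxed loads. Below is a minimal, self-contained sketch of that heuristic, not the runtime's code; ws_queue_t, mark_queues, and N_GC_THREADS are illustrative names invented for the example.

/* Illustrative sketch of the "does any queue appear non-empty?" check used by
 * gc_some_work_left_in_queue()/gc_some_work_left(). Types and globals here
 * (ws_queue_t, mark_queues, N_GC_THREADS) are made up for the example. */
#include <stdatomic.h>
#include <stdint.h>
#include <stddef.h>

typedef struct {
    _Atomic int64_t bottom;  /* owner pushes/pops here */
    _Atomic int64_t top;     /* thieves steal from here */
} ws_queue_t;

#define N_GC_THREADS 4
static ws_queue_t mark_queues[N_GC_THREADS];

/* Relaxed loads suffice: the answer is only a scheduling hint, and a stale
 * result merely costs one extra steal attempt or one extra backoff round. */
static int queue_maybe_nonempty(ws_queue_t *q)
{
    return atomic_load_explicit(&q->bottom, memory_order_relaxed) !=
           atomic_load_explicit(&q->top, memory_order_relaxed);
}

static int some_work_left(void)
{
    for (size_t i = 0; i < N_GC_THREADS; i++) {
        if (queue_maybe_nonempty(&mark_queues[i]))
            return 1;
    }
    return 0;
}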

src/gc.h: 11 additions, 15 deletions
@@ -114,6 +114,16 @@ typedef struct _jl_gc_chunk_t {
 #define GC_PTR_QUEUE_INIT_SIZE (1 << 18) // initial size of queue of `jl_value_t *`
 #define GC_CHUNK_QUEUE_INIT_SIZE (1 << 14) // initial size of chunk-queue
 
+// State used for GC scheduling
+typedef struct {
+#define GC_SPINNING 0
+#define GC_SLEEPING 1
+    uint8_t backoff_phase;  // whether the thread is spinning or sleeping
+                            // between failed steal attempts
+    size_t backoff_lg2;     // exponential backoff log counter
+    size_t n_spins_at_max;  // number of times it spun at the maximum backoff
+} gc_backoff_state_t;
+
 // layout for big (>2k) objects
 
 JL_EXTENSION typedef struct _bigval_t {
@@ -186,19 +196,6 @@ extern jl_gc_page_stack_t global_page_pool_lazily_freed;
 extern jl_gc_page_stack_t global_page_pool_clean;
 extern jl_gc_page_stack_t global_page_pool_freed;
 
-#define GC_BACKOFF_MIN 4
-#define GC_BACKOFF_MAX 12
-
-STATIC_INLINE void gc_backoff(int *i) JL_NOTSAFEPOINT
-{
-    if (*i < GC_BACKOFF_MAX) {
-        (*i)++;
-    }
-    for (int j = 0; j < (1 << *i); j++) {
-        jl_cpu_pause();
-    }
-}
-
 // Lock-free stack implementation taken
 // from Herlihy's "The Art of Multiprocessor Programming"
 // XXX: this is not a general-purpose lock-free stack. We can
@@ -458,8 +455,7 @@ void gc_mark_finlist_(jl_gc_markqueue_t *mq, jl_value_t **fl_begin, jl_value_t *
 void gc_mark_finlist(jl_gc_markqueue_t *mq, arraylist_t *list, size_t start) JL_NOTSAFEPOINT;
 void gc_mark_loop_serial_(jl_ptls_t ptls, jl_gc_markqueue_t *mq);
 void gc_mark_loop_serial(jl_ptls_t ptls);
-void gc_mark_loop_parallel(jl_ptls_t ptls, int master);
-void gc_sweep_pool_parallel(void);
+void gc_worker_loop(jl_ptls_t ptls);
 void gc_free_pages(void);
 void sweep_stack_pools(void);
 void jl_gc_debug_init(void);
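The header swaps the old fixed gc_backoff() (spin 2^i pauses, with i capped at GC_BACKOFF_MAX) for a gc_backoff_state_t that also remembers whether the thread has given up spinning and moved to sleeping. Below is a standalone restatement of that two-phase policy under stated assumptions: cpu_relax() is a hypothetical stand-in for jl_cpu_pause(), an iteration-count spin replaces the cycleclock() loop, and nanosleep() replaces uv_sleep(1); the thresholds mirror the patch but are only illustrative.

/* Standalone sketch of the two-phase (spin, then sleep) backoff; illustrative,
 * not the runtime's code. */
#define _POSIX_C_SOURCE 199309L
#include <stddef.h>
#include <time.h>

#define BACKOFF_MIN_LG2 10
#define BACKOFF_MAX_LG2 18
#define MAX_ROUNDS_AT_CAP 8

typedef enum { SPINNING, SLEEPING } backoff_phase_t;

typedef struct {
    backoff_phase_t phase;
    unsigned lg2;            /* spin budget is 2^lg2 pause iterations */
    unsigned rounds_at_cap;  /* consecutive rounds spent at the cap */
} backoff_state_t;

/* Call after a successful steal so the next failure starts over from the
 * minimum budget (this mirrors gc_backoff_reset_state()). */
static void backoff_reset(backoff_state_t *s)
{
    s->phase = SPINNING;
    s->lg2 = BACKOFF_MIN_LG2;
    s->rounds_at_cap = 0;
}

static void cpu_relax(void)
{
    /* e.g. _mm_pause() on x86 or a yield hint on ARM; a no-op keeps this portable */
}

/* Called after each failed steal attempt: spin with an exponentially growing
 * budget, then fall back to 1 ms sleeps once spinning has stopped paying off. */
static void backoff(backoff_state_t *s)
{
    if (s->phase == SPINNING) {
        for (size_t i = 0; i < ((size_t)1 << s->lg2); i++)
            cpu_relax();
        if (s->lg2 == BACKOFF_MAX_LG2) {
            if (++s->rounds_at_cap >= MAX_ROUNDS_AT_CAP)
                s->phase = SLEEPING;  /* give up on spinning */
        }
        else {
            s->lg2++;  /* double the spin budget for the next round */
        }
    }
    else {
        struct timespec one_ms = {0, 1000000L};
        nanosleep(&one_ms, NULL);  /* 1 ms, like uv_sleep(1) */
    }
}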

src/partr.c: 1 addition, 24 deletions
@@ -107,16 +107,6 @@ void jl_init_threadinginfra(void)
 
 void JL_NORETURN jl_finish_task(jl_task_t *t);
 
-static inline int may_mark(void) JL_NOTSAFEPOINT
-{
-    return (jl_atomic_load(&gc_n_threads_marking) > 0);
-}
-
-static inline int may_sweep(jl_ptls_t ptls) JL_NOTSAFEPOINT
-{
-    return (jl_atomic_load(&ptls->gc_sweeps_requested) > 0);
-}
-
 // parallel gc thread function
 void jl_parallel_gc_threadfun(void *arg)
 {
@@ -132,20 +122,7 @@ void jl_parallel_gc_threadfun(void *arg)
     // free the thread argument here
     free(targ);
 
-    while (1) {
-        uv_mutex_lock(&gc_threads_lock);
-        while (!may_mark() && !may_sweep(ptls)) {
-            uv_cond_wait(&gc_threads_cond, &gc_threads_lock);
-        }
-        uv_mutex_unlock(&gc_threads_lock);
-        if (may_mark()) {
-            gc_mark_loop_parallel(ptls, 0);
-        }
-        if (may_sweep(ptls)) { // not an else!
-            gc_sweep_pool_parallel();
-            jl_atomic_fetch_add(&ptls->gc_sweeps_requested, -1);
-        }
-    }
+    gc_worker_loop(ptls);
 }
 
 // concurrent gc thread function
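
With the loop body moved into gc_worker_loop() in src/gc.c, the parallel GC thread function reduces to a single call. The protocol behind it is the standard condition-variable pattern: sleep while neither marking nor sweeping is requested, re-check the predicate after every wakeup, then service both kinds of work (they are not mutually exclusive). The sketch below restates that pattern with POSIX threads rather than libuv; the counters, request_work(), do_mark(), and do_sweep() are placeholders, not the runtime's API.

/* Illustrative restatement of the worker wake/sleep protocol using POSIX
 * threads instead of libuv. All names here are placeholders. */
#include <pthread.h>
#include <stdatomic.h>
#include <stddef.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t cond = PTHREAD_COND_INITIALIZER;
static atomic_int mark_requested;    /* stands in for gc_n_threads_marking */
static atomic_int sweeps_requested;  /* stands in for ptls->gc_sweeps_requested */

static void do_mark(void)  { /* placeholder for parallel marking */ }
static void do_sweep(void) { /* placeholder for parallel sweeping */ }

/* Requester side: bump a counter and wake all workers while holding the lock,
 * mirroring the broadcast on gc_threads_cond in gc_mark_loop_master_init(). */
static void request_work(atomic_int *counter)
{
    pthread_mutex_lock(&lock);
    atomic_fetch_add(counter, 1);
    pthread_cond_broadcast(&cond);
    pthread_mutex_unlock(&lock);
}

/* Worker side: sleep until either kind of work is requested, re-checking the
 * predicate after every wakeup (condition variables may wake spuriously). */
static void *worker_loop(void *arg)
{
    (void)arg;
    for (;;) {
        pthread_mutex_lock(&lock);
        while (atomic_load(&mark_requested) == 0 &&
               atomic_load(&sweeps_requested) == 0) {
            pthread_cond_wait(&cond, &lock);
        }
        pthread_mutex_unlock(&lock);
        if (atomic_load(&mark_requested) > 0)
            do_mark();
        if (atomic_load(&sweeps_requested) > 0) {  /* not an else: both may be pending */
            do_sweep();
            atomic_fetch_add(&sweeps_requested, -1);
        }
    }
    return NULL;
}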
