Skip to content

Commit 01fb58b

Browse files
htejun authored and torvalds committed
slab: remove synchronous synchronize_sched() from memcg cache deactivation path
With kmem cgroup support enabled, kmem_caches can be created and destroyed frequently and a great number of near empty kmem_caches can accumulate if there are a lot of transient cgroups and the system is not under memory pressure. When memory reclaim starts under such conditions, it can lead to consecutive deactivation and destruction of many kmem_caches, easily hundreds of thousands on moderately large systems, exposing scalability issues in the current slab management code. This is one of the patches to address the issue. slub uses synchronize_sched() to deactivate a memcg cache. synchronize_sched() is an expensive and slow operation and doesn't scale when a huge number of caches are destroyed back-to-back. While there used to be a simple batching mechanism, the batching was too restricted to be helpful. This patch implements slab_deactivate_memcg_cache_rcu_sched() which slub can use to schedule sched RCU callback instead of performing synchronize_sched() synchronously while holding cgroup_mutex. While this adds online cpus, mems and slab_mutex operations, operating on these locks back-to-back from the same kworker, which is what's gonna happen when there are many to deactivate, isn't expensive at all and this gets rid of the scalability problem completely. Link: http://lkml.kernel.org/r/[email protected] Signed-off-by: Tejun Heo <[email protected]> Reported-by: Jay Vana <[email protected]> Acked-by: Vladimir Davydov <[email protected]> Cc: Christoph Lameter <[email protected]> Cc: Pekka Enberg <[email protected]> Cc: David Rientjes <[email protected]> Cc: Joonsoo Kim <[email protected]> Signed-off-by: Andrew Morton <[email protected]> Signed-off-by: Linus Torvalds <[email protected]>
1 parent c9fc586 commit 01fb58b

File tree

4 files changed

+76
-4
lines changed

4 files changed

+76
-4
lines changed

include/linux/slab.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -582,6 +582,12 @@ struct memcg_cache_params {
582582
struct mem_cgroup *memcg;
583583
struct list_head children_node;
584584
struct list_head kmem_caches_node;
585+
586+
void (*deact_fn)(struct kmem_cache *);
587+
union {
588+
struct rcu_head deact_rcu_head;
589+
struct work_struct deact_work;
590+
};
585591
};
586592
};
587593
};

mm/slab.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,8 @@ static __always_inline void memcg_uncharge_slab(struct page *page, int order,
307307

308308
extern void slab_init_memcg_params(struct kmem_cache *);
309309
extern void memcg_link_cache(struct kmem_cache *s);
310+
extern void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s,
311+
void (*deact_fn)(struct kmem_cache *));
310312

311313
#else /* CONFIG_MEMCG && !CONFIG_SLOB */
312314

mm/slab_common.c

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,66 @@ void memcg_create_kmem_cache(struct mem_cgroup *memcg,
627627
put_online_cpus();
628628
}
629629

630+
static void kmemcg_deactivate_workfn(struct work_struct *work)
631+
{
632+
struct kmem_cache *s = container_of(work, struct kmem_cache,
633+
memcg_params.deact_work);
634+
635+
get_online_cpus();
636+
get_online_mems();
637+
638+
mutex_lock(&slab_mutex);
639+
640+
s->memcg_params.deact_fn(s);
641+
642+
mutex_unlock(&slab_mutex);
643+
644+
put_online_mems();
645+
put_online_cpus();
646+
647+
/* done, put the ref from slab_deactivate_memcg_cache_rcu_sched() */
648+
css_put(&s->memcg_params.memcg->css);
649+
}
650+
651+
static void kmemcg_deactivate_rcufn(struct rcu_head *head)
652+
{
653+
struct kmem_cache *s = container_of(head, struct kmem_cache,
654+
memcg_params.deact_rcu_head);
655+
656+
/*
657+
* We need to grab blocking locks. Bounce to ->deact_work. The
658+
* work item shares the space with the RCU head and can't be
659+
* initialized eariler.
660+
*/
661+
INIT_WORK(&s->memcg_params.deact_work, kmemcg_deactivate_workfn);
662+
schedule_work(&s->memcg_params.deact_work);
663+
}
664+
665+
/**
666+
* slab_deactivate_memcg_cache_rcu_sched - schedule deactivation after a
667+
* sched RCU grace period
668+
* @s: target kmem_cache
669+
* @deact_fn: deactivation function to call
670+
*
671+
* Schedule @deact_fn to be invoked with online cpus, mems and slab_mutex
672+
* held after a sched RCU grace period. The slab is guaranteed to stay
673+
* alive until @deact_fn is finished. This is to be used from
674+
* __kmemcg_cache_deactivate().
675+
*/
676+
void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s,
677+
void (*deact_fn)(struct kmem_cache *))
678+
{
679+
if (WARN_ON_ONCE(is_root_cache(s)) ||
680+
WARN_ON_ONCE(s->memcg_params.deact_fn))
681+
return;
682+
683+
/* pin memcg so that @s doesn't get destroyed in the middle */
684+
css_get(&s->memcg_params.memcg->css);
685+
686+
s->memcg_params.deact_fn = deact_fn;
687+
call_rcu_sched(&s->memcg_params.deact_rcu_head, kmemcg_deactivate_rcufn);
688+
}
689+
630690
void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
631691
{
632692
int idx;

mm/slub.c

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3957,6 +3957,12 @@ int __kmem_cache_shrink(struct kmem_cache *s)
39573957
}
39583958

39593959
#ifdef CONFIG_MEMCG
3960+
static void kmemcg_cache_deact_after_rcu(struct kmem_cache *s)
3961+
{
3962+
/* called with all the locks held after a sched RCU grace period */
3963+
__kmem_cache_shrink(s);
3964+
}
3965+
39603966
void __kmemcg_cache_deactivate(struct kmem_cache *s)
39613967
{
39623968
/*
@@ -3968,11 +3974,9 @@ void __kmemcg_cache_deactivate(struct kmem_cache *s)
39683974

39693975
/*
39703976
* s->cpu_partial is checked locklessly (see put_cpu_partial), so
3971-
* we have to make sure the change is visible.
3977+
* we have to make sure the change is visible before shrinking.
39723978
*/
3973-
synchronize_sched();
3974-
3975-
__kmem_cache_shrink(s);
3979+
slab_deactivate_memcg_cache_rcu_sched(s, kmemcg_cache_deact_after_rcu);
39763980
}
39773981
#endif
39783982

0 commit comments

Comments
 (0)