author      Vladimir Davydov <vdavydov@parallels.com>    2015-02-12 17:59:47 -0500
committer   Linus Torvalds <torvalds@linux-foundation.org>    2015-02-12 21:54:10 -0500
commit      d6e0b7fa11862433773d986b5f995ffdf47ce672 (patch)
tree        031830bb978d8861c3089941480de8effe9ccc6a
parent      ce3712d74d8ed531a9fd0fbb711ff8fefbacdd9f (diff)
slub: make dead caches discard free slabs immediately
To speed up further allocations, SLUB may store empty slabs on per-cpu/per-node partial lists instead of freeing them immediately. This prevents per-memcg cache destruction, because kmem caches created for a memory cgroup are only destroyed after the last page charged to the cgroup is freed.

To fix this issue, this patch resurrects the approach first proposed in [1]. It forbids SLUB from caching empty slabs after the memory cgroup that the cache belongs to has been destroyed. This is achieved by setting the kmem_cache's cpu_partial and min_partial constants to 0 and tuning put_cpu_partial() so that it drops frozen empty slabs immediately when cpu_partial = 0.

The runtime overhead is minimal. Of all the hot functions, we only touch the relatively cold put_cpu_partial(): we make it call unfreeze_partials() after freezing a slab that belongs to an offline memory cgroup. Since slab freezing exists to avoid moving slabs from/to a partial list on free/alloc, and there can't be allocations from dead caches, it shouldn't cause any overhead. We do have to disable preemption in put_cpu_partial() to achieve that, though.

The original patch was well received and even merged into the mm tree. However, I decided to withdraw it due to changes happening in the memcg core at that time. I had an idea of introducing per-memcg shrinkers for kmem caches, but now that memcg has finally settled down, I no longer see that as an option, because a SLUB shrinker would be too costly to call since SLUB does not keep free slabs on a separate list. Besides, we currently do not even call per-memcg shrinkers for offline memcgs. Overall, it would introduce much more complexity to both SLUB and memcg than this small patch.

As for SLAB, there is no problem with it, because it shrinks its per-cpu/per-node caches periodically. Thanks to list_lru reparenting, we no longer keep entries for offline cgroups in per-memcg arrays (such as memcg_cache_params->memcg_caches), so we do not have to worry if a per-memcg cache is shrunk a bit later than it could be.

[1] http://thread.gmane.org/gmane.linux.kernel.mm/118649/focus=118650

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
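As a rough illustration of the mechanism described above, here is a minimal userspace C sketch (not kernel code; toy_cache, toy_slab, toy_put_partial and toy_deactivate are names invented for this example). It models the idea only: while a cache is alive, an emptied slab may be parked on a partial list up to a cpu_partial limit, but once the cache is deactivated the limit is forced to 0 and emptied slabs are freed on the spot.

/* Toy model only: stands in for SLUB's per-cpu partial list handling. */
#include <stdio.h>
#include <stdlib.h>

struct toy_slab {
        struct toy_slab *next;
        int id;
};

struct toy_cache {
        struct toy_slab *partial;       /* cached empty slabs */
        int nr_partial;                 /* how many are cached */
        int cpu_partial;                /* 0 means "dead cache": never cache */
};

/* Called when a slab becomes empty on free (cf. put_cpu_partial()). */
static void toy_put_partial(struct toy_cache *c, struct toy_slab *slab)
{
        if (c->cpu_partial == 0 || c->nr_partial >= c->cpu_partial) {
                /* Dead or full: hand the page back immediately. */
                printf("slab %d discarded\n", slab->id);
                free(slab);
                return;
        }
        slab->next = c->partial;
        c->partial = slab;
        c->nr_partial++;
        printf("slab %d cached for reuse\n", slab->id);
}

/* Rough analogue of __kmem_cache_shrink(c, true): flush and disable caching. */
static void toy_deactivate(struct toy_cache *c)
{
        c->cpu_partial = 0;
        while (c->partial) {
                struct toy_slab *s = c->partial;

                c->partial = s->next;
                printf("slab %d discarded on deactivate\n", s->id);
                free(s);
        }
        c->nr_partial = 0;
}

static struct toy_slab *toy_new_slab(int id)
{
        struct toy_slab *s = malloc(sizeof(*s));

        s->id = id;
        s->next = NULL;
        return s;
}

int main(void)
{
        struct toy_cache cache = { .partial = NULL, .nr_partial = 0, .cpu_partial = 2 };
        int i;

        for (i = 0; i < 3; i++)
                toy_put_partial(&cache, toy_new_slab(i));   /* 0 and 1 cached, 2 discarded */

        toy_deactivate(&cache);                             /* flush + forbid caching */
        toy_put_partial(&cache, toy_new_slab(99));          /* now dropped immediately */
        return 0;
}

Compiled with any C99 compiler, the toy program caches slabs 0 and 1, discards slab 2 because the list is full, flushes slabs 1 and 0 on deactivation, and then discards slab 99 immediately because cpu_partial is 0.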
-rw-r--r--  mm/slab.c          4
-rw-r--r--  mm/slab.h          2
-rw-r--r--  mm/slab_common.c  15
-rw-r--r--  mm/slob.c          2
-rw-r--r--  mm/slub.c         31
5 files changed, 43 insertions, 11 deletions
diff --git a/mm/slab.c b/mm/slab.c
index 7894017bc160..c4b89eaf4c96 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2382,7 +2382,7 @@ out:
         return nr_freed;
 }
 
-int __kmem_cache_shrink(struct kmem_cache *cachep)
+int __kmem_cache_shrink(struct kmem_cache *cachep, bool deactivate)
 {
         int ret = 0;
         int node;
@@ -2404,7 +2404,7 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep)
 {
         int i;
         struct kmem_cache_node *n;
-        int rc = __kmem_cache_shrink(cachep);
+        int rc = __kmem_cache_shrink(cachep, false);
 
         if (rc)
                 return rc;
diff --git a/mm/slab.h b/mm/slab.h
index 0a56d76ac0e9..4c3ac12dd644 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -138,7 +138,7 @@ static inline unsigned long kmem_cache_flags(unsigned long object_size,
 #define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS)
 
 int __kmem_cache_shutdown(struct kmem_cache *);
-int __kmem_cache_shrink(struct kmem_cache *);
+int __kmem_cache_shrink(struct kmem_cache *, bool);
 void slab_kmem_cache_release(struct kmem_cache *);
 
 struct seq_file;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 0873bcc61c7a..1a1cc89acaa3 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -549,10 +549,13 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
 {
         int idx;
         struct memcg_cache_array *arr;
-        struct kmem_cache *s;
+        struct kmem_cache *s, *c;
 
         idx = memcg_cache_id(memcg);
 
+        get_online_cpus();
+        get_online_mems();
+
         mutex_lock(&slab_mutex);
         list_for_each_entry(s, &slab_caches, list) {
                 if (!is_root_cache(s))
@@ -560,9 +563,17 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
 
                 arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
                                                 lockdep_is_held(&slab_mutex));
+                c = arr->entries[idx];
+                if (!c)
+                        continue;
+
+                __kmem_cache_shrink(c, true);
                 arr->entries[idx] = NULL;
         }
         mutex_unlock(&slab_mutex);
+
+        put_online_mems();
+        put_online_cpus();
 }
 
 void memcg_destroy_kmem_caches(struct mem_cgroup *memcg)
@@ -649,7 +660,7 @@ int kmem_cache_shrink(struct kmem_cache *cachep)
 
         get_online_cpus();
         get_online_mems();
-        ret = __kmem_cache_shrink(cachep);
+        ret = __kmem_cache_shrink(cachep, false);
         put_online_mems();
         put_online_cpus();
         return ret;
diff --git a/mm/slob.c b/mm/slob.c
index 96a86206a26b..94a7fede6d48 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -618,7 +618,7 @@ int __kmem_cache_shutdown(struct kmem_cache *c)
         return 0;
 }
 
-int __kmem_cache_shrink(struct kmem_cache *d)
+int __kmem_cache_shrink(struct kmem_cache *d, bool deactivate)
 {
         return 0;
 }
diff --git a/mm/slub.c b/mm/slub.c
index 7fa27aee9b6e..06cdb1829dc9 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2007,6 +2007,7 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
         int pages;
         int pobjects;
 
+        preempt_disable();
         do {
                 pages = 0;
                 pobjects = 0;
@@ -2040,6 +2041,14 @@ static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
 
         } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page)
                                                                 != oldpage);
+        if (unlikely(!s->cpu_partial)) {
+                unsigned long flags;
+
+                local_irq_save(flags);
+                unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
+                local_irq_restore(flags);
+        }
+        preempt_enable();
 #endif
 }
 
@@ -3369,7 +3378,7 @@ EXPORT_SYMBOL(kfree);
  * being allocated from last increasing the chance that the last objects
  * are freed in them.
  */
-int __kmem_cache_shrink(struct kmem_cache *s)
+int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate)
 {
         int node;
         int i;
@@ -3381,11 +3390,23 @@ int __kmem_cache_shrink(struct kmem_cache *s)
         unsigned long flags;
         int ret = 0;
 
+        if (deactivate) {
+                /*
+                 * Disable empty slabs caching. Used to avoid pinning offline
+                 * memory cgroups by kmem pages that can be freed.
+                 */
+                s->cpu_partial = 0;
+                s->min_partial = 0;
+
+                /*
+                 * s->cpu_partial is checked locklessly (see put_cpu_partial),
+                 * so we have to make sure the change is visible.
+                 */
+                kick_all_cpus_sync();
+        }
+
         flush_all(s);
         for_each_kmem_cache_node(s, node, n) {
-                if (!n->nr_partial)
-                        continue;
-
                 INIT_LIST_HEAD(&discard);
                 for (i = 0; i < SHRINK_PROMOTE_MAX; i++)
                         INIT_LIST_HEAD(promote + i);
@@ -3440,7 +3461,7 @@ static int slab_mem_going_offline_callback(void *arg)
 
         mutex_lock(&slab_mutex);
         list_for_each_entry(s, &slab_caches, list)
-                __kmem_cache_shrink(s);
+                __kmem_cache_shrink(s, false);
         mutex_unlock(&slab_mutex);
 
         return 0;