author    Vladimir Davydov <vdavydov.dev@gmail.com>          2017-03-16 20:48:31 -0400
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>    2017-03-22 07:43:38 -0400
commit    bc01eb939899762eede303ffbbbfcda197316234 (patch)
tree      551b4b7771ca8137e71306cdc908e23fc0a72127
parent    bd2de45031b9b05738c91b87c1c360471c075bbd (diff)
slub: move synchronize_sched out of slab_mutex on shrink
[ Upstream commit 89e364db71fb5e7fc8d93228152abfa67daf35fa ]

synchronize_sched() is a heavy operation and calling it per each cache
owned by a memory cgroup being destroyed may take quite some time.  What
is worse, it's currently called under the slab_mutex, stalling all works
doing cache creation/destruction.

Actually, there isn't much point in calling synchronize_sched() for each
cache - it's enough to call it just once - after setting cpu_partial for
all caches and before shrinking them.  This way, we can also move it out
of the slab_mutex, which we have to hold for iterating over the slab
cache list.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=172991
Link: http://lkml.kernel.org/r/0a10d71ecae3db00fb4421bcd3f82bcc911f4be4.1475329751.git.vdavydov.dev@gmail.com
Signed-off-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Reported-by: Doug Smythies <dsmythies@telus.net>
Acked-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r--  mm/slab.c         |  4
-rw-r--r--  mm/slab.h         |  2
-rw-r--r--  mm/slab_common.c  | 27
-rw-r--r--  mm/slob.c         |  2
-rw-r--r--  mm/slub.c         | 19
5 files changed, 31 insertions(+), 23 deletions(-)
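For illustration only, here is a minimal userspace sketch of the pattern the patch applies: instead of issuing one expensive barrier per cache while still holding the list mutex, every cache is updated under the mutex and a single barrier is issued after the mutex is dropped. The names cache_list, cache_list_lock, expensive_barrier() and deactivate_all_*() are hypothetical stand-ins for slab_caches, slab_mutex, synchronize_sched() and the SLUB deactivation path; they are not kernel APIs.

#include <pthread.h>

struct cache {
	struct cache *next;
	int cpu_partial;
	int min_partial;
};

static pthread_mutex_t cache_list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct cache *cache_list;	/* stand-in for slab_caches */

/* Stand-in for synchronize_sched(): a heavy, global synchronization. */
static void expensive_barrier(void)
{
}

/*
 * Old scheme (conceptually): one barrier per cache, issued while the
 * list mutex is held, so everyone waiting on the mutex stalls behind
 * each barrier.
 */
static void deactivate_all_slow(void)
{
	struct cache *c;

	pthread_mutex_lock(&cache_list_lock);
	for (c = cache_list; c; c = c->next) {
		c->cpu_partial = 0;
		c->min_partial = 0;
		expensive_barrier();	/* N barriers, all under the lock */
	}
	pthread_mutex_unlock(&cache_list_lock);
}

/*
 * New scheme (conceptually): update every cache under the mutex, then
 * issue a single barrier after dropping it.
 */
static void deactivate_all_fast(void)
{
	struct cache *c;

	pthread_mutex_lock(&cache_list_lock);
	for (c = cache_list; c; c = c->next) {
		c->cpu_partial = 0;
		c->min_partial = 0;
	}
	pthread_mutex_unlock(&cache_list_lock);

	expensive_barrier();	/* one barrier, lock not held */
}

int main(void)
{
	deactivate_all_slow();
	deactivate_all_fast();
	return 0;
}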
diff --git a/mm/slab.c b/mm/slab.c
index bd878f051a3b..1f82d16a0518 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2332,7 +2332,7 @@ out:
 	return nr_freed;
 }
 
-int __kmem_cache_shrink(struct kmem_cache *cachep, bool deactivate)
+int __kmem_cache_shrink(struct kmem_cache *cachep)
 {
 	int ret = 0;
 	int node;
@@ -2352,7 +2352,7 @@ int __kmem_cache_shrink(struct kmem_cache *cachep, bool deactivate)
 
 int __kmem_cache_shutdown(struct kmem_cache *cachep)
 {
-	return __kmem_cache_shrink(cachep, false);
+	return __kmem_cache_shrink(cachep);
 }
 
 void __kmem_cache_release(struct kmem_cache *cachep)
diff --git a/mm/slab.h b/mm/slab.h
index bc05fdc3edce..ceb7d70cdb76 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -146,7 +146,7 @@ static inline unsigned long kmem_cache_flags(unsigned long object_size,
 
 int __kmem_cache_shutdown(struct kmem_cache *);
 void __kmem_cache_release(struct kmem_cache *);
-int __kmem_cache_shrink(struct kmem_cache *, bool);
+int __kmem_cache_shrink(struct kmem_cache *);
 void slab_kmem_cache_release(struct kmem_cache *);
 
 struct seq_file;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 329b03843863..5d2f24fbafc5 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -573,6 +573,29 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
 	get_online_cpus();
 	get_online_mems();
 
+#ifdef CONFIG_SLUB
+	/*
+	 * In case of SLUB, we need to disable empty slab caching to
+	 * avoid pinning the offline memory cgroup by freeable kmem
+	 * pages charged to it. SLAB doesn't need this, as it
+	 * periodically purges unused slabs.
+	 */
+	mutex_lock(&slab_mutex);
+	list_for_each_entry(s, &slab_caches, list) {
+		c = is_root_cache(s) ? cache_from_memcg_idx(s, idx) : NULL;
+		if (c) {
+			c->cpu_partial = 0;
+			c->min_partial = 0;
+		}
+	}
+	mutex_unlock(&slab_mutex);
+	/*
+	 * kmem_cache->cpu_partial is checked locklessly (see
+	 * put_cpu_partial()). Make sure the change is visible.
+	 */
+	synchronize_sched();
+#endif
+
 	mutex_lock(&slab_mutex);
 	list_for_each_entry(s, &slab_caches, list) {
 		if (!is_root_cache(s))
@@ -584,7 +607,7 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
 		if (!c)
 			continue;
 
-		__kmem_cache_shrink(c, true);
+		__kmem_cache_shrink(c);
 		arr->entries[idx] = NULL;
 	}
 	mutex_unlock(&slab_mutex);
@@ -755,7 +778,7 @@ int kmem_cache_shrink(struct kmem_cache *cachep)
 	get_online_cpus();
 	get_online_mems();
 	kasan_cache_shrink(cachep);
-	ret = __kmem_cache_shrink(cachep, false);
+	ret = __kmem_cache_shrink(cachep);
 	put_online_mems();
 	put_online_cpus();
 	return ret;
diff --git a/mm/slob.c b/mm/slob.c
index 5ec158054ffe..eac04d4357ec 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -634,7 +634,7 @@ void __kmem_cache_release(struct kmem_cache *c)
 {
 }
 
-int __kmem_cache_shrink(struct kmem_cache *d, bool deactivate)
+int __kmem_cache_shrink(struct kmem_cache *d)
 {
 	return 0;
 }
diff --git a/mm/slub.c b/mm/slub.c
index 7aa0e97af928..58c7526f8de2 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3887,7 +3887,7 @@ EXPORT_SYMBOL(kfree);
  * being allocated from last increasing the chance that the last objects
  * are freed in them.
  */
-int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate)
+int __kmem_cache_shrink(struct kmem_cache *s)
 {
 	int node;
 	int i;
@@ -3899,21 +3899,6 @@ int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate)
 	unsigned long flags;
 	int ret = 0;
 
-	if (deactivate) {
-		/*
-		 * Disable empty slabs caching. Used to avoid pinning offline
-		 * memory cgroups by kmem pages that can be freed.
-		 */
-		s->cpu_partial = 0;
-		s->min_partial = 0;
-
-		/*
-		 * s->cpu_partial is checked locklessly (see put_cpu_partial),
-		 * so we have to make sure the change is visible.
-		 */
-		synchronize_sched();
-	}
-
 	flush_all(s);
 	for_each_kmem_cache_node(s, node, n) {
 		INIT_LIST_HEAD(&discard);
@@ -3970,7 +3955,7 @@ static int slab_mem_going_offline_callback(void *arg)
 
 	mutex_lock(&slab_mutex);
 	list_for_each_entry(s, &slab_caches, list)
-		__kmem_cache_shrink(s, false);
+		__kmem_cache_shrink(s);
 	mutex_unlock(&slab_mutex);
 
 	return 0;