author     Vladimir Davydov <vdavydov.dev@gmail.com>        2017-03-16 20:48:31 -0400
committer  Greg Kroah-Hartman <gregkh@linuxfoundation.org>  2017-03-22 07:43:38 -0400
commit     bc01eb939899762eede303ffbbbfcda197316234
tree       551b4b7771ca8137e71306cdc908e23fc0a72127
parent     bd2de45031b9b05738c91b87c1c360471c075bbd
slub: move synchronize_sched out of slab_mutex on shrink
[ Upstream commit 89e364db71fb5e7fc8d93228152abfa67daf35fa ]
synchronize_sched() is a heavy operation and calling it for each cache
owned by a memory cgroup being destroyed may take quite some time. What
is worse, it's currently called under the slab_mutex, stalling all works
doing cache creation/destruction.
Actually, there isn't much point in calling synchronize_sched() for each
cache - it's enough to call it just once - after setting cpu_partial for
all caches and before shrinking them. This way, we can also move it out
of the slab_mutex, which we have to hold for iterating over the slab
cache list.
Link: https://bugzilla.kernel.org/show_bug.cgi?id=172991
Link: http://lkml.kernel.org/r/0a10d71ecae3db00fb4421bcd3f82bcc911f4be4.1475329751.git.vdavydov.dev@gmail.com
Signed-off-by: Vladimir Davydov <vdavydov.dev@gmail.com>
Reported-by: Doug Smythies <dsmythies@telus.net>
Acked-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <alexander.levin@verizon.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
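To make the restructuring described in the log above concrete, here is a minimal
user-space C sketch of the same pattern: the single heavy barrier is hoisted out
of the per-cache loop and out of the mutex. heavy_barrier(), cache_mutex,
toy_cache and shrink() are invented stand-ins for synchronize_sched(),
slab_mutex, struct kmem_cache and __kmem_cache_shrink(); this is an illustration
of the idea, not the patched kernel code.

/*
 * Illustrative user-space sketch of the restructuring in this patch; it is
 * not kernel code.  heavy_barrier() stands in for synchronize_sched(),
 * cache_mutex for slab_mutex, and toy_cache for struct kmem_cache -- all
 * invented names.  Build with: cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdio.h>
#include <time.h>

#define NCACHES 4

struct toy_cache {
	int cpu_partial;
	int min_partial;
};

static struct toy_cache caches[NCACHES];
static pthread_mutex_t cache_mutex = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for synchronize_sched(): a single expensive grace-period wait. */
static void heavy_barrier(void)
{
	struct timespec ts = { 0, 100 * 1000 * 1000 };	/* 100 ms */
	nanosleep(&ts, NULL);
}

/* Stand-in for __kmem_cache_shrink(): release empty slabs (no-op here). */
static void shrink(struct toy_cache *c)
{
	(void)c;
}

/*
 * Old shape: one barrier per cache, taken while cache_mutex is held, so a
 * cgroup with N caches pays N grace periods and blocks every other cache
 * create/destroy path for the whole time.
 */
static void deactivate_old(void)
{
	pthread_mutex_lock(&cache_mutex);
	for (int i = 0; i < NCACHES; i++) {
		caches[i].cpu_partial = 0;
		caches[i].min_partial = 0;
		heavy_barrier();
		shrink(&caches[i]);
	}
	pthread_mutex_unlock(&cache_mutex);
}

/*
 * New shape: disable partial-slab caching for every cache first, drop the
 * mutex, wait for one barrier so lockless readers observe the new values,
 * then shrink the caches.
 */
static void deactivate_new(void)
{
	pthread_mutex_lock(&cache_mutex);
	for (int i = 0; i < NCACHES; i++) {
		caches[i].cpu_partial = 0;
		caches[i].min_partial = 0;
	}
	pthread_mutex_unlock(&cache_mutex);

	heavy_barrier();	/* once, outside the lock */

	pthread_mutex_lock(&cache_mutex);
	for (int i = 0; i < NCACHES; i++)
		shrink(&caches[i]);
	pthread_mutex_unlock(&cache_mutex);
}

int main(void)
{
	deactivate_old();
	deactivate_new();
	printf("old: %d barriers under the lock, new: 1 barrier outside it\n",
	       NCACHES);
	return 0;
}

With NCACHES caches the old shape pays NCACHES grace periods while holding the
lock; the new shape pays exactly one, with the lock dropped, which is what the
mm/slab_common.c hunk below implements for the memcg case.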
 mm/slab.c        |  4
 mm/slab.h        |  2
 mm/slab_common.c | 27
 mm/slob.c        |  2
 mm/slub.c        | 19
 5 files changed, 31 insertions(+), 23 deletions(-)
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2332,7 +2332,7 @@ out:
 	return nr_freed;
 }
 
-int __kmem_cache_shrink(struct kmem_cache *cachep, bool deactivate)
+int __kmem_cache_shrink(struct kmem_cache *cachep)
 {
 	int ret = 0;
 	int node;
@@ -2352,7 +2352,7 @@ int __kmem_cache_shrink(struct kmem_cache *cachep, bool deactivate)
 
 int __kmem_cache_shutdown(struct kmem_cache *cachep)
 {
-	return __kmem_cache_shrink(cachep, false);
+	return __kmem_cache_shrink(cachep);
 }
 
 void __kmem_cache_release(struct kmem_cache *cachep)
diff --git a/mm/slab.h b/mm/slab.h
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -146,7 +146,7 @@ static inline unsigned long kmem_cache_flags(unsigned long object_size,
 
 int __kmem_cache_shutdown(struct kmem_cache *);
 void __kmem_cache_release(struct kmem_cache *);
-int __kmem_cache_shrink(struct kmem_cache *, bool);
+int __kmem_cache_shrink(struct kmem_cache *);
 void slab_kmem_cache_release(struct kmem_cache *);
 
 struct seq_file;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 329b03843863..5d2f24fbafc5 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -573,6 +573,29 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
 	get_online_cpus();
 	get_online_mems();
 
+#ifdef CONFIG_SLUB
+	/*
+	 * In case of SLUB, we need to disable empty slab caching to
+	 * avoid pinning the offline memory cgroup by freeable kmem
+	 * pages charged to it. SLAB doesn't need this, as it
+	 * periodically purges unused slabs.
+	 */
+	mutex_lock(&slab_mutex);
+	list_for_each_entry(s, &slab_caches, list) {
+		c = is_root_cache(s) ? cache_from_memcg_idx(s, idx) : NULL;
+		if (c) {
+			c->cpu_partial = 0;
+			c->min_partial = 0;
+		}
+	}
+	mutex_unlock(&slab_mutex);
+	/*
+	 * kmem_cache->cpu_partial is checked locklessly (see
+	 * put_cpu_partial()). Make sure the change is visible.
+	 */
+	synchronize_sched();
+#endif
+
 	mutex_lock(&slab_mutex);
 	list_for_each_entry(s, &slab_caches, list) {
 		if (!is_root_cache(s))
@@ -584,7 +607,7 @@ void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg)
 		if (!c)
 			continue;
 
-		__kmem_cache_shrink(c, true);
+		__kmem_cache_shrink(c);
 		arr->entries[idx] = NULL;
 	}
 	mutex_unlock(&slab_mutex);
@@ -755,7 +778,7 @@ int kmem_cache_shrink(struct kmem_cache *cachep)
 	get_online_cpus();
 	get_online_mems();
 	kasan_cache_shrink(cachep);
-	ret = __kmem_cache_shrink(cachep, false);
+	ret = __kmem_cache_shrink(cachep);
 	put_online_mems();
 	put_online_cpus();
 	return ret;
diff --git a/mm/slob.c b/mm/slob.c
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -634,7 +634,7 @@ void __kmem_cache_release(struct kmem_cache *c)
 {
 }
 
-int __kmem_cache_shrink(struct kmem_cache *d, bool deactivate)
+int __kmem_cache_shrink(struct kmem_cache *d)
 {
 	return 0;
 }
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3887,7 +3887,7 @@ EXPORT_SYMBOL(kfree);
  * being allocated from last increasing the chance that the last objects
  * are freed in them.
  */
-int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate)
+int __kmem_cache_shrink(struct kmem_cache *s)
 {
 	int node;
 	int i;
@@ -3899,21 +3899,6 @@ int __kmem_cache_shrink(struct kmem_cache *s, bool deactivate)
 	unsigned long flags;
 	int ret = 0;
 
-	if (deactivate) {
-		/*
-		 * Disable empty slabs caching. Used to avoid pinning offline
-		 * memory cgroups by kmem pages that can be freed.
-		 */
-		s->cpu_partial = 0;
-		s->min_partial = 0;
-
-		/*
-		 * s->cpu_partial is checked locklessly (see put_cpu_partial),
-		 * so we have to make sure the change is visible.
-		 */
-		synchronize_sched();
-	}
-
 	flush_all(s);
 	for_each_kmem_cache_node(s, node, n) {
 		INIT_LIST_HEAD(&discard);
@@ -3970,7 +3955,7 @@ static int slab_mem_going_offline_callback(void *arg)
 
 	mutex_lock(&slab_mutex);
 	list_for_each_entry(s, &slab_caches, list)
-		__kmem_cache_shrink(s, false);
+		__kmem_cache_shrink(s);
 	mutex_unlock(&slab_mutex);
 
 	return 0;