author     Joonsoo Kim <iamjoonsoo.kim@lge.com>	2016-05-19 20:10:02 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>	2016-05-19 22:12:14 -0400
commit     18726ca8b34bbfb3ab5a1c0a52a5d8dd392466ed
tree       90bdc46a1eda7b6b67c54bfb78ed851478aac027 /mm/slab.c
parent     19d795b677bda354644cfb87a196b087fdc2a965
mm/slab: fix the theoretical race by holding proper lock
While processing concurrent allocations, SLAB can be heavily contended because it does a lot of work while holding a lock. This patchset tries to reduce the number of critical sections in order to reduce lock contention. The major changes are a lockless decision to allocate more slabs and a lockless cpu cache refill from the newly allocated slab.

Below are the results of the concurrent allocation/free slab benchmark that Christoph wrote a long time ago; I have simplified the output. The numbers are cycle counts for alloc/free respectively, so lower is better.

* Before
Kmalloc N*alloc N*free(32): Average=365/806
Kmalloc N*alloc N*free(64): Average=452/690
Kmalloc N*alloc N*free(128): Average=736/886
Kmalloc N*alloc N*free(256): Average=1167/985
Kmalloc N*alloc N*free(512): Average=2088/1125
Kmalloc N*alloc N*free(1024): Average=4115/1184
Kmalloc N*alloc N*free(2048): Average=8451/1748
Kmalloc N*alloc N*free(4096): Average=16024/2048

* After
Kmalloc N*alloc N*free(32): Average=344/792
Kmalloc N*alloc N*free(64): Average=347/882
Kmalloc N*alloc N*free(128): Average=390/959
Kmalloc N*alloc N*free(256): Average=393/1067
Kmalloc N*alloc N*free(512): Average=683/1229
Kmalloc N*alloc N*free(1024): Average=1295/1325
Kmalloc N*alloc N*free(2048): Average=2513/1664
Kmalloc N*alloc N*free(4096): Average=4742/2172

The results show that allocation performance improves greatly (by roughly 50% or more) for object classes larger than 128 bytes.

This patch (of 11):

If we hold neither the slab_mutex nor the node lock, the node's shared array cache can be freed and re-populated. If __kmem_cache_shrink() is called at the same time, it will call drain_array() with n->shared without holding the node lock, so this race can happen. Fix the situation by holding the node lock before draining the shared array.

In addition, add a debug check to confirm that no race on n->shared access exists.

Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
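[Editor's note] This patch factors the actual drain work into a new helper, drain_array_locked(), which frees roughly a fifth of the array cache's capacity per pass (or everything, for a full drain), capped at half of what is actually cached, and runs only with the node's list lock held. Below is a minimal userspace model of that computation; the struct layout, the 64-entry cache, and the pthread mutex standing in for n->list_lock are illustrative assumptions, not kernel code.

/* Simplified userspace model of the SLAB shared-array drain math.
 * struct array_cache here only mirrors the fields the drain logic
 * touches; the pthread mutex plays the role of n->list_lock. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct array_cache {
	unsigned int avail;	/* objects currently cached */
	unsigned int limit;	/* capacity of entry[] */
	void *entry[64];
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

/* Mirrors drain_array_locked(): the caller must hold list_lock. */
static void drain_array_locked(struct array_cache *ac, bool free_all)
{
	unsigned int tofree;

	if (!ac || !ac->avail)
		return;

	/* Free everything, or about a fifth of the cache's capacity... */
	tofree = free_all ? ac->avail : (ac->limit + 4) / 5;
	/* ...but never more than half of what is actually cached. */
	if (tofree > ac->avail)
		tofree = (ac->avail + 1) / 2;

	/* The kernel hands these objects to free_block(); here we just drop them. */
	ac->avail -= tofree;
	memmove(ac->entry, &ac->entry[tofree], sizeof(void *) * ac->avail);
}

int main(void)
{
	struct array_cache ac = { .avail = 60, .limit = 64 };

	pthread_mutex_lock(&list_lock);
	drain_array_locked(&ac, false);		/* periodic reap: partial drain */
	pthread_mutex_unlock(&list_lock);
	printf("after partial drain: %u cached\n", ac.avail);

	pthread_mutex_lock(&list_lock);
	drain_array_locked(&ac, true);		/* cache teardown: drain everything */
	pthread_mutex_unlock(&list_lock);
	printf("after full drain: %u cached\n", ac.avail);
	return 0;
}

With limit = 64 and 60 cached objects, the partial pass frees (64 + 4) / 5 = 13 objects, leaving 47; the full pass then empties the cache.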
Diffstat (limited to 'mm/slab.c')
-rw-r--r--  mm/slab.c | 68
1 file changed, 45 insertions(+), 23 deletions(-)
diff --git a/mm/slab.c b/mm/slab.c
index 17e2848979c5..3f1cc1ca4d88 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2180,6 +2180,11 @@ static void check_irq_on(void)
 	BUG_ON(irqs_disabled());
 }
 
+static void check_mutex_acquired(void)
+{
+	BUG_ON(!mutex_is_locked(&slab_mutex));
+}
+
 static void check_spinlock_acquired(struct kmem_cache *cachep)
 {
 #ifdef CONFIG_SMP
@@ -2199,13 +2204,27 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
 #else
 #define check_irq_off() do { } while(0)
 #define check_irq_on() do { } while(0)
+#define check_mutex_acquired() do { } while(0)
 #define check_spinlock_acquired(x) do { } while(0)
 #define check_spinlock_acquired_node(x, y) do { } while(0)
 #endif
 
-static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
-			struct array_cache *ac,
-			int force, int node);
+static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac,
+				int node, bool free_all, struct list_head *list)
+{
+	int tofree;
+
+	if (!ac || !ac->avail)
+		return;
+
+	tofree = free_all ? ac->avail : (ac->limit + 4) / 5;
+	if (tofree > ac->avail)
+		tofree = (ac->avail + 1) / 2;
+
+	free_block(cachep, ac->entry, tofree, node, list);
+	ac->avail -= tofree;
+	memmove(ac->entry, &(ac->entry[tofree]), sizeof(void *) * ac->avail);
+}
 
 static void do_drain(void *arg)
 {
@@ -2229,6 +2248,7 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
 {
 	struct kmem_cache_node *n;
 	int node;
+	LIST_HEAD(list);
 
 	on_each_cpu(do_drain, cachep, 1);
 	check_irq_on();
@@ -2236,8 +2256,13 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
 		if (n->alien)
 			drain_alien_cache(cachep, n->alien);
 
-	for_each_kmem_cache_node(cachep, node, n)
-		drain_array(cachep, n, n->shared, 1, node);
+	for_each_kmem_cache_node(cachep, node, n) {
+		spin_lock_irq(&n->list_lock);
+		drain_array_locked(cachep, n->shared, node, true, &list);
+		spin_unlock_irq(&n->list_lock);
+
+		slabs_destroy(cachep, &list);
+	}
 }
 
 /*
@@ -3869,29 +3894,26 @@ skip_setup:
  * if drain_array() is used on the shared array.
  */
 static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
-			 struct array_cache *ac, int force, int node)
+			 struct array_cache *ac, int node)
 {
 	LIST_HEAD(list);
-	int tofree;
+
+	/* ac from n->shared can be freed if we don't hold the slab_mutex. */
+	check_mutex_acquired();
 
 	if (!ac || !ac->avail)
 		return;
-	if (ac->touched && !force) {
+
+	if (ac->touched) {
 		ac->touched = 0;
-	} else {
-		spin_lock_irq(&n->list_lock);
-		if (ac->avail) {
-			tofree = force ? ac->avail : (ac->limit + 4) / 5;
-			if (tofree > ac->avail)
-				tofree = (ac->avail + 1) / 2;
-			free_block(cachep, ac->entry, tofree, node, &list);
-			ac->avail -= tofree;
-			memmove(ac->entry, &(ac->entry[tofree]),
-				sizeof(void *) * ac->avail);
-		}
-		spin_unlock_irq(&n->list_lock);
-		slabs_destroy(cachep, &list);
+		return;
 	}
+
+	spin_lock_irq(&n->list_lock);
+	drain_array_locked(cachep, ac, node, false, &list);
+	spin_unlock_irq(&n->list_lock);
+
+	slabs_destroy(cachep, &list);
 }
 
 /**
@@ -3929,7 +3951,7 @@ static void cache_reap(struct work_struct *w)
 
 	reap_alien(searchp, n);
 
-	drain_array(searchp, n, cpu_cache_get(searchp), 0, node);
+	drain_array(searchp, n, cpu_cache_get(searchp), node);
 
 	/*
 	 * These are racy checks but it does not matter
@@ -3940,7 +3962,7 @@ static void cache_reap(struct work_struct *w)
 
 	n->next_reap = jiffies + REAPTIMEOUT_NODE;
 
-	drain_array(searchp, n, n->shared, 0, node);
+	drain_array(searchp, n, n->shared, node);
 
 	if (n->free_touched)
 		n->free_touched = 0;
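[Editor's note] Taken together, the hunks above enforce two rules: drain_array() is only called with slab_mutex held (asserted by the new check_mutex_acquired()), and n->shared is only touched under n->list_lock, with slabs_destroy() deferred until after the lock is dropped. Below is a compact userspace sketch of that locking discipline; the pthread mutexes stand in for slab_mutex and the per-node list_lock, and a plain counter stands in for the slab pages, so treat it as an illustrative model rather than kernel code.

/* Userspace model of the locking rules the patch enforces. */
#include <assert.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t slab_mutex = PTHREAD_MUTEX_INITIALIZER;

struct node {
	pthread_mutex_t list_lock;	/* stand-in for n->list_lock */
	int shared_objs;		/* objects sitting in n->shared */
};

/* Analogue of check_mutex_acquired(): complain if the caller reached
 * the shared array without holding slab_mutex. */
static void check_mutex_acquired(void)
{
	if (pthread_mutex_trylock(&slab_mutex) == 0) {
		/* We got the lock, so nobody held it: that is the bug. */
		pthread_mutex_unlock(&slab_mutex);
		assert(0 && "slab_mutex not held");
	}
}

static void drain_shared(struct node *n)
{
	int to_destroy;

	check_mutex_acquired();		/* n->shared is only stable under slab_mutex */

	pthread_mutex_lock(&n->list_lock);
	to_destroy = n->shared_objs;	/* detach the work while the lock is held */
	n->shared_objs = 0;
	pthread_mutex_unlock(&n->list_lock);

	/* The expensive part (slabs_destroy() in the kernel) runs unlocked. */
	printf("destroying %d objects outside list_lock\n", to_destroy);
}

int main(void)
{
	struct node n = { .list_lock = PTHREAD_MUTEX_INITIALIZER, .shared_objs = 128 };

	pthread_mutex_lock(&slab_mutex);
	drain_shared(&n);
	pthread_mutex_unlock(&slab_mutex);
	return 0;
}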