author     Joonsoo Kim <iamjoonsoo.kim@lge.com>	2016-05-19 20:10:02 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>	2016-05-19 22:12:14 -0400
commit     18726ca8b34bbfb3ab5a1c0a52a5d8dd392466ed
tree       90bdc46a1eda7b6b67c54bfb78ed851478aac027 /mm/slab.c
parent     19d795b677bda354644cfb87a196b087fdc2a965
mm/slab: fix the theoretical race by holding proper lock
While processing concurrent allocations, SLAB can be heavily contended because it does a lot of work while holding a lock. This patchset tries to reduce the number of critical sections in order to reduce lock contention. The major changes are a lockless decision to allocate more slabs and a lockless cpu cache refill from the newly allocated slab.

Below are the results of the concurrent allocation/free slab benchmark that Christoph wrote a long time ago; I have simplified the output. The numbers are cycle counts for alloc/free respectively, so lower is better.

* Before
Kmalloc N*alloc N*free(32): Average=365/806
Kmalloc N*alloc N*free(64): Average=452/690
Kmalloc N*alloc N*free(128): Average=736/886
Kmalloc N*alloc N*free(256): Average=1167/985
Kmalloc N*alloc N*free(512): Average=2088/1125
Kmalloc N*alloc N*free(1024): Average=4115/1184
Kmalloc N*alloc N*free(2048): Average=8451/1748
Kmalloc N*alloc N*free(4096): Average=16024/2048

* After
Kmalloc N*alloc N*free(32): Average=344/792
Kmalloc N*alloc N*free(64): Average=347/882
Kmalloc N*alloc N*free(128): Average=390/959
Kmalloc N*alloc N*free(256): Average=393/1067
Kmalloc N*alloc N*free(512): Average=683/1229
Kmalloc N*alloc N*free(1024): Average=1295/1325
Kmalloc N*alloc N*free(2048): Average=2513/1664
Kmalloc N*alloc N*free(4096): Average=4742/2172

The results show that allocation performance improves greatly (by roughly 50% or more) for object classes larger than 128 bytes.

This patch (of 11):

If we hold neither the slab_mutex nor the node lock, the node's shared array cache can be freed and re-populated. If __kmem_cache_shrink() is called at the same time, it will call drain_array() with n->shared without holding the node lock, so this race can happen. Fix the situation by holding the node lock before draining the shared array.

In addition, add a debug check to confirm that no race on n->shared access exists.

Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
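[Editor's note] This patch factors the actual drain work into a new helper, drain_array_locked(), which frees roughly a fifth of the array cache's capacity per pass (or everything, for a full drain), capped at half of what is actually cached, and runs only with the node's list lock held. Below is a minimal userspace model of that computation; the struct layout, the 64-entry cache, and the pthread mutex standing in for n->list_lock are illustrative assumptions, not kernel code.

/* Simplified userspace model of the SLAB shared-array drain math.
 * struct array_cache here only mirrors the fields the drain logic
 * touches; the pthread mutex plays the role of n->list_lock. */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct array_cache {
	unsigned int avail;	/* objects currently cached */
	unsigned int limit;	/* capacity of entry[] */
	void *entry[64];
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

/* Mirrors drain_array_locked(): the caller must hold list_lock. */
static void drain_array_locked(struct array_cache *ac, bool free_all)
{
	unsigned int tofree;

	if (!ac || !ac->avail)
		return;

	/* Free everything, or about a fifth of the cache's capacity... */
	tofree = free_all ? ac->avail : (ac->limit + 4) / 5;
	/* ...but never more than half of what is actually cached. */
	if (tofree > ac->avail)
		tofree = (ac->avail + 1) / 2;

	/* The kernel hands these objects to free_block(); here we just drop them. */
	ac->avail -= tofree;
	memmove(ac->entry, &ac->entry[tofree], sizeof(void *) * ac->avail);
}

int main(void)
{
	struct array_cache ac = { .avail = 60, .limit = 64 };

	pthread_mutex_lock(&list_lock);
	drain_array_locked(&ac, false);		/* periodic reap: partial drain */
	pthread_mutex_unlock(&list_lock);
	printf("after partial drain: %u cached\n", ac.avail);

	pthread_mutex_lock(&list_lock);
	drain_array_locked(&ac, true);		/* cache teardown: drain everything */
	pthread_mutex_unlock(&list_lock);
	printf("after full drain: %u cached\n", ac.avail);
	return 0;
}

With limit = 64 and 60 cached objects, the partial pass frees (64 + 4) / 5 = 13 objects, leaving 47; the full pass then empties the cache.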
Diffstat (limited to 'mm/slab.c')
-rw-r--r--  mm/slab.c | 68
1 file changed, 45 insertions(+), 23 deletions(-)
diff --git a/mm/slab.c b/mm/slab.c
index 17e2848979c5..3f1cc1ca4d88 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2180,6 +2180,11 @@ static void check_irq_on(void)
 	BUG_ON(irqs_disabled());
 }
 
+static void check_mutex_acquired(void)
+{
+	BUG_ON(!mutex_is_locked(&slab_mutex));
+}
+
 static void check_spinlock_acquired(struct kmem_cache *cachep)
 {
 #ifdef CONFIG_SMP
@@ -2199,13 +2204,27 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
 #else
 #define check_irq_off() do { } while(0)
 #define check_irq_on() do { } while(0)
+#define check_mutex_acquired() do { } while(0)
 #define check_spinlock_acquired(x) do { } while(0)
 #define check_spinlock_acquired_node(x, y) do { } while(0)
 #endif
 
-static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
-			struct array_cache *ac,
-			int force, int node);
+static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac,
+				int node, bool free_all, struct list_head *list)
+{
+	int tofree;
+
+	if (!ac || !ac->avail)
+		return;
+
+	tofree = free_all ? ac->avail : (ac->limit + 4) / 5;
+	if (tofree > ac->avail)
+		tofree = (ac->avail + 1) / 2;
+
+	free_block(cachep, ac->entry, tofree, node, list);
+	ac->avail -= tofree;
+	memmove(ac->entry, &(ac->entry[tofree]), sizeof(void *) * ac->avail);
+}
 
 static void do_drain(void *arg)
 {
@@ -2229,6 +2248,7 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
 {
 	struct kmem_cache_node *n;
 	int node;
+	LIST_HEAD(list);
 
 	on_each_cpu(do_drain, cachep, 1);
 	check_irq_on();
@@ -2236,8 +2256,13 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
 		if (n->alien)
 			drain_alien_cache(cachep, n->alien);
 
-	for_each_kmem_cache_node(cachep, node, n)
-		drain_array(cachep, n, n->shared, 1, node);
+	for_each_kmem_cache_node(cachep, node, n) {
+		spin_lock_irq(&n->list_lock);
+		drain_array_locked(cachep, n->shared, node, true, &list);
+		spin_unlock_irq(&n->list_lock);
+
+		slabs_destroy(cachep, &list);
+	}
 }
 
 /*
@@ -3869,29 +3894,26 @@ skip_setup:
  * if drain_array() is used on the shared array.
  */
 static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
-			 struct array_cache *ac, int force, int node)
+			 struct array_cache *ac, int node)
 {
 	LIST_HEAD(list);
-	int tofree;
+
+	/* ac from n->shared can be freed if we don't hold the slab_mutex. */
+	check_mutex_acquired();
 
 	if (!ac || !ac->avail)
 		return;
-	if (ac->touched && !force) {
+
+	if (ac->touched) {
 		ac->touched = 0;
-	} else {
-		spin_lock_irq(&n->list_lock);
-		if (ac->avail) {
-			tofree = force ? ac->avail : (ac->limit + 4) / 5;
-			if (tofree > ac->avail)
-				tofree = (ac->avail + 1) / 2;
-			free_block(cachep, ac->entry, tofree, node, &list);
-			ac->avail -= tofree;
-			memmove(ac->entry, &(ac->entry[tofree]),
-				sizeof(void *) * ac->avail);
-		}
-		spin_unlock_irq(&n->list_lock);
-		slabs_destroy(cachep, &list);
+		return;
 	}
+
+	spin_lock_irq(&n->list_lock);
+	drain_array_locked(cachep, ac, node, false, &list);
+	spin_unlock_irq(&n->list_lock);
+
+	slabs_destroy(cachep, &list);
 }
 
 /**
@@ -3929,7 +3951,7 @@ static void cache_reap(struct work_struct *w)
 
 	reap_alien(searchp, n);
 
-	drain_array(searchp, n, cpu_cache_get(searchp), 0, node);
+	drain_array(searchp, n, cpu_cache_get(searchp), node);
 
 	/*
 	 * These are racy checks but it does not matter
@@ -3940,7 +3962,7 @@ static void cache_reap(struct work_struct *w)
 
 	n->next_reap = jiffies + REAPTIMEOUT_NODE;
 
-	drain_array(searchp, n, n->shared, 0, node);
+	drain_array(searchp, n, n->shared, node);
 
 	if (n->free_touched)
 		n->free_touched = 0;
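[Editor's note] Taken together, the hunks above enforce two rules: drain_array() is only called with slab_mutex held (asserted by the new check_mutex_acquired()), and n->shared is only touched under n->list_lock, with slabs_destroy() deferred until after the lock is dropped. Below is a compact userspace sketch of that locking discipline; the pthread mutexes stand in for slab_mutex and the per-node list_lock, and a plain counter stands in for the slab pages, so treat it as an illustrative model rather than kernel code.

/* Userspace model of the locking rules the patch enforces. */
#include <assert.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t slab_mutex = PTHREAD_MUTEX_INITIALIZER;

struct node {
	pthread_mutex_t list_lock;	/* stand-in for n->list_lock */
	int shared_objs;		/* objects sitting in n->shared */
};

/* Analogue of check_mutex_acquired(): complain if the caller reached
 * the shared array without holding slab_mutex. */
static void check_mutex_acquired(void)
{
	if (pthread_mutex_trylock(&slab_mutex) == 0) {
		/* We got the lock, so nobody held it: that is the bug. */
		pthread_mutex_unlock(&slab_mutex);
		assert(0 && "slab_mutex not held");
	}
}

static void drain_shared(struct node *n)
{
	int to_destroy;

	check_mutex_acquired();		/* n->shared is only stable under slab_mutex */

	pthread_mutex_lock(&n->list_lock);
	to_destroy = n->shared_objs;	/* detach the work while the lock is held */
	n->shared_objs = 0;
	pthread_mutex_unlock(&n->list_lock);

	/* The expensive part (slabs_destroy() in the kernel) runs unlocked. */
	printf("destroying %d objects outside list_lock\n", to_destroy);
}

int main(void)
{
	struct node n = { .list_lock = PTHREAD_MUTEX_INITIALIZER, .shared_objs = 128 };

	pthread_mutex_lock(&slab_mutex);
	drain_shared(&n);
	pthread_mutex_unlock(&slab_mutex);
	return 0;
}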