author    Joonsoo Kim <js1304@gmail.com>      2012-06-08 13:23:16 -0400
committer Pekka Enberg <penberg@kernel.org>  2012-06-20 03:17:45 -0400
commit    43d77867a4f333de4e4189114c480dd365133c09 (patch)
tree      63eefd04b561b7563eb6e92a4ca923b378a30f7a /mm/slub.c
parent    d24ac77f71ded6a013bacb09f359eac0b0f29a80 (diff)
slub: refactoring unfreeze_partials()
The current implementation of unfreeze_partials() is quite complicated, but the benefit from it is insignificant. In addition, much of the code inside the do {} while loop has a bad influence on the failure rate of cmpxchg_double_slab(). Under the current implementation, which tests the status of the cpu partial slab and acquires list_lock inside the do {} while loop, we can skip taking list_lock and gain a little benefit when the front of the cpu partial slab is to be discarded, but this is a rare case. And if add_partial() has been performed and cmpxchg_double_slab() then fails, remove_partial() has to be called to undo it, case by case.

I think these are disadvantages of the current implementation, so I refactored unfreeze_partials().

Minimizing the code inside the do {} while loop reduces the failure rate of cmpxchg_double_slab(). Below is the output of 'slabinfo -r kmalloc-256' after running './perf stat -r 33 hackbench 50 process 4000 > /dev/null'.

** before **
Cmpxchg_double Looping
------------------------
Locked Cmpxchg Double redos   182685
Unlocked Cmpxchg Double redos 0

** after **
Cmpxchg_double Looping
------------------------
Locked Cmpxchg Double redos   177995
Unlocked Cmpxchg Double redos 1

We can see that the cmpxchg_double_slab() failure rate is slightly improved.

Below is the output of './perf stat -r 30 hackbench 50 process 4000 > /dev/null'.

** before **
 Performance counter stats for './hackbench 50 process 4000' (30 runs):

     108517.190463 task-clock                #    7.926 CPUs utilized            ( +-  0.24% )
         2,919,550 context-switches          #    0.027 M/sec                    ( +-  3.07% )
           100,774 CPU-migrations            #    0.929 K/sec                    ( +-  4.72% )
           124,201 page-faults               #    0.001 M/sec                    ( +-  0.15% )
   401,500,234,387 cycles                    #    3.700 GHz                      ( +-  0.24% )
   <not supported> stalled-cycles-frontend
   <not supported> stalled-cycles-backend
   250,576,913,354 instructions              #    0.62  insns per cycle          ( +-  0.13% )
    45,934,956,860 branches                  #  423.297 M/sec                    ( +-  0.14% )
       188,219,787 branch-misses             #    0.41% of all branches          ( +-  0.56% )

      13.691837307 seconds time elapsed                                          ( +-  0.24% )

** after **
 Performance counter stats for './hackbench 50 process 4000' (30 runs):

     107784.479767 task-clock                #    7.928 CPUs utilized            ( +-  0.22% )
         2,834,781 context-switches          #    0.026 M/sec                    ( +-  2.33% )
            93,083 CPU-migrations            #    0.864 K/sec                    ( +-  3.45% )
           123,967 page-faults               #    0.001 M/sec                    ( +-  0.15% )
   398,781,421,836 cycles                    #    3.700 GHz                      ( +-  0.22% )
   <not supported> stalled-cycles-frontend
   <not supported> stalled-cycles-backend
   250,189,160,419 instructions              #    0.63  insns per cycle          ( +-  0.09% )
    45,855,370,128 branches                  #  425.436 M/sec                    ( +-  0.10% )
       169,881,248 branch-misses             #    0.37% of all branches          ( +-  0.43% )

      13.596272341 seconds time elapsed                                          ( +-  0.22% )

No regression is found; rather, we see a slightly better result.

Acked-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Joonsoo Kim <js1304@gmail.com>
Signed-off-by: Pekka Enberg <penberg@kernel.org>
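For reference, the shape of unfreeze_partials() after this patch can be pieced together from the hunks below into the following consolidated sketch. It is an illustration reconstructed from the diff, not the verbatim mm/slub.c source: the freelist/counters snapshot inside the retry loop and the tail of the function are not visible in the hunks and are reproduced or abbreviated here as assumptions, so check them against the actual tree.

/*
 * Consolidated sketch of unfreeze_partials() as it looks after this
 * patch.  Reconstructed from the hunks below for readability; treat it
 * as an illustration rather than the exact kernel source.
 */
static void unfreeze_partials(struct kmem_cache *s)
{
	struct kmem_cache_node *n = NULL, *n2 = NULL;
	struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
	struct page *page, *discard_page = NULL;

	while ((page = c->partial)) {
		struct page new;
		struct page old;

		c->partial = page->next;

		/* Take the node's list_lock once, outside the retry loop. */
		n2 = get_node(s, page_to_nid(page));
		if (n != n2) {
			if (n)
				spin_unlock(&n->list_lock);

			n = n2;
			spin_lock(&n->list_lock);
		}

		/*
		 * The retry loop now only snapshots the page state and
		 * clears the frozen bit.  The snapshot lines are outside
		 * the hunks below and are reproduced here from the
		 * surrounding slub.c code of this era (assumption).
		 */
		do {
			old.freelist = page->freelist;
			old.counters = page->counters;

			new.counters = old.counters;
			new.freelist = old.freelist;

			new.frozen = 0;
		} while (!__cmpxchg_double_slab(s, page,
				old.freelist, old.counters,
				new.freelist, new.counters,
				"unfreezing slab"));

		/* List manipulation happens once, after the cmpxchg succeeds. */
		if (unlikely(!new.inuse && n->nr_partial > s->min_partial)) {
			page->next = discard_page;
			discard_page = page;
		} else {
			add_partial(n, page, DEACTIVATE_TO_TAIL);
			stat(s, FREE_ADD_PARTIAL);
		}
	}

	/*
	 * Tail of the function (dropping list_lock and freeing the pages
	 * collected on discard_page) lies outside the hunks below and is
	 * omitted here.
	 */
}

The point of the restructuring is visible in this sketch: the only work redone when __cmpxchg_double_slab() fails is the cheap snapshot, while the list_lock handling and the add_partial()/remove_partial() bookkeeping no longer sit inside the retry path.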
Diffstat (limited to 'mm/slub.c')
-rw-r--r--	mm/slub.c	48
 1 file changed, 14 insertions(+), 34 deletions(-)
diff --git a/mm/slub.c b/mm/slub.c
index 4f406cd899b7..f96d8bcec54f 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1879,18 +1879,24 @@ redo:
  */
 static void unfreeze_partials(struct kmem_cache *s)
 {
-	struct kmem_cache_node *n = NULL;
+	struct kmem_cache_node *n = NULL, *n2 = NULL;
 	struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
 	struct page *page, *discard_page = NULL;
 
 	while ((page = c->partial)) {
-		enum slab_modes { M_PARTIAL, M_FREE };
-		enum slab_modes l, m;
 		struct page new;
 		struct page old;
 
 		c->partial = page->next;
-		l = M_FREE;
+
+		n2 = get_node(s, page_to_nid(page));
+		if (n != n2) {
+			if (n)
+				spin_unlock(&n->list_lock);
+
+			n = n2;
+			spin_lock(&n->list_lock);
+		}
 
 		do {
 
@@ -1903,43 +1909,17 @@ static void unfreeze_partials(struct kmem_cache *s)
 
 			new.frozen = 0;
 
-			if (!new.inuse && (!n || n->nr_partial > s->min_partial))
-				m = M_FREE;
-			else {
-				struct kmem_cache_node *n2 = get_node(s,
-					page_to_nid(page));
-
-				m = M_PARTIAL;
-				if (n != n2) {
-					if (n)
-						spin_unlock(&n->list_lock);
-
-					n = n2;
-					spin_lock(&n->list_lock);
-				}
-			}
-
-			if (l != m) {
-				if (l == M_PARTIAL) {
-					remove_partial(n, page);
-					stat(s, FREE_REMOVE_PARTIAL);
-				} else {
-					add_partial(n, page,
-						DEACTIVATE_TO_TAIL);
-					stat(s, FREE_ADD_PARTIAL);
-				}
-
-				l = m;
-			}
-
 		} while (!__cmpxchg_double_slab(s, page,
 				old.freelist, old.counters,
 				new.freelist, new.counters,
 				"unfreezing slab"));
 
-		if (m == M_FREE) {
+		if (unlikely(!new.inuse && n->nr_partial > s->min_partial)) {
 			page->next = discard_page;
 			discard_page = page;
+		} else {
+			add_partial(n, page, DEACTIVATE_TO_TAIL);
+			stat(s, FREE_ADD_PARTIAL);
 		}
 	}
 