author     Joonsoo Kim <js1304@gmail.com>       2012-06-08 13:23:16 -0400
committer  Pekka Enberg <penberg@kernel.org>    2012-06-20 03:17:45 -0400
commit     43d77867a4f333de4e4189114c480dd365133c09
tree       63eefd04b561b7563eb6e92a4ca923b378a30f7a  /mm/slub.c
parent     d24ac77f71ded6a013bacb09f359eac0b0f29a80
slub: refactoring unfreeze_partials()
The current implementation of unfreeze_partials() is complicated, but the
benefit from that complexity is insignificant. In addition, the amount of
code inside the do {} while loop hurts the failure rate of
cmpxchg_double_slab.

With the current implementation, which tests the state of the cpu partial
slab and acquires the list_lock inside the do {} while loop, the only gain
is that we can skip taking the list_lock when the slab at the front of the
cpu partial list is going to be discarded, and that is a rare case. Worse,
when add_partial has already been performed and cmpxchg_double_slab then
fails, remove_partial has to be called to undo it, handled case by case.

I think these are disadvantages of the current implementation, so I
refactored unfreeze_partials().
Minimizing the code inside the do {} while loop reduces the failure rate of
cmpxchg_double_slab. Below is the output of 'slabinfo -r kmalloc-256' while
'./perf stat -r 33 hackbench 50 process 4000 > /dev/null' is run.
** before **
Cmpxchg_double Looping
------------------------
Locked Cmpxchg Double redos 182685
Unlocked Cmpxchg Double redos 0
** after **
Cmpxchg_double Looping
------------------------
Locked Cmpxchg Double redos 177995
Unlocked Cmpxchg Double redos 1
We can see that the cmpxchg_double_slab failure rate improves slightly
(locked redos drop from 182685 to 177995, roughly 2.6% fewer retries).
Below is the output of './perf stat -r 30 hackbench 50 process 4000 > /dev/null'.
** before **
Performance counter stats for './hackbench 50 process 4000' (30 runs):
108517.190463 task-clock # 7.926 CPUs utilized ( +- 0.24% )
2,919,550 context-switches # 0.027 M/sec ( +- 3.07% )
100,774 CPU-migrations # 0.929 K/sec ( +- 4.72% )
124,201 page-faults # 0.001 M/sec ( +- 0.15% )
401,500,234,387 cycles # 3.700 GHz ( +- 0.24% )
<not supported> stalled-cycles-frontend
<not supported> stalled-cycles-backend
250,576,913,354 instructions # 0.62 insns per cycle ( +- 0.13% )
45,934,956,860 branches # 423.297 M/sec ( +- 0.14% )
188,219,787 branch-misses # 0.41% of all branches ( +- 0.56% )
13.691837307 seconds time elapsed ( +- 0.24% )
** after **
Performance counter stats for './hackbench 50 process 4000' (30 runs):
107784.479767 task-clock # 7.928 CPUs utilized ( +- 0.22% )
2,834,781 context-switches # 0.026 M/sec ( +- 2.33% )
93,083 CPU-migrations # 0.864 K/sec ( +- 3.45% )
123,967 page-faults # 0.001 M/sec ( +- 0.15% )
398,781,421,836 cycles # 3.700 GHz ( +- 0.22% )
<not supported> stalled-cycles-frontend
<not supported> stalled-cycles-backend
250,189,160,419 instructions # 0.63 insns per cycle ( +- 0.09% )
45,855,370,128 branches # 425.436 M/sec ( +- 0.10% )
169,881,248 branch-misses # 0.37% of all branches ( +- 0.43% )
13.596272341 seconds time elapsed ( +- 0.22% )
No regression is found; rather, the result is slightly better.
Acked-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Joonsoo Kim <js1304@gmail.com>
Signed-off-by: Pekka Enberg <penberg@kernel.org>
Diffstat (limited to 'mm/slub.c')
-rw-r--r--   mm/slub.c   48
1 file changed, 14 insertions(+), 34 deletions(-)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1879,18 +1879,24 @@ redo:
  */
 static void unfreeze_partials(struct kmem_cache *s)
 {
-        struct kmem_cache_node *n = NULL;
+        struct kmem_cache_node *n = NULL, *n2 = NULL;
         struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
         struct page *page, *discard_page = NULL;
 
         while ((page = c->partial)) {
-                enum slab_modes { M_PARTIAL, M_FREE };
-                enum slab_modes l, m;
                 struct page new;
                 struct page old;
 
                 c->partial = page->next;
-                l = M_FREE;
+
+                n2 = get_node(s, page_to_nid(page));
+                if (n != n2) {
+                        if (n)
+                                spin_unlock(&n->list_lock);
+
+                        n = n2;
+                        spin_lock(&n->list_lock);
+                }
 
                 do {
 
@@ -1903,43 +1909,17 @@ static void unfreeze_partials(struct kmem_cache *s)
 
                         new.frozen = 0;
 
-                        if (!new.inuse && (!n || n->nr_partial > s->min_partial))
-                                m = M_FREE;
-                        else {
-                                struct kmem_cache_node *n2 = get_node(s,
-                                        page_to_nid(page));
-
-                                m = M_PARTIAL;
-                                if (n != n2) {
-                                        if (n)
-                                                spin_unlock(&n->list_lock);
-
-                                        n = n2;
-                                        spin_lock(&n->list_lock);
-                                }
-                        }
-
-                        if (l != m) {
-                                if (l == M_PARTIAL) {
-                                        remove_partial(n, page);
-                                        stat(s, FREE_REMOVE_PARTIAL);
-                                } else {
-                                        add_partial(n, page,
-                                                DEACTIVATE_TO_TAIL);
-                                        stat(s, FREE_ADD_PARTIAL);
-                                }
-
-                                l = m;
-                        }
-
                 } while (!__cmpxchg_double_slab(s, page,
                                 old.freelist, old.counters,
                                 new.freelist, new.counters,
                                 "unfreezing slab"));
 
-                if (m == M_FREE) {
+                if (unlikely(!new.inuse && n->nr_partial > s->min_partial)) {
                         page->next = discard_page;
                         discard_page = page;
+                } else {
+                        add_partial(n, page, DEACTIVATE_TO_TAIL);
+                        stat(s, FREE_ADD_PARTIAL);
                 }
         }
 