author		Christoph Lameter <cl@linux.com>	2011-06-01 13:25:53 -0400
committer	Pekka Enberg <penberg@kernel.org>	2011-07-02 06:26:55 -0400
commit		881db7fb03a77af0bcd460fd1de1f4062d5c18fe (patch)
tree		281c07cf45aabd44962dbceed4efb1a86492115d /mm
parent		2cfb7455d223ab24b23df44be430faf92e12390f (diff)
slub: Invert locking and avoid slab lock
Locking slabs is no longer necessary if the arch supports cmpxchg operations and if no debugging features are used on a slab. If the arch does not support cmpxchg then we fall back to using the slab lock to do a cmpxchg-like operation.

The patch also changes the lock order. Slab locks are now subsumed to the node lock. With that approach slab_trylocking is no longer necessary.

Signed-off-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Pekka Enberg <penberg@kernel.org>
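The core of the change is the fallback path of cmpxchg_double_slab(): when the architecture cannot do a cmpxchg_double, the per-page bit spinlock briefly serializes a compare-and-swap of the freelist pointer and the counters word. Below is a minimal user-space sketch of that pattern, not the kernel code itself; "struct fake_page" and the fake_* helpers are illustrative stand-ins for struct page, bit_spin_lock(PG_locked, &page->flags) and the real cmpxchg_double_slab().

/*
 * Minimal sketch of the "lock, compare both words, swap both words" fallback.
 * All names here are illustrative, not kernel APIs.
 */
#include <stdatomic.h>
#include <stdbool.h>

struct fake_page {
	void *freelist;			/* first free object in the page */
	unsigned long counters;		/* packed inuse/objects/frozen word */
	atomic_flag locked;		/* stands in for PG_locked in page->flags */
};

/* stand-ins for slab_lock()/slab_unlock(): a spinlock bit on the page */
static void fake_slab_lock(struct fake_page *page)
{
	while (atomic_flag_test_and_set_explicit(&page->locked,
						 memory_order_acquire))
		;	/* spin, like bit_spin_lock() */
}

static void fake_slab_unlock(struct fake_page *page)
{
	atomic_flag_clear_explicit(&page->locked, memory_order_release);
}

/*
 * Fallback "cmpxchg_double": update freelist and counters together, but
 * only if both still hold the values the caller observed earlier.
 */
static bool fake_cmpxchg_double_slab(struct fake_page *page,
		void *freelist_old, unsigned long counters_old,
		void *freelist_new, unsigned long counters_new)
{
	bool ok = false;

	fake_slab_lock(page);
	if (page->freelist == freelist_old && page->counters == counters_old) {
		page->freelist = freelist_new;
		page->counters = counters_new;
		ok = true;
	}
	fake_slab_unlock(page);
	return ok;
}

On architectures that do provide cmpxchg_double, the locked section above is replaced by a single double-word compare-and-exchange on the adjacent freelist/counters fields, which is why the patch can drop the slab lock from the common allocation and free paths.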
Diffstat (limited to 'mm')
-rw-r--r--	mm/slub.c	129
1 file changed, 52 insertions(+), 77 deletions(-)
diff --git a/mm/slub.c b/mm/slub.c
index 5f0346c97c5f..ee70c091e577 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2,10 +2,11 @@
  * SLUB: A slab allocator that limits cache line use instead of queuing
  * objects in per cpu and per node lists.
  *
- * The allocator synchronizes using per slab locks and only
- * uses a centralized lock to manage a pool of partial slabs.
+ * The allocator synchronizes using per slab locks or atomic operatios
+ * and only uses a centralized lock to manage a pool of partial slabs.
  *
  * (C) 2007 SGI, Christoph Lameter
+ * (C) 2011 Linux Foundation, Christoph Lameter
  */
 
 #include <linux/mm.h>
@@ -32,15 +33,27 @@
 
 /*
  * Lock order:
- *   1. slab_lock(page)
- *   2. slab->list_lock
+ *   1. slub_lock (Global Semaphore)
+ *   2. node->list_lock
+ *   3. slab_lock(page) (Only on some arches and for debugging)
  *
- * The slab_lock protects operations on the object of a particular
- * slab and its metadata in the page struct. If the slab lock
- * has been taken then no allocations nor frees can be performed
- * on the objects in the slab nor can the slab be added or removed
- * from the partial or full lists since this would mean modifying
- * the page_struct of the slab.
+ *   slub_lock
+ *
+ *   The role of the slub_lock is to protect the list of all the slabs
+ *   and to synchronize major metadata changes to slab cache structures.
+ *
+ *   The slab_lock is only used for debugging and on arches that do not
+ *   have the ability to do a cmpxchg_double. It only protects the second
+ *   double word in the page struct. Meaning
+ *	A. page->freelist	-> List of object free in a page
+ *	B. page->counters	-> Counters of objects
+ *	C. page->frozen		-> frozen state
+ *
+ *   If a slab is frozen then it is exempt from list management. It is not
+ *   on any list. The processor that froze the slab is the one who can
+ *   perform list operations on the page. Other processors may put objects
+ *   onto the freelist but the processor that froze the slab is the only
+ *   one that can retrieve the objects from the page's freelist.
  *
  * The list_lock protects the partial and full list on each node and
  * the partial slab counter. If taken then no new slabs may be added or
@@ -53,20 +66,6 @@
  * slabs, operations can continue without any centralized lock. F.e.
  * allocating a long series of objects that fill up slabs does not require
  * the list lock.
- *
- * The lock order is sometimes inverted when we are trying to get a slab
- * off a list. We take the list_lock and then look for a page on the list
- * to use. While we do that objects in the slabs may be freed. We can
- * only operate on the slab if we have also taken the slab_lock. So we use
- * a slab_trylock() on the slab. If trylock was successful then no frees
- * can occur anymore and we can use the slab for allocations etc. If the
- * slab_trylock() does not succeed then frees are in progress in the slab and
- * we must stay away from it for a while since we may cause a bouncing
- * cacheline if we try to acquire the lock. So go onto the next slab.
- * If all pages are busy then we may allocate a new slab instead of reusing
- * a partial slab. A new slab has no one operating on it and thus there is
- * no danger of cacheline contention.
- *
  * Interrupts are disabled during allocation and deallocation in order to
  * make the slab allocator safe to use in the context of an irq. In addition
  * interrupts are disabled to ensure that the processor does not change
@@ -342,6 +341,19 @@ static inline int oo_objects(struct kmem_cache_order_objects x)
 	return x.x & OO_MASK;
 }
 
+/*
+ * Per slab locking using the pagelock
+ */
+static __always_inline void slab_lock(struct page *page)
+{
+	bit_spin_lock(PG_locked, &page->flags);
+}
+
+static __always_inline void slab_unlock(struct page *page)
+{
+	__bit_spin_unlock(PG_locked, &page->flags);
+}
+
 static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
 		void *freelist_old, unsigned long counters_old,
 		void *freelist_new, unsigned long counters_new,
@@ -356,11 +368,14 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
 	} else
 #endif
 	{
+		slab_lock(page);
 		if (page->freelist == freelist_old && page->counters == counters_old) {
 			page->freelist = freelist_new;
 			page->counters = counters_new;
+			slab_unlock(page);
 			return 1;
 		}
+		slab_unlock(page);
 	}
 
 	cpu_relax();
@@ -377,7 +392,7 @@ static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
 /*
  * Determine a map of object in use on a page.
  *
- * Slab lock or node listlock must be held to guarantee that the page does
+ * Node listlock must be held to guarantee that the page does
  * not vanish from under us.
  */
 static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
@@ -808,10 +823,11 @@ static int check_slab(struct kmem_cache *s, struct page *page)
 static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
 {
 	int nr = 0;
-	void *fp = page->freelist;
+	void *fp;
 	void *object = NULL;
 	unsigned long max_objects;
 
+	fp = page->freelist;
 	while (fp && nr <= page->objects) {
 		if (fp == search)
 			return 1;
@@ -1024,6 +1040,8 @@ bad:
 static noinline int free_debug_processing(struct kmem_cache *s,
 		struct page *page, void *object, unsigned long addr)
 {
+	slab_lock(page);
+
 	if (!check_slab(s, page))
 		goto fail;
 
@@ -1059,10 +1077,12 @@ static noinline int free_debug_processing(struct kmem_cache *s,
 	set_track(s, object, TRACK_FREE, addr);
 	trace(s, page, object, 0);
 	init_object(s, object, SLUB_RED_INACTIVE);
+	slab_unlock(page);
 	return 1;
 
 fail:
 	slab_fix(s, "Object at 0x%p not freed", object);
+	slab_unlock(page);
 	return 0;
 }
 
@@ -1394,27 +1414,6 @@ static void discard_slab(struct kmem_cache *s, struct page *page)
 }
 
 /*
- * Per slab locking using the pagelock
- */
-static __always_inline void slab_lock(struct page *page)
-{
-	bit_spin_lock(PG_locked, &page->flags);
-}
-
-static __always_inline void slab_unlock(struct page *page)
-{
-	__bit_spin_unlock(PG_locked, &page->flags);
-}
-
-static __always_inline int slab_trylock(struct page *page)
-{
-	int rc = 1;
-
-	rc = bit_spin_trylock(PG_locked, &page->flags);
-	return rc;
-}
-
-/*
  * Management of partially allocated slabs.
  *
  * list_lock must be held.
@@ -1445,17 +1444,13 @@ static inline void remove_partial(struct kmem_cache_node *n,
  *
  * Must hold list_lock.
  */
-static inline int lock_and_freeze_slab(struct kmem_cache *s,
+static inline int acquire_slab(struct kmem_cache *s,
 		struct kmem_cache_node *n, struct page *page)
 {
 	void *freelist;
 	unsigned long counters;
 	struct page new;
 
-
-	if (!slab_trylock(page))
-		return 0;
-
 	/*
 	 * Zap the freelist and set the frozen bit.
 	 * The old freelist is the list of objects for the
@@ -1491,7 +1486,6 @@ static inline int lock_and_freeze_slab(struct kmem_cache *s,
 		 */
 		printk(KERN_ERR "SLUB: %s : Page without available objects on"
 			" partial list\n", s->name);
-		slab_unlock(page);
 		return 0;
 	}
 }
@@ -1515,7 +1509,7 @@ static struct page *get_partial_node(struct kmem_cache *s,
 
 	spin_lock(&n->list_lock);
 	list_for_each_entry(page, &n->partial, lru)
-		if (lock_and_freeze_slab(s, n, page))
+		if (acquire_slab(s, n, page))
 			goto out;
 	page = NULL;
 out:
@@ -1804,8 +1798,6 @@ redo:
 				"unfreezing slab"))
 		goto redo;
 
-	slab_unlock(page);
-
 	if (lock)
 		spin_unlock(&n->list_lock);
 
@@ -1819,7 +1811,6 @@
 static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 {
 	stat(s, CPUSLAB_FLUSH);
-	slab_lock(c->page);
 	deactivate_slab(s, c);
 }
 
@@ -1968,7 +1959,6 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
 	if (!page)
 		goto new_slab;
 
-	slab_lock(page);
 	if (unlikely(!node_match(c, node)))
 		goto another_slab;
 
@@ -1994,8 +1984,6 @@ load_freelist:
 
 	stat(s, ALLOC_REFILL);
 
-	slab_unlock(page);
-
 	c->freelist = get_freepointer(s, object);
 	c->tid = next_tid(c->tid);
 	local_irq_restore(flags);
@@ -2031,7 +2019,6 @@ new_slab:
 		page->inuse = page->objects;
 
 		stat(s, ALLOC_SLAB);
-		slab_lock(page);
 		c->node = page_to_nid(page);
 		c->page = page;
 		goto load_freelist;
@@ -2205,7 +2192,6 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 	unsigned long uninitialized_var(flags);
 
 	local_irq_save(flags);
-	slab_lock(page);
 	stat(s, FREE_SLOWPATH);
 
 	if (kmem_cache_debug(s) && !free_debug_processing(s, page, x, addr))
@@ -2271,7 +2257,6 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 		spin_unlock(&n->list_lock);
 
 out_unlock:
-	slab_unlock(page);
 	local_irq_restore(flags);
 	return;
 
@@ -2285,7 +2270,6 @@ slab_empty:
 	}
 
 	spin_unlock(&n->list_lock);
-	slab_unlock(page);
 	local_irq_restore(flags);
 	stat(s, FREE_SLAB);
 	discard_slab(s, page);
@@ -3202,14 +3186,8 @@ int kmem_cache_shrink(struct kmem_cache *s)
 		 * list_lock. page->inuse here is the upper limit.
 		 */
 		list_for_each_entry_safe(page, t, &n->partial, lru) {
-			if (!page->inuse && slab_trylock(page)) {
-				/*
-				 * Must hold slab lock here because slab_free
-				 * may have freed the last object and be
-				 * waiting to release the slab.
-				 */
+			if (!page->inuse) {
 				remove_partial(n, page);
-				slab_unlock(page);
 				discard_slab(s, page);
 			} else {
 				list_move(&page->lru,
@@ -3797,12 +3775,9 @@ static int validate_slab(struct kmem_cache *s, struct page *page,
 static void validate_slab_slab(struct kmem_cache *s, struct page *page,
 						unsigned long *map)
 {
-	if (slab_trylock(page)) {
-		validate_slab(s, page, map);
-		slab_unlock(page);
-	} else
-		printk(KERN_INFO "SLUB %s: Skipped busy slab 0x%p\n",
-			s->name, page);
+	slab_lock(page);
+	validate_slab(s, page, map);
+	slab_unlock(page);
 }
 
 static int validate_slab_node(struct kmem_cache *s,
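The lock-order comment this patch adds near the top of mm/slub.c also introduces the frozen-slab rule: a frozen page sits on no list, the processor that froze it is the only one that performs list operations on it or takes objects from it, while other processors may still return objects through the cmpxchg-style freelist update shown earlier. The following user-space sketch, with illustrative toy_* names and a pthread mutex standing in for node->list_lock, models only that ownership rule; it is not kernel code.

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>

struct toy_page {
	void *freelist;		/* objects returned by other threads land here */
	bool frozen;		/* set while one thread owns the page exclusively */
	struct toy_page *next;	/* linkage on the node's partial list */
};

struct toy_node {
	pthread_mutex_t list_lock;	/* plays the role of node->list_lock */
	struct toy_page *partial;	/* singly linked partial list */
};

/* Freeze the first partial page: list manipulation only under list_lock. */
static struct toy_page *toy_acquire_slab(struct toy_node *n)
{
	struct toy_page *page;

	pthread_mutex_lock(&n->list_lock);
	page = n->partial;
	if (page) {
		n->partial = page->next;	/* off the list ...             */
		page->next = NULL;
		page->frozen = true;		/* ... and owned by this thread */
	}
	pthread_mutex_unlock(&n->list_lock);
	return page;
}

/* Unfreeze: only the owner does this, putting the page back on the list. */
static void toy_unfreeze_slab(struct toy_node *n, struct toy_page *page)
{
	pthread_mutex_lock(&n->list_lock);
	page->frozen = false;
	page->next = n->partial;
	n->partial = page;
	pthread_mutex_unlock(&n->list_lock);
}

int main(void)
{
	struct toy_page pages[2] = {
		{ NULL, false, &pages[1] },
		{ NULL, false, NULL },
	};
	struct toy_node node = { PTHREAD_MUTEX_INITIALIZER, &pages[0] };
	struct toy_page *owned;

	owned = toy_acquire_slab(&node);	/* frozen: off the list, ours alone */
	/* ... allocate from 'owned' without touching node.partial ... */
	toy_unfreeze_slab(&node, owned);	/* back under normal list management */
	return 0;
}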