author	Linus Torvalds <torvalds@linux-foundation.org>	2011-07-30 14:21:48 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-07-30 14:21:48 -0400
commit	c11abbbaa3252875c5740a6880b9a1a6f1e2a870 (patch)
tree	692143f7edd1157ef499bff21143e0d6df7cace5 /mm/slub.c
parent	1d3fe4a75b691285cded47c9f1a91b30d25287b0 (diff)
parent	9e577e8b46ab0c38970c0f0cd7eae62e6dffddee (diff)
Merge branch 'slub/lockless' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6
* 'slub/lockless' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6: (21 commits)
  slub: When allocating a new slab also prep the first object
  slub: disable interrupts in cmpxchg_double_slab when falling back to pagelock
  Avoid duplicate _count variables in page_struct
  Revert "SLUB: Fix build breakage in linux/mm_types.h"
  SLUB: Fix build breakage in linux/mm_types.h
  slub: slabinfo update for cmpxchg handling
  slub: Not necessary to check for empty slab on load_freelist
  slub: fast release on full slab
  slub: Add statistics for the case that the current slab does not match the node
  slub: Get rid of the another_slab label
  slub: Avoid disabling interrupts in free slowpath
  slub: Disable interrupts in free_debug processing
  slub: Invert locking and avoid slab lock
  slub: Rework allocator fastpaths
  slub: Pass kmem_cache struct to lock and freeze slab
  slub: explicit list_lock taking
  slub: Add cmpxchg_double_slab()
  mm: Rearrange struct page
  slub: Move page->frozen handling near where the page->freelist handling occurs
  slub: Do not use frozen page flag but a bit in the page counters
  ...
Diffstat (limited to 'mm/slub.c')
-rw-r--r--	mm/slub.c	764
1 file changed, 512 insertions(+), 252 deletions(-)
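For orientation before reading the diff (this sketch is not part of the patch): the series replaces most uses of the per-slab bit spinlock with a double-word compare-and-exchange that updates page->freelist and page->counters as one unit, retrying on contention. The standalone C sketch below approximates that retry pattern with C11 atomics on a two-member struct; the names (fake_slab, push_free_object) are invented for illustration, and the kernel itself uses cmpxchg_double()/cmpxchg_double_slab() on the adjacent words of struct page, as the hunks below show.

#include <stdatomic.h>

/* Illustrative stand-ins for page->freelist and page->counters. */
struct slab_state {
	void *freelist;           /* first free object in the slab */
	unsigned long counters;   /* packed inuse/objects/frozen state */
};

struct fake_slab {
	_Atomic struct slab_state state;  /* both words change together */
};

/* Free objects store the pointer to the next free object at offset 0. */
static void set_free_pointer(void *object, void *next)
{
	*(void **)object = next;
}

/*
 * Push an object back onto the freelist without holding a lock, in the
 * spirit of the cmpxchg_double_slab() retry loops added by this merge.
 */
static void push_free_object(struct fake_slab *slab, void *object)
{
	struct slab_state old = atomic_load(&slab->state);
	struct slab_state new;

	do {
		set_free_pointer(object, old.freelist);  /* link in front */
		new.freelist = object;
		new.counters = old.counters - 1;         /* e.g. inuse-- */
	} while (!atomic_compare_exchange_weak(&slab->state, &old, new));
	/* On failure 'old' is refreshed and the new state is recomputed. */
}

Unlike this sketch, the real __slab_free() below also takes node->list_lock speculatively when the slab might have to move between the node's partial and full lists.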
diff --git a/mm/slub.c b/mm/slub.c
index f8f5e8efeb88..eb5a8f93338a 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2,10 +2,11 @@
  * SLUB: A slab allocator that limits cache line use instead of queuing
  * objects in per cpu and per node lists.
  *
- * The allocator synchronizes using per slab locks and only
- * uses a centralized lock to manage a pool of partial slabs.
+ * The allocator synchronizes using per slab locks or atomic operatios
+ * and only uses a centralized lock to manage a pool of partial slabs.
  *
  * (C) 2007 SGI, Christoph Lameter
+ * (C) 2011 Linux Foundation, Christoph Lameter
  */
 
 #include <linux/mm.h>
@@ -33,15 +34,27 @@
 
 /*
  * Lock order:
- * 1. slab_lock(page)
- * 2. slab->list_lock
+ * 1. slub_lock (Global Semaphore)
+ * 2. node->list_lock
+ * 3. slab_lock(page) (Only on some arches and for debugging)
  *
- * The slab_lock protects operations on the object of a particular
- * slab and its metadata in the page struct. If the slab lock
- * has been taken then no allocations nor frees can be performed
- * on the objects in the slab nor can the slab be added or removed
- * from the partial or full lists since this would mean modifying
- * the page_struct of the slab.
+ * slub_lock
+ *
+ * The role of the slub_lock is to protect the list of all the slabs
+ * and to synchronize major metadata changes to slab cache structures.
+ *
+ * The slab_lock is only used for debugging and on arches that do not
+ * have the ability to do a cmpxchg_double. It only protects the second
+ * double word in the page struct. Meaning
+ *   A. page->freelist -> List of object free in a page
+ *   B. page->counters -> Counters of objects
+ *   C. page->frozen   -> frozen state
+ *
+ * If a slab is frozen then it is exempt from list management. It is not
+ * on any list. The processor that froze the slab is the one who can
+ * perform list operations on the page. Other processors may put objects
+ * onto the freelist but the processor that froze the slab is the only
+ * one that can retrieve the objects from the page's freelist.
  *
  * The list_lock protects the partial and full list on each node and
  * the partial slab counter. If taken then no new slabs may be added or
@@ -54,20 +67,6 @@
  * slabs, operations can continue without any centralized lock. F.e.
  * allocating a long series of objects that fill up slabs does not require
  * the list lock.
- *
- * The lock order is sometimes inverted when we are trying to get a slab
- * off a list. We take the list_lock and then look for a page on the list
- * to use. While we do that objects in the slabs may be freed. We can
- * only operate on the slab if we have also taken the slab_lock. So we use
- * a slab_trylock() on the slab. If trylock was successful then no frees
- * can occur anymore and we can use the slab for allocations etc. If the
- * slab_trylock() does not succeed then frees are in progress in the slab and
- * we must stay away from it for a while since we may cause a bouncing
- * cacheline if we try to acquire the lock. So go onto the next slab.
- * If all pages are busy then we may allocate a new slab instead of reusing
- * a partial slab. A new slab has no one operating on it and thus there is
- * no danger of cacheline contention.
- *
  * Interrupts are disabled during allocation and deallocation in order to
  * make the slab allocator safe to use in the context of an irq. In addition
  * interrupts are disabled to ensure that the processor does not change
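The freelist/counters/frozen fields that the new lock-order comment relies on live in struct page and are rearranged by the companion commit "mm: Rearrange struct page" (include/linux/mm_types.h), which is not part of this file's diff. Roughly, and only as an illustrative sketch rather than the exact mm_types.h text (the 16/15/1 bit split matches the MAX_OBJS_PER_PAGE note further down):

/* Illustrative sketch of the second double word of struct page. */
struct slub_page_fields {
	void *freelist;                 /* first free object in the slab */
	union {
		unsigned long counters; /* updated together with freelist
					 * by cmpxchg_double_slab() */
		struct {
			unsigned inuse:16;   /* objects handed out */
			unsigned objects:15; /* objects per slab, hence
					      * MAX_OBJS_PER_PAGE == 32767 */
			unsigned frozen:1;   /* slab owned by one cpu and
					      * exempt from list management */
		};
	};
};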
@@ -132,6 +131,9 @@ static inline int kmem_cache_debug(struct kmem_cache *s)
 /* Enable to test recovery from slab corruption on boot */
 #undef SLUB_RESILIENCY_TEST
 
+/* Enable to log cmpxchg failures */
+#undef SLUB_DEBUG_CMPXCHG
+
 /*
  * Mininum number of partial slabs. These will be left on the partial
  * lists even if they are empty. kmem_cache_shrink may reclaim them.
@@ -167,10 +169,11 @@ static inline int kmem_cache_debug(struct kmem_cache *s)
 
 #define OO_SHIFT 16
 #define OO_MASK ((1 << OO_SHIFT) - 1)
-#define MAX_OBJS_PER_PAGE 65535 /* since page.objects is u16 */
+#define MAX_OBJS_PER_PAGE 32767 /* since page.objects is u15 */
 
 /* Internal SLUB flags */
 #define __OBJECT_POISON 0x80000000UL /* Poison object */
+#define __CMPXCHG_DOUBLE 0x40000000UL /* Use cmpxchg_double */
 
 static int kmem_size = sizeof(struct kmem_cache);
 
@@ -343,11 +346,99 @@ static inline int oo_objects(struct kmem_cache_order_objects x)
343 return x.x & OO_MASK; 346 return x.x & OO_MASK;
344} 347}
345 348
349/*
350 * Per slab locking using the pagelock
351 */
352static __always_inline void slab_lock(struct page *page)
353{
354 bit_spin_lock(PG_locked, &page->flags);
355}
356
357static __always_inline void slab_unlock(struct page *page)
358{
359 __bit_spin_unlock(PG_locked, &page->flags);
360}
361
362/* Interrupts must be disabled (for the fallback code to work right) */
363static inline bool __cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
364 void *freelist_old, unsigned long counters_old,
365 void *freelist_new, unsigned long counters_new,
366 const char *n)
367{
368 VM_BUG_ON(!irqs_disabled());
369#ifdef CONFIG_CMPXCHG_DOUBLE
370 if (s->flags & __CMPXCHG_DOUBLE) {
371 if (cmpxchg_double(&page->freelist,
372 freelist_old, counters_old,
373 freelist_new, counters_new))
374 return 1;
375 } else
376#endif
377 {
378 slab_lock(page);
379 if (page->freelist == freelist_old && page->counters == counters_old) {
380 page->freelist = freelist_new;
381 page->counters = counters_new;
382 slab_unlock(page);
383 return 1;
384 }
385 slab_unlock(page);
386 }
387
388 cpu_relax();
389 stat(s, CMPXCHG_DOUBLE_FAIL);
390
391#ifdef SLUB_DEBUG_CMPXCHG
392 printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
393#endif
394
395 return 0;
396}
397
398static inline bool cmpxchg_double_slab(struct kmem_cache *s, struct page *page,
399 void *freelist_old, unsigned long counters_old,
400 void *freelist_new, unsigned long counters_new,
401 const char *n)
402{
403#ifdef CONFIG_CMPXCHG_DOUBLE
404 if (s->flags & __CMPXCHG_DOUBLE) {
405 if (cmpxchg_double(&page->freelist,
406 freelist_old, counters_old,
407 freelist_new, counters_new))
408 return 1;
409 } else
410#endif
411 {
412 unsigned long flags;
413
414 local_irq_save(flags);
415 slab_lock(page);
416 if (page->freelist == freelist_old && page->counters == counters_old) {
417 page->freelist = freelist_new;
418 page->counters = counters_new;
419 slab_unlock(page);
420 local_irq_restore(flags);
421 return 1;
422 }
423 slab_unlock(page);
424 local_irq_restore(flags);
425 }
426
427 cpu_relax();
428 stat(s, CMPXCHG_DOUBLE_FAIL);
429
430#ifdef SLUB_DEBUG_CMPXCHG
431 printk(KERN_INFO "%s %s: cmpxchg double redo ", n, s->name);
432#endif
433
434 return 0;
435}
436
346#ifdef CONFIG_SLUB_DEBUG 437#ifdef CONFIG_SLUB_DEBUG
347/* 438/*
348 * Determine a map of object in use on a page. 439 * Determine a map of object in use on a page.
349 * 440 *
350 * Slab lock or node listlock must be held to guarantee that the page does 441 * Node listlock must be held to guarantee that the page does
351 * not vanish from under us. 442 * not vanish from under us.
352 */ 443 */
353static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map) 444static void get_map(struct kmem_cache *s, struct page *page, unsigned long *map)
@@ -838,10 +929,11 @@ static int check_slab(struct kmem_cache *s, struct page *page)
838static int on_freelist(struct kmem_cache *s, struct page *page, void *search) 929static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
839{ 930{
840 int nr = 0; 931 int nr = 0;
841 void *fp = page->freelist; 932 void *fp;
842 void *object = NULL; 933 void *object = NULL;
843 unsigned long max_objects; 934 unsigned long max_objects;
844 935
936 fp = page->freelist;
845 while (fp && nr <= page->objects) { 937 while (fp && nr <= page->objects) {
846 if (fp == search) 938 if (fp == search)
847 return 1; 939 return 1;
@@ -946,26 +1038,27 @@ static inline void slab_free_hook(struct kmem_cache *s, void *x)
946 1038
947/* 1039/*
948 * Tracking of fully allocated slabs for debugging purposes. 1040 * Tracking of fully allocated slabs for debugging purposes.
1041 *
1042 * list_lock must be held.
949 */ 1043 */
950static void add_full(struct kmem_cache_node *n, struct page *page) 1044static void add_full(struct kmem_cache *s,
1045 struct kmem_cache_node *n, struct page *page)
951{ 1046{
952 spin_lock(&n->list_lock); 1047 if (!(s->flags & SLAB_STORE_USER))
1048 return;
1049
953 list_add(&page->lru, &n->full); 1050 list_add(&page->lru, &n->full);
954 spin_unlock(&n->list_lock);
955} 1051}
956 1052
1053/*
1054 * list_lock must be held.
1055 */
957static void remove_full(struct kmem_cache *s, struct page *page) 1056static void remove_full(struct kmem_cache *s, struct page *page)
958{ 1057{
959 struct kmem_cache_node *n;
960
961 if (!(s->flags & SLAB_STORE_USER)) 1058 if (!(s->flags & SLAB_STORE_USER))
962 return; 1059 return;
963 1060
964 n = get_node(s, page_to_nid(page));
965
966 spin_lock(&n->list_lock);
967 list_del(&page->lru); 1061 list_del(&page->lru);
968 spin_unlock(&n->list_lock);
969} 1062}
970 1063
971/* Tracking of the number of slabs for debugging purposes */ 1064/* Tracking of the number of slabs for debugging purposes */
@@ -1021,11 +1114,6 @@ static noinline int alloc_debug_processing(struct kmem_cache *s, struct page *pa
1021 if (!check_slab(s, page)) 1114 if (!check_slab(s, page))
1022 goto bad; 1115 goto bad;
1023 1116
1024 if (!on_freelist(s, page, object)) {
1025 object_err(s, page, object, "Object already allocated");
1026 goto bad;
1027 }
1028
1029 if (!check_valid_pointer(s, page, object)) { 1117 if (!check_valid_pointer(s, page, object)) {
1030 object_err(s, page, object, "Freelist Pointer check fails"); 1118 object_err(s, page, object, "Freelist Pointer check fails");
1031 goto bad; 1119 goto bad;
@@ -1058,6 +1146,12 @@ bad:
1058static noinline int free_debug_processing(struct kmem_cache *s, 1146static noinline int free_debug_processing(struct kmem_cache *s,
1059 struct page *page, void *object, unsigned long addr) 1147 struct page *page, void *object, unsigned long addr)
1060{ 1148{
1149 unsigned long flags;
1150 int rc = 0;
1151
1152 local_irq_save(flags);
1153 slab_lock(page);
1154
1061 if (!check_slab(s, page)) 1155 if (!check_slab(s, page))
1062 goto fail; 1156 goto fail;
1063 1157
@@ -1072,7 +1166,7 @@ static noinline int free_debug_processing(struct kmem_cache *s,
1072 } 1166 }
1073 1167
1074 if (!check_object(s, page, object, SLUB_RED_ACTIVE)) 1168 if (!check_object(s, page, object, SLUB_RED_ACTIVE))
1075 return 0; 1169 goto out;
1076 1170
1077 if (unlikely(s != page->slab)) { 1171 if (unlikely(s != page->slab)) {
1078 if (!PageSlab(page)) { 1172 if (!PageSlab(page)) {
@@ -1089,18 +1183,19 @@ static noinline int free_debug_processing(struct kmem_cache *s,
1089 goto fail; 1183 goto fail;
1090 } 1184 }
1091 1185
1092 /* Special debug activities for freeing objects */
1093 if (!PageSlubFrozen(page) && !page->freelist)
1094 remove_full(s, page);
1095 if (s->flags & SLAB_STORE_USER) 1186 if (s->flags & SLAB_STORE_USER)
1096 set_track(s, object, TRACK_FREE, addr); 1187 set_track(s, object, TRACK_FREE, addr);
1097 trace(s, page, object, 0); 1188 trace(s, page, object, 0);
1098 init_object(s, object, SLUB_RED_INACTIVE); 1189 init_object(s, object, SLUB_RED_INACTIVE);
1099 return 1; 1190 rc = 1;
1191out:
1192 slab_unlock(page);
1193 local_irq_restore(flags);
1194 return rc;
1100 1195
1101fail: 1196fail:
1102 slab_fix(s, "Object at 0x%p not freed", object); 1197 slab_fix(s, "Object at 0x%p not freed", object);
1103 return 0; 1198 goto out;
1104} 1199}
1105 1200
1106static int __init setup_slub_debug(char *str) 1201static int __init setup_slub_debug(char *str)
@@ -1200,7 +1295,9 @@ static inline int slab_pad_check(struct kmem_cache *s, struct page *page)
1200 { return 1; } 1295 { return 1; }
1201static inline int check_object(struct kmem_cache *s, struct page *page, 1296static inline int check_object(struct kmem_cache *s, struct page *page,
1202 void *object, u8 val) { return 1; } 1297 void *object, u8 val) { return 1; }
1203static inline void add_full(struct kmem_cache_node *n, struct page *page) {} 1298static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
1299 struct page *page) {}
1300static inline void remove_full(struct kmem_cache *s, struct page *page) {}
1204static inline unsigned long kmem_cache_flags(unsigned long objsize, 1301static inline unsigned long kmem_cache_flags(unsigned long objsize,
1205 unsigned long flags, const char *name, 1302 unsigned long flags, const char *name,
1206 void (*ctor)(void *)) 1303 void (*ctor)(void *))
@@ -1252,6 +1349,11 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1252 struct kmem_cache_order_objects oo = s->oo; 1349 struct kmem_cache_order_objects oo = s->oo;
1253 gfp_t alloc_gfp; 1350 gfp_t alloc_gfp;
1254 1351
1352 flags &= gfp_allowed_mask;
1353
1354 if (flags & __GFP_WAIT)
1355 local_irq_enable();
1356
1255 flags |= s->allocflags; 1357 flags |= s->allocflags;
1256 1358
1257 /* 1359 /*
@@ -1268,12 +1370,17 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
1268 * Try a lower order alloc if possible 1370 * Try a lower order alloc if possible
1269 */ 1371 */
1270 page = alloc_slab_page(flags, node, oo); 1372 page = alloc_slab_page(flags, node, oo);
1271 if (!page)
1272 return NULL;
1273 1373
1274 stat(s, ORDER_FALLBACK); 1374 if (page)
1375 stat(s, ORDER_FALLBACK);
1275 } 1376 }
1276 1377
1378 if (flags & __GFP_WAIT)
1379 local_irq_disable();
1380
1381 if (!page)
1382 return NULL;
1383
1277 if (kmemcheck_enabled 1384 if (kmemcheck_enabled
1278 && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) { 1385 && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) {
1279 int pages = 1 << oo_order(oo); 1386 int pages = 1 << oo_order(oo);
@@ -1341,6 +1448,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1341 1448
1342 page->freelist = start; 1449 page->freelist = start;
1343 page->inuse = 0; 1450 page->inuse = 0;
1451 page->frozen = 1;
1344out: 1452out:
1345 return page; 1453 return page;
1346} 1454}
@@ -1418,77 +1526,87 @@ static void discard_slab(struct kmem_cache *s, struct page *page)
1418} 1526}
1419 1527
1420/* 1528/*
1421 * Per slab locking using the pagelock 1529 * Management of partially allocated slabs.
1422 */ 1530 *
1423static __always_inline void slab_lock(struct page *page) 1531 * list_lock must be held.
1424{
1425 bit_spin_lock(PG_locked, &page->flags);
1426}
1427
1428static __always_inline void slab_unlock(struct page *page)
1429{
1430 __bit_spin_unlock(PG_locked, &page->flags);
1431}
1432
1433static __always_inline int slab_trylock(struct page *page)
1434{
1435 int rc = 1;
1436
1437 rc = bit_spin_trylock(PG_locked, &page->flags);
1438 return rc;
1439}
1440
1441/*
1442 * Management of partially allocated slabs
1443 */ 1532 */
1444static void add_partial(struct kmem_cache_node *n, 1533static inline void add_partial(struct kmem_cache_node *n,
1445 struct page *page, int tail) 1534 struct page *page, int tail)
1446{ 1535{
1447 spin_lock(&n->list_lock);
1448 n->nr_partial++; 1536 n->nr_partial++;
1449 if (tail) 1537 if (tail)
1450 list_add_tail(&page->lru, &n->partial); 1538 list_add_tail(&page->lru, &n->partial);
1451 else 1539 else
1452 list_add(&page->lru, &n->partial); 1540 list_add(&page->lru, &n->partial);
1453 spin_unlock(&n->list_lock);
1454} 1541}
1455 1542
1456static inline void __remove_partial(struct kmem_cache_node *n, 1543/*
1544 * list_lock must be held.
1545 */
1546static inline void remove_partial(struct kmem_cache_node *n,
1457 struct page *page) 1547 struct page *page)
1458{ 1548{
1459 list_del(&page->lru); 1549 list_del(&page->lru);
1460 n->nr_partial--; 1550 n->nr_partial--;
1461} 1551}
1462 1552
1463static void remove_partial(struct kmem_cache *s, struct page *page)
1464{
1465 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1466
1467 spin_lock(&n->list_lock);
1468 __remove_partial(n, page);
1469 spin_unlock(&n->list_lock);
1470}
1471
1472/* 1553/*
1473 * Lock slab and remove from the partial list. 1554 * Lock slab, remove from the partial list and put the object into the
1555 * per cpu freelist.
1474 * 1556 *
1475 * Must hold list_lock. 1557 * Must hold list_lock.
1476 */ 1558 */
1477static inline int lock_and_freeze_slab(struct kmem_cache_node *n, 1559static inline int acquire_slab(struct kmem_cache *s,
1478 struct page *page) 1560 struct kmem_cache_node *n, struct page *page)
1479{ 1561{
1480 if (slab_trylock(page)) { 1562 void *freelist;
1481 __remove_partial(n, page); 1563 unsigned long counters;
1482 __SetPageSlubFrozen(page); 1564 struct page new;
1565
1566 /*
1567 * Zap the freelist and set the frozen bit.
1568 * The old freelist is the list of objects for the
1569 * per cpu allocation list.
1570 */
1571 do {
1572 freelist = page->freelist;
1573 counters = page->counters;
1574 new.counters = counters;
1575 new.inuse = page->objects;
1576
1577 VM_BUG_ON(new.frozen);
1578 new.frozen = 1;
1579
1580 } while (!__cmpxchg_double_slab(s, page,
1581 freelist, counters,
1582 NULL, new.counters,
1583 "lock and freeze"));
1584
1585 remove_partial(n, page);
1586
1587 if (freelist) {
1588 /* Populate the per cpu freelist */
1589 this_cpu_write(s->cpu_slab->freelist, freelist);
1590 this_cpu_write(s->cpu_slab->page, page);
1591 this_cpu_write(s->cpu_slab->node, page_to_nid(page));
1483 return 1; 1592 return 1;
1593 } else {
1594 /*
1595 * Slab page came from the wrong list. No object to allocate
1596 * from. Put it onto the correct list and continue partial
1597 * scan.
1598 */
1599 printk(KERN_ERR "SLUB: %s : Page without available objects on"
1600 " partial list\n", s->name);
1601 return 0;
1484 } 1602 }
1485 return 0;
1486} 1603}
1487 1604
1488/* 1605/*
1489 * Try to allocate a partial slab from a specific node. 1606 * Try to allocate a partial slab from a specific node.
1490 */ 1607 */
1491static struct page *get_partial_node(struct kmem_cache_node *n) 1608static struct page *get_partial_node(struct kmem_cache *s,
1609 struct kmem_cache_node *n)
1492{ 1610{
1493 struct page *page; 1611 struct page *page;
1494 1612
@@ -1503,7 +1621,7 @@ static struct page *get_partial_node(struct kmem_cache_node *n)
1503 1621
1504 spin_lock(&n->list_lock); 1622 spin_lock(&n->list_lock);
1505 list_for_each_entry(page, &n->partial, lru) 1623 list_for_each_entry(page, &n->partial, lru)
1506 if (lock_and_freeze_slab(n, page)) 1624 if (acquire_slab(s, n, page))
1507 goto out; 1625 goto out;
1508 page = NULL; 1626 page = NULL;
1509out: 1627out:
@@ -1554,7 +1672,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
1554 1672
1555 if (n && cpuset_zone_allowed_hardwall(zone, flags) && 1673 if (n && cpuset_zone_allowed_hardwall(zone, flags) &&
1556 n->nr_partial > s->min_partial) { 1674 n->nr_partial > s->min_partial) {
1557 page = get_partial_node(n); 1675 page = get_partial_node(s, n);
1558 if (page) { 1676 if (page) {
1559 put_mems_allowed(); 1677 put_mems_allowed();
1560 return page; 1678 return page;
@@ -1574,60 +1692,13 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
1574 struct page *page; 1692 struct page *page;
1575 int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node; 1693 int searchnode = (node == NUMA_NO_NODE) ? numa_node_id() : node;
1576 1694
1577 page = get_partial_node(get_node(s, searchnode)); 1695 page = get_partial_node(s, get_node(s, searchnode));
1578 if (page || node != NUMA_NO_NODE) 1696 if (page || node != NUMA_NO_NODE)
1579 return page; 1697 return page;
1580 1698
1581 return get_any_partial(s, flags); 1699 return get_any_partial(s, flags);
1582} 1700}
1583 1701
1584/*
1585 * Move a page back to the lists.
1586 *
1587 * Must be called with the slab lock held.
1588 *
1589 * On exit the slab lock will have been dropped.
1590 */
1591static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
1592 __releases(bitlock)
1593{
1594 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1595
1596 __ClearPageSlubFrozen(page);
1597 if (page->inuse) {
1598
1599 if (page->freelist) {
1600 add_partial(n, page, tail);
1601 stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
1602 } else {
1603 stat(s, DEACTIVATE_FULL);
1604 if (kmem_cache_debug(s) && (s->flags & SLAB_STORE_USER))
1605 add_full(n, page);
1606 }
1607 slab_unlock(page);
1608 } else {
1609 stat(s, DEACTIVATE_EMPTY);
1610 if (n->nr_partial < s->min_partial) {
1611 /*
1612 * Adding an empty slab to the partial slabs in order
1613 * to avoid page allocator overhead. This slab needs
1614 * to come after the other slabs with objects in
1615 * so that the others get filled first. That way the
1616 * size of the partial list stays small.
1617 *
1618 * kmem_cache_shrink can reclaim any empty slabs from
1619 * the partial list.
1620 */
1621 add_partial(n, page, 1);
1622 slab_unlock(page);
1623 } else {
1624 slab_unlock(page);
1625 stat(s, FREE_SLAB);
1626 discard_slab(s, page);
1627 }
1628 }
1629}
1630
1631#ifdef CONFIG_PREEMPT 1702#ifdef CONFIG_PREEMPT
1632/* 1703/*
1633 * Calculate the next globally unique transaction for disambiguiation 1704 * Calculate the next globally unique transaction for disambiguiation
@@ -1697,42 +1768,161 @@ void init_kmem_cache_cpus(struct kmem_cache *s)
1697/* 1768/*
1698 * Remove the cpu slab 1769 * Remove the cpu slab
1699 */ 1770 */
1771
1772/*
1773 * Remove the cpu slab
1774 */
1700static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) 1775static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1701 __releases(bitlock)
1702{ 1776{
1777 enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
1703 struct page *page = c->page; 1778 struct page *page = c->page;
1704 int tail = 1; 1779 struct kmem_cache_node *n = get_node(s, page_to_nid(page));
1705 1780 int lock = 0;
1706 if (page->freelist) 1781 enum slab_modes l = M_NONE, m = M_NONE;
1782 void *freelist;
1783 void *nextfree;
1784 int tail = 0;
1785 struct page new;
1786 struct page old;
1787
1788 if (page->freelist) {
1707 stat(s, DEACTIVATE_REMOTE_FREES); 1789 stat(s, DEACTIVATE_REMOTE_FREES);
1790 tail = 1;
1791 }
1792
1793 c->tid = next_tid(c->tid);
1794 c->page = NULL;
1795 freelist = c->freelist;
1796 c->freelist = NULL;
1797
1798 /*
1799 * Stage one: Free all available per cpu objects back
1800 * to the page freelist while it is still frozen. Leave the
1801 * last one.
1802 *
1803 * There is no need to take the list->lock because the page
1804 * is still frozen.
1805 */
1806 while (freelist && (nextfree = get_freepointer(s, freelist))) {
1807 void *prior;
1808 unsigned long counters;
1809
1810 do {
1811 prior = page->freelist;
1812 counters = page->counters;
1813 set_freepointer(s, freelist, prior);
1814 new.counters = counters;
1815 new.inuse--;
1816 VM_BUG_ON(!new.frozen);
1817
1818 } while (!__cmpxchg_double_slab(s, page,
1819 prior, counters,
1820 freelist, new.counters,
1821 "drain percpu freelist"));
1822
1823 freelist = nextfree;
1824 }
1825
1708 /* 1826 /*
1709 * Merge cpu freelist into slab freelist. Typically we get here 1827 * Stage two: Ensure that the page is unfrozen while the
1710 * because both freelists are empty. So this is unlikely 1828 * list presence reflects the actual number of objects
1711 * to occur. 1829 * during unfreeze.
1830 *
1831 * We setup the list membership and then perform a cmpxchg
1832 * with the count. If there is a mismatch then the page
1833 * is not unfrozen but the page is on the wrong list.
1834 *
1835 * Then we restart the process which may have to remove
1836 * the page from the list that we just put it on again
1837 * because the number of objects in the slab may have
1838 * changed.
1712 */ 1839 */
1713 while (unlikely(c->freelist)) { 1840redo:
1714 void **object;
1715 1841
1716 tail = 0; /* Hot objects. Put the slab first */ 1842 old.freelist = page->freelist;
1843 old.counters = page->counters;
1844 VM_BUG_ON(!old.frozen);
1717 1845
1718 /* Retrieve object from cpu_freelist */ 1846 /* Determine target state of the slab */
1719 object = c->freelist; 1847 new.counters = old.counters;
1720 c->freelist = get_freepointer(s, c->freelist); 1848 if (freelist) {
1849 new.inuse--;
1850 set_freepointer(s, freelist, old.freelist);
1851 new.freelist = freelist;
1852 } else
1853 new.freelist = old.freelist;
1854
1855 new.frozen = 0;
1856
1857 if (!new.inuse && n->nr_partial < s->min_partial)
1858 m = M_FREE;
1859 else if (new.freelist) {
1860 m = M_PARTIAL;
1861 if (!lock) {
1862 lock = 1;
1863 /*
1864 * Taking the spinlock removes the possiblity
1865 * that acquire_slab() will see a slab page that
1866 * is frozen
1867 */
1868 spin_lock(&n->list_lock);
1869 }
1870 } else {
1871 m = M_FULL;
1872 if (kmem_cache_debug(s) && !lock) {
1873 lock = 1;
1874 /*
1875 * This also ensures that the scanning of full
1876 * slabs from diagnostic functions will not see
1877 * any frozen slabs.
1878 */
1879 spin_lock(&n->list_lock);
1880 }
1881 }
1882
1883 if (l != m) {
1884
1885 if (l == M_PARTIAL)
1886
1887 remove_partial(n, page);
1888
1889 else if (l == M_FULL)
1890
1891 remove_full(s, page);
1892
1893 if (m == M_PARTIAL) {
1894
1895 add_partial(n, page, tail);
1896 stat(s, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
1897
1898 } else if (m == M_FULL) {
1721 1899
1722 /* And put onto the regular freelist */ 1900 stat(s, DEACTIVATE_FULL);
1723 set_freepointer(s, object, page->freelist); 1901 add_full(s, n, page);
1724 page->freelist = object; 1902
1725 page->inuse--; 1903 }
1904 }
1905
1906 l = m;
1907 if (!__cmpxchg_double_slab(s, page,
1908 old.freelist, old.counters,
1909 new.freelist, new.counters,
1910 "unfreezing slab"))
1911 goto redo;
1912
1913 if (lock)
1914 spin_unlock(&n->list_lock);
1915
1916 if (m == M_FREE) {
1917 stat(s, DEACTIVATE_EMPTY);
1918 discard_slab(s, page);
1919 stat(s, FREE_SLAB);
1726 } 1920 }
1727 c->page = NULL;
1728 c->tid = next_tid(c->tid);
1729 unfreeze_slab(s, page, tail);
1730} 1921}
1731 1922
1732static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) 1923static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1733{ 1924{
1734 stat(s, CPUSLAB_FLUSH); 1925 stat(s, CPUSLAB_FLUSH);
1735 slab_lock(c->page);
1736 deactivate_slab(s, c); 1926 deactivate_slab(s, c);
1737} 1927}
1738 1928
@@ -1861,6 +2051,8 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
1861 void **object; 2051 void **object;
1862 struct page *page; 2052 struct page *page;
1863 unsigned long flags; 2053 unsigned long flags;
2054 struct page new;
2055 unsigned long counters;
1864 2056
1865 local_irq_save(flags); 2057 local_irq_save(flags);
1866#ifdef CONFIG_PREEMPT 2058#ifdef CONFIG_PREEMPT
@@ -1879,72 +2071,97 @@ static void *__slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
1879 if (!page) 2071 if (!page)
1880 goto new_slab; 2072 goto new_slab;
1881 2073
1882 slab_lock(page); 2074 if (unlikely(!node_match(c, node))) {
1883 if (unlikely(!node_match(c, node))) 2075 stat(s, ALLOC_NODE_MISMATCH);
1884 goto another_slab; 2076 deactivate_slab(s, c);
2077 goto new_slab;
2078 }
2079
2080 stat(s, ALLOC_SLOWPATH);
2081
2082 do {
2083 object = page->freelist;
2084 counters = page->counters;
2085 new.counters = counters;
2086 VM_BUG_ON(!new.frozen);
2087
2088 /*
2089 * If there is no object left then we use this loop to
2090 * deactivate the slab which is simple since no objects
2091 * are left in the slab and therefore we do not need to
2092 * put the page back onto the partial list.
2093 *
2094 * If there are objects left then we retrieve them
2095 * and use them to refill the per cpu queue.
2096 */
2097
2098 new.inuse = page->objects;
2099 new.frozen = object != NULL;
2100
2101 } while (!__cmpxchg_double_slab(s, page,
2102 object, counters,
2103 NULL, new.counters,
2104 "__slab_alloc"));
2105
2106 if (unlikely(!object)) {
2107 c->page = NULL;
2108 stat(s, DEACTIVATE_BYPASS);
2109 goto new_slab;
2110 }
1885 2111
1886 stat(s, ALLOC_REFILL); 2112 stat(s, ALLOC_REFILL);
1887 2113
1888load_freelist: 2114load_freelist:
1889 object = page->freelist; 2115 VM_BUG_ON(!page->frozen);
1890 if (unlikely(!object))
1891 goto another_slab;
1892 if (kmem_cache_debug(s))
1893 goto debug;
1894
1895 c->freelist = get_freepointer(s, object); 2116 c->freelist = get_freepointer(s, object);
1896 page->inuse = page->objects;
1897 page->freelist = NULL;
1898
1899 slab_unlock(page);
1900 c->tid = next_tid(c->tid); 2117 c->tid = next_tid(c->tid);
1901 local_irq_restore(flags); 2118 local_irq_restore(flags);
1902 stat(s, ALLOC_SLOWPATH);
1903 return object; 2119 return object;
1904 2120
1905another_slab:
1906 deactivate_slab(s, c);
1907
1908new_slab: 2121new_slab:
1909 page = get_partial(s, gfpflags, node); 2122 page = get_partial(s, gfpflags, node);
1910 if (page) { 2123 if (page) {
1911 stat(s, ALLOC_FROM_PARTIAL); 2124 stat(s, ALLOC_FROM_PARTIAL);
1912 c->node = page_to_nid(page); 2125 object = c->freelist;
1913 c->page = page; 2126
2127 if (kmem_cache_debug(s))
2128 goto debug;
1914 goto load_freelist; 2129 goto load_freelist;
1915 } 2130 }
1916 2131
1917 gfpflags &= gfp_allowed_mask;
1918 if (gfpflags & __GFP_WAIT)
1919 local_irq_enable();
1920
1921 page = new_slab(s, gfpflags, node); 2132 page = new_slab(s, gfpflags, node);
1922 2133
1923 if (gfpflags & __GFP_WAIT)
1924 local_irq_disable();
1925
1926 if (page) { 2134 if (page) {
1927 c = __this_cpu_ptr(s->cpu_slab); 2135 c = __this_cpu_ptr(s->cpu_slab);
1928 stat(s, ALLOC_SLAB);
1929 if (c->page) 2136 if (c->page)
1930 flush_slab(s, c); 2137 flush_slab(s, c);
1931 2138
1932 slab_lock(page); 2139 /*
1933 __SetPageSlubFrozen(page); 2140 * No other reference to the page yet so we can
2141 * muck around with it freely without cmpxchg
2142 */
2143 object = page->freelist;
2144 page->freelist = NULL;
2145 page->inuse = page->objects;
2146
2147 stat(s, ALLOC_SLAB);
1934 c->node = page_to_nid(page); 2148 c->node = page_to_nid(page);
1935 c->page = page; 2149 c->page = page;
2150
2151 if (kmem_cache_debug(s))
2152 goto debug;
1936 goto load_freelist; 2153 goto load_freelist;
1937 } 2154 }
1938 if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit()) 2155 if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
1939 slab_out_of_memory(s, gfpflags, node); 2156 slab_out_of_memory(s, gfpflags, node);
1940 local_irq_restore(flags); 2157 local_irq_restore(flags);
1941 return NULL; 2158 return NULL;
2159
1942debug: 2160debug:
1943 if (!alloc_debug_processing(s, page, object, addr)) 2161 if (!object || !alloc_debug_processing(s, page, object, addr))
1944 goto another_slab; 2162 goto new_slab;
1945 2163
1946 page->inuse++; 2164 c->freelist = get_freepointer(s, object);
1947 page->freelist = get_freepointer(s, object);
1948 deactivate_slab(s, c); 2165 deactivate_slab(s, c);
1949 c->page = NULL; 2166 c->page = NULL;
1950 c->node = NUMA_NO_NODE; 2167 c->node = NUMA_NO_NODE;
@@ -2096,40 +2313,75 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
2096{ 2313{
2097 void *prior; 2314 void *prior;
2098 void **object = (void *)x; 2315 void **object = (void *)x;
2099 unsigned long flags; 2316 int was_frozen;
2317 int inuse;
2318 struct page new;
2319 unsigned long counters;
2320 struct kmem_cache_node *n = NULL;
2321 unsigned long uninitialized_var(flags);
2100 2322
2101 local_irq_save(flags);
2102 slab_lock(page);
2103 stat(s, FREE_SLOWPATH); 2323 stat(s, FREE_SLOWPATH);
2104 2324
2105 if (kmem_cache_debug(s) && !free_debug_processing(s, page, x, addr)) 2325 if (kmem_cache_debug(s) && !free_debug_processing(s, page, x, addr))
2106 goto out_unlock; 2326 return;
2107 2327
2108 prior = page->freelist; 2328 do {
2109 set_freepointer(s, object, prior); 2329 prior = page->freelist;
2110 page->freelist = object; 2330 counters = page->counters;
2111 page->inuse--; 2331 set_freepointer(s, object, prior);
2332 new.counters = counters;
2333 was_frozen = new.frozen;
2334 new.inuse--;
2335 if ((!new.inuse || !prior) && !was_frozen && !n) {
2336 n = get_node(s, page_to_nid(page));
2337 /*
2338 * Speculatively acquire the list_lock.
2339 * If the cmpxchg does not succeed then we may
2340 * drop the list_lock without any processing.
2341 *
2342 * Otherwise the list_lock will synchronize with
2343 * other processors updating the list of slabs.
2344 */
2345 spin_lock_irqsave(&n->list_lock, flags);
2346 }
2347 inuse = new.inuse;
2112 2348
2113 if (unlikely(PageSlubFrozen(page))) { 2349 } while (!cmpxchg_double_slab(s, page,
2114 stat(s, FREE_FROZEN); 2350 prior, counters,
2115 goto out_unlock; 2351 object, new.counters,
2116 } 2352 "__slab_free"));
2117 2353
2118 if (unlikely(!page->inuse)) 2354 if (likely(!n)) {
2119 goto slab_empty; 2355 /*
2356 * The list lock was not taken therefore no list
2357 * activity can be necessary.
2358 */
2359 if (was_frozen)
2360 stat(s, FREE_FROZEN);
2361 return;
2362 }
2120 2363
2121 /* 2364 /*
2122 * Objects left in the slab. If it was not on the partial list before 2365 * was_frozen may have been set after we acquired the list_lock in
2123 * then add it. 2366 * an earlier loop. So we need to check it here again.
2124 */ 2367 */
2125 if (unlikely(!prior)) { 2368 if (was_frozen)
2126 add_partial(get_node(s, page_to_nid(page)), page, 1); 2369 stat(s, FREE_FROZEN);
2127 stat(s, FREE_ADD_PARTIAL); 2370 else {
2128 } 2371 if (unlikely(!inuse && n->nr_partial > s->min_partial))
2372 goto slab_empty;
2129 2373
2130out_unlock: 2374 /*
2131 slab_unlock(page); 2375 * Objects left in the slab. If it was not on the partial list before
2132 local_irq_restore(flags); 2376 * then add it.
2377 */
2378 if (unlikely(!prior)) {
2379 remove_full(s, page);
2380 add_partial(n, page, 0);
2381 stat(s, FREE_ADD_PARTIAL);
2382 }
2383 }
2384 spin_unlock_irqrestore(&n->list_lock, flags);
2133 return; 2385 return;
2134 2386
2135slab_empty: 2387slab_empty:
@@ -2137,11 +2389,11 @@ slab_empty:
2137 /* 2389 /*
2138 * Slab still on the partial list. 2390 * Slab still on the partial list.
2139 */ 2391 */
2140 remove_partial(s, page); 2392 remove_partial(n, page);
2141 stat(s, FREE_REMOVE_PARTIAL); 2393 stat(s, FREE_REMOVE_PARTIAL);
2142 } 2394 }
2143 slab_unlock(page); 2395
2144 local_irq_restore(flags); 2396 spin_unlock_irqrestore(&n->list_lock, flags);
2145 stat(s, FREE_SLAB); 2397 stat(s, FREE_SLAB);
2146 discard_slab(s, page); 2398 discard_slab(s, page);
2147} 2399}
@@ -2415,7 +2667,6 @@ static void early_kmem_cache_node_alloc(int node)
2415{ 2667{
2416 struct page *page; 2668 struct page *page;
2417 struct kmem_cache_node *n; 2669 struct kmem_cache_node *n;
2418 unsigned long flags;
2419 2670
2420 BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node)); 2671 BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
2421 2672
@@ -2433,6 +2684,7 @@ static void early_kmem_cache_node_alloc(int node)
2433 BUG_ON(!n); 2684 BUG_ON(!n);
2434 page->freelist = get_freepointer(kmem_cache_node, n); 2685 page->freelist = get_freepointer(kmem_cache_node, n);
2435 page->inuse++; 2686 page->inuse++;
2687 page->frozen = 0;
2436 kmem_cache_node->node[node] = n; 2688 kmem_cache_node->node[node] = n;
2437#ifdef CONFIG_SLUB_DEBUG 2689#ifdef CONFIG_SLUB_DEBUG
2438 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE); 2690 init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
@@ -2441,14 +2693,7 @@ static void early_kmem_cache_node_alloc(int node)
2441 init_kmem_cache_node(n, kmem_cache_node); 2693 init_kmem_cache_node(n, kmem_cache_node);
2442 inc_slabs_node(kmem_cache_node, node, page->objects); 2694 inc_slabs_node(kmem_cache_node, node, page->objects);
2443 2695
2444 /*
2445 * lockdep requires consistent irq usage for each lock
2446 * so even though there cannot be a race this early in
2447 * the boot sequence, we still disable irqs.
2448 */
2449 local_irq_save(flags);
2450 add_partial(n, page, 0); 2696 add_partial(n, page, 0);
2451 local_irq_restore(flags);
2452} 2697}
2453 2698
2454static void free_kmem_cache_nodes(struct kmem_cache *s) 2699static void free_kmem_cache_nodes(struct kmem_cache *s)
@@ -2654,6 +2899,12 @@ static int kmem_cache_open(struct kmem_cache *s,
2654 } 2899 }
2655 } 2900 }
2656 2901
2902#ifdef CONFIG_CMPXCHG_DOUBLE
2903 if (system_has_cmpxchg_double() && (s->flags & SLAB_DEBUG_FLAGS) == 0)
2904 /* Enable fast mode */
2905 s->flags |= __CMPXCHG_DOUBLE;
2906#endif
2907
2657 /* 2908 /*
2658 * The larger the object size is, the more pages we want on the partial 2909 * The larger the object size is, the more pages we want on the partial
2659 * list to avoid pounding the page allocator excessively. 2910 * list to avoid pounding the page allocator excessively.
@@ -2726,7 +2977,7 @@ static void free_partial(struct kmem_cache *s, struct kmem_cache_node *n)
2726 spin_lock_irqsave(&n->list_lock, flags); 2977 spin_lock_irqsave(&n->list_lock, flags);
2727 list_for_each_entry_safe(page, h, &n->partial, lru) { 2978 list_for_each_entry_safe(page, h, &n->partial, lru) {
2728 if (!page->inuse) { 2979 if (!page->inuse) {
2729 __remove_partial(n, page); 2980 remove_partial(n, page);
2730 discard_slab(s, page); 2981 discard_slab(s, page);
2731 } else { 2982 } else {
2732 list_slab_objects(s, page, 2983 list_slab_objects(s, page,
@@ -3094,14 +3345,8 @@ int kmem_cache_shrink(struct kmem_cache *s)
3094 * list_lock. page->inuse here is the upper limit. 3345 * list_lock. page->inuse here is the upper limit.
3095 */ 3346 */
3096 list_for_each_entry_safe(page, t, &n->partial, lru) { 3347 list_for_each_entry_safe(page, t, &n->partial, lru) {
3097 if (!page->inuse && slab_trylock(page)) { 3348 if (!page->inuse) {
3098 /* 3349 remove_partial(n, page);
3099 * Must hold slab lock here because slab_free
3100 * may have freed the last object and be
3101 * waiting to release the slab.
3102 */
3103 __remove_partial(n, page);
3104 slab_unlock(page);
3105 discard_slab(s, page); 3350 discard_slab(s, page);
3106 } else { 3351 } else {
3107 list_move(&page->lru, 3352 list_move(&page->lru,
@@ -3689,12 +3934,9 @@ static int validate_slab(struct kmem_cache *s, struct page *page,
3689static void validate_slab_slab(struct kmem_cache *s, struct page *page, 3934static void validate_slab_slab(struct kmem_cache *s, struct page *page,
3690 unsigned long *map) 3935 unsigned long *map)
3691{ 3936{
3692 if (slab_trylock(page)) { 3937 slab_lock(page);
3693 validate_slab(s, page, map); 3938 validate_slab(s, page, map);
3694 slab_unlock(page); 3939 slab_unlock(page);
3695 } else
3696 printk(KERN_INFO "SLUB %s: Skipped busy slab 0x%p\n",
3697 s->name, page);
3698} 3940}
3699 3941
3700static int validate_slab_node(struct kmem_cache *s, 3942static int validate_slab_node(struct kmem_cache *s,
@@ -4342,8 +4584,10 @@ static ssize_t sanity_checks_store(struct kmem_cache *s,
4342 const char *buf, size_t length) 4584 const char *buf, size_t length)
4343{ 4585{
4344 s->flags &= ~SLAB_DEBUG_FREE; 4586 s->flags &= ~SLAB_DEBUG_FREE;
4345 if (buf[0] == '1') 4587 if (buf[0] == '1') {
4588 s->flags &= ~__CMPXCHG_DOUBLE;
4346 s->flags |= SLAB_DEBUG_FREE; 4589 s->flags |= SLAB_DEBUG_FREE;
4590 }
4347 return length; 4591 return length;
4348} 4592}
4349SLAB_ATTR(sanity_checks); 4593SLAB_ATTR(sanity_checks);
@@ -4357,8 +4601,10 @@ static ssize_t trace_store(struct kmem_cache *s, const char *buf,
4357 size_t length) 4601 size_t length)
4358{ 4602{
4359 s->flags &= ~SLAB_TRACE; 4603 s->flags &= ~SLAB_TRACE;
4360 if (buf[0] == '1') 4604 if (buf[0] == '1') {
4605 s->flags &= ~__CMPXCHG_DOUBLE;
4361 s->flags |= SLAB_TRACE; 4606 s->flags |= SLAB_TRACE;
4607 }
4362 return length; 4608 return length;
4363} 4609}
4364SLAB_ATTR(trace); 4610SLAB_ATTR(trace);
@@ -4375,8 +4621,10 @@ static ssize_t red_zone_store(struct kmem_cache *s,
4375 return -EBUSY; 4621 return -EBUSY;
4376 4622
4377 s->flags &= ~SLAB_RED_ZONE; 4623 s->flags &= ~SLAB_RED_ZONE;
4378 if (buf[0] == '1') 4624 if (buf[0] == '1') {
4625 s->flags &= ~__CMPXCHG_DOUBLE;
4379 s->flags |= SLAB_RED_ZONE; 4626 s->flags |= SLAB_RED_ZONE;
4627 }
4380 calculate_sizes(s, -1); 4628 calculate_sizes(s, -1);
4381 return length; 4629 return length;
4382} 4630}
@@ -4394,8 +4642,10 @@ static ssize_t poison_store(struct kmem_cache *s,
4394 return -EBUSY; 4642 return -EBUSY;
4395 4643
4396 s->flags &= ~SLAB_POISON; 4644 s->flags &= ~SLAB_POISON;
4397 if (buf[0] == '1') 4645 if (buf[0] == '1') {
4646 s->flags &= ~__CMPXCHG_DOUBLE;
4398 s->flags |= SLAB_POISON; 4647 s->flags |= SLAB_POISON;
4648 }
4399 calculate_sizes(s, -1); 4649 calculate_sizes(s, -1);
4400 return length; 4650 return length;
4401} 4651}
@@ -4413,8 +4663,10 @@ static ssize_t store_user_store(struct kmem_cache *s,
4413 return -EBUSY; 4663 return -EBUSY;
4414 4664
4415 s->flags &= ~SLAB_STORE_USER; 4665 s->flags &= ~SLAB_STORE_USER;
4416 if (buf[0] == '1') 4666 if (buf[0] == '1') {
4667 s->flags &= ~__CMPXCHG_DOUBLE;
4417 s->flags |= SLAB_STORE_USER; 4668 s->flags |= SLAB_STORE_USER;
4669 }
4418 calculate_sizes(s, -1); 4670 calculate_sizes(s, -1);
4419 return length; 4671 return length;
4420} 4672}
@@ -4579,6 +4831,7 @@ STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
4579STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial); 4831STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
4580STAT_ATTR(ALLOC_SLAB, alloc_slab); 4832STAT_ATTR(ALLOC_SLAB, alloc_slab);
4581STAT_ATTR(ALLOC_REFILL, alloc_refill); 4833STAT_ATTR(ALLOC_REFILL, alloc_refill);
4834STAT_ATTR(ALLOC_NODE_MISMATCH, alloc_node_mismatch);
4582STAT_ATTR(FREE_SLAB, free_slab); 4835STAT_ATTR(FREE_SLAB, free_slab);
4583STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush); 4836STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
4584STAT_ATTR(DEACTIVATE_FULL, deactivate_full); 4837STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
@@ -4586,7 +4839,10 @@ STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
4586STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head); 4839STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
4587STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail); 4840STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
4588STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees); 4841STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
4842STAT_ATTR(DEACTIVATE_BYPASS, deactivate_bypass);
4589STAT_ATTR(ORDER_FALLBACK, order_fallback); 4843STAT_ATTR(ORDER_FALLBACK, order_fallback);
4844STAT_ATTR(CMPXCHG_DOUBLE_CPU_FAIL, cmpxchg_double_cpu_fail);
4845STAT_ATTR(CMPXCHG_DOUBLE_FAIL, cmpxchg_double_fail);
4590#endif 4846#endif
4591 4847
4592static struct attribute *slab_attrs[] = { 4848static struct attribute *slab_attrs[] = {
@@ -4636,6 +4892,7 @@ static struct attribute *slab_attrs[] = {
4636 &alloc_from_partial_attr.attr, 4892 &alloc_from_partial_attr.attr,
4637 &alloc_slab_attr.attr, 4893 &alloc_slab_attr.attr,
4638 &alloc_refill_attr.attr, 4894 &alloc_refill_attr.attr,
4895 &alloc_node_mismatch_attr.attr,
4639 &free_slab_attr.attr, 4896 &free_slab_attr.attr,
4640 &cpuslab_flush_attr.attr, 4897 &cpuslab_flush_attr.attr,
4641 &deactivate_full_attr.attr, 4898 &deactivate_full_attr.attr,
@@ -4643,7 +4900,10 @@ static struct attribute *slab_attrs[] = {
4643 &deactivate_to_head_attr.attr, 4900 &deactivate_to_head_attr.attr,
4644 &deactivate_to_tail_attr.attr, 4901 &deactivate_to_tail_attr.attr,
4645 &deactivate_remote_frees_attr.attr, 4902 &deactivate_remote_frees_attr.attr,
4903 &deactivate_bypass_attr.attr,
4646 &order_fallback_attr.attr, 4904 &order_fallback_attr.attr,
4905 &cmpxchg_double_fail_attr.attr,
4906 &cmpxchg_double_cpu_fail_attr.attr,
4647#endif 4907#endif
4648#ifdef CONFIG_FAILSLAB 4908#ifdef CONFIG_FAILSLAB
4649 &failslab_attr.attr, 4909 &failslab_attr.attr,