Diffstat (limited to 'mm')
-rw-r--r--	mm/slub.c	326
1 file changed, 275 insertions(+), 51 deletions(-)
diff --git a/mm/slub.c b/mm/slub.c
index 3f056677fa8f..e2989ae243b5 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -149,6 +149,13 @@ static inline void ClearSlabDebug(struct page *page)
 /* Enable to test recovery from slab corruption on boot */
 #undef SLUB_RESILIENCY_TEST
 
+/*
+ * Currently fastpath is not supported if preemption is enabled.
+ */
+#if defined(CONFIG_FAST_CMPXCHG_LOCAL) && !defined(CONFIG_PREEMPT)
+#define SLUB_FASTPATH
+#endif
+
 #if PAGE_SHIFT <= 12
 
 /*
@@ -243,6 +250,7 @@ enum track_item { TRACK_ALLOC, TRACK_FREE };
 static int sysfs_slab_add(struct kmem_cache *);
 static int sysfs_slab_alias(struct kmem_cache *, const char *);
 static void sysfs_slab_remove(struct kmem_cache *);
+
 #else
 static inline int sysfs_slab_add(struct kmem_cache *s) { return 0; }
 static inline int sysfs_slab_alias(struct kmem_cache *s, const char *p)
@@ -251,8 +259,16 @@ static inline void sysfs_slab_remove(struct kmem_cache *s)
 {
 	kfree(s);
 }
+
 #endif
 
+static inline void stat(struct kmem_cache_cpu *c, enum stat_item si)
+{
+#ifdef CONFIG_SLUB_STATS
+	c->stat[si]++;
+#endif
+}
+
 /********************************************************************
  *			Core slab cache functions
  *******************************************************************/
@@ -280,15 +296,32 @@ static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu)
 #endif
 }
 
+/*
+ * The end pointer in a slab is special. It points to the first object in the
+ * slab but has bit 0 set to mark it.
+ *
+ * Note that SLUB relies on page_mapping returning NULL for pages with bit 0
+ * in the mapping set.
+ */
+static inline int is_end(void *addr)
+{
+	return (unsigned long)addr & PAGE_MAPPING_ANON;
+}
+
+void *slab_address(struct page *page)
+{
+	return page->end - PAGE_MAPPING_ANON;
+}
+
 static inline int check_valid_pointer(struct kmem_cache *s,
 				struct page *page, const void *object)
 {
 	void *base;
 
-	if (!object)
+	if (object == page->end)
 		return 1;
 
-	base = page_address(page);
+	base = slab_address(page);
 	if (object < base || object >= base + s->objects * s->size ||
 		(object - base) % s->size) {
 		return 0;
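
For readers unfamiliar with the trick above: page->end holds the slab's base address with bit 0 set (the same bit PAGE_MAPPING_ANON uses), so a single pointer compare replaces the old NULL end-of-freelist test while the base address stays recoverable. Below is a stand-alone user-space sketch of that encoding, not taken from the patch; END_MARKER and the helper names are made up for illustration.

#include <stdio.h>

#define END_MARKER	0x1UL	/* stand-in for PAGE_MAPPING_ANON (bit 0) */

/* What new_slab() does with "page->end = start + 1": base address plus 1
 * has bit 0 set, because the slab base is at least word aligned. */
static void *make_end(void *slab_base)
{
	return (void *)((unsigned long)slab_base + END_MARKER);
}

/* Counterpart of is_end(): bit 0 distinguishes the end marker from a real
 * object pointer, which is always at least word aligned. */
static int is_end_marker(void *addr)
{
	return (unsigned long)addr & END_MARKER;
}

/* Counterpart of slab_address(): strip bit 0 to get the base back. */
static void *base_of(void *end)
{
	return (void *)((unsigned long)end - END_MARKER);
}

int main(void)
{
	long slab[8];			/* pretend slab memory */
	void *end = make_end(slab);

	printf("is_end=%d base=%p recovered=%p\n",
	       is_end_marker(end), (void *)slab, base_of(end));
	return 0;
}
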
@@ -321,7 +354,8 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
 
 /* Scan freelist */
 #define for_each_free_object(__p, __s, __free) \
-	for (__p = (__free); __p; __p = get_freepointer((__s), __p))
+	for (__p = (__free); (__p) != page->end; __p = get_freepointer((__s),\
+			__p))
 
 /* Determine object index from a given position */
 static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
@@ -473,7 +507,7 @@ static void slab_fix(struct kmem_cache *s, char *fmt, ...)
 static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
 {
 	unsigned int off;	/* Offset of last byte */
-	u8 *addr = page_address(page);
+	u8 *addr = slab_address(page);
 
 	print_tracking(s, p);
 
@@ -651,7 +685,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
 	if (!(s->flags & SLAB_POISON))
 		return 1;
 
-	start = page_address(page);
+	start = slab_address(page);
 	end = start + (PAGE_SIZE << s->order);
 	length = s->objects * s->size;
 	remainder = end - (start + length);
@@ -685,9 +719,10 @@ static int check_object(struct kmem_cache *s, struct page *page,
 			endobject, red, s->inuse - s->objsize))
 			return 0;
 	} else {
-		if ((s->flags & SLAB_POISON) && s->objsize < s->inuse)
-			check_bytes_and_report(s, page, p, "Alignment padding", endobject,
-				POISON_INUSE, s->inuse - s->objsize);
+		if ((s->flags & SLAB_POISON) && s->objsize < s->inuse) {
+			check_bytes_and_report(s, page, p, "Alignment padding",
+				endobject, POISON_INUSE, s->inuse - s->objsize);
+		}
 	}
 
 	if (s->flags & SLAB_POISON) {
@@ -718,7 +753,7 @@ static int check_object(struct kmem_cache *s, struct page *page,
 		 * of the free objects in this slab. May cause
 		 * another error because the object count is now wrong.
 		 */
-		set_freepointer(s, p, NULL);
+		set_freepointer(s, p, page->end);
 		return 0;
 	}
 	return 1;
@@ -752,18 +787,18 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
 	void *fp = page->freelist;
 	void *object = NULL;
 
-	while (fp && nr <= s->objects) {
+	while (fp != page->end && nr <= s->objects) {
 		if (fp == search)
 			return 1;
 		if (!check_valid_pointer(s, page, fp)) {
 			if (object) {
 				object_err(s, page, object,
					"Freechain corrupt");
-				set_freepointer(s, object, NULL);
+				set_freepointer(s, object, page->end);
 				break;
 			} else {
 				slab_err(s, page, "Freepointer corrupt");
-				page->freelist = NULL;
+				page->freelist = page->end;
 				page->inuse = s->objects;
 				slab_fix(s, "Freelist cleared");
 				return 0;
@@ -869,7 +904,7 @@ bad:
 		 */
 		slab_fix(s, "Marking all objects used");
 		page->inuse = s->objects;
-		page->freelist = NULL;
+		page->freelist = page->end;
 	}
 	return 0;
 }
@@ -894,11 +929,10 @@ static int free_debug_processing(struct kmem_cache *s, struct page *page,
 		return 0;
 
 	if (unlikely(s != page->slab)) {
-		if (!PageSlab(page))
+		if (!PageSlab(page)) {
 			slab_err(s, page, "Attempt to free object(0x%p) "
 				"outside of slab", object);
-		else
-		if (!page->slab) {
+		} else if (!page->slab) {
 			printk(KERN_ERR
 				"SLUB <none>: no slab for object 0x%p.\n",
						object);
@@ -910,7 +944,7 @@ static int free_debug_processing(struct kmem_cache *s, struct page *page,
 	}
 
 	/* Special debug activities for freeing objects */
-	if (!SlabFrozen(page) && !page->freelist)
+	if (!SlabFrozen(page) && page->freelist == page->end)
 		remove_full(s, page);
 	if (s->flags & SLAB_STORE_USER)
 		set_track(s, object, TRACK_FREE, addr);
@@ -1007,7 +1041,7 @@ static unsigned long kmem_cache_flags(unsigned long objsize,
 	 */
 	if (slub_debug && (!slub_debug_slabs ||
 	    strncmp(slub_debug_slabs, name,
-	    	strlen(slub_debug_slabs)) == 0))
+		strlen(slub_debug_slabs)) == 0))
 			flags |= slub_debug;
 	}
 
@@ -1102,6 +1136,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 		SetSlabDebug(page);
 
 	start = page_address(page);
+	page->end = start + 1;
 
 	if (unlikely(s->flags & SLAB_POISON))
 		memset(start, POISON_INUSE, PAGE_SIZE << s->order);
@@ -1113,7 +1148,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 		last = p;
 	}
 	setup_object(s, page, last);
-	set_freepointer(s, last, NULL);
+	set_freepointer(s, last, page->end);
 
 	page->freelist = start;
 	page->inuse = 0;
@@ -1129,7 +1164,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 		void *p;
 
 		slab_pad_check(s, page);
-		for_each_object(p, s, page_address(page))
+		for_each_object(p, s, slab_address(page))
 			check_object(s, page, p, 0);
 		ClearSlabDebug(page);
 	}
@@ -1139,6 +1174,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 		NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
 		-pages);
 
+	page->mapping = NULL;
 	__free_pages(page, s->order);
 }
 
@@ -1183,7 +1219,7 @@ static __always_inline void slab_lock(struct page *page)
 
 static __always_inline void slab_unlock(struct page *page)
 {
-	bit_spin_unlock(PG_locked, &page->flags);
+	__bit_spin_unlock(PG_locked, &page->flags);
 }
 
 static __always_inline int slab_trylock(struct page *page)
@@ -1294,8 +1330,8 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
 			get_cycles() % 1024 > s->remote_node_defrag_ratio)
 		return NULL;
 
-	zonelist = &NODE_DATA(slab_node(current->mempolicy))
-			->node_zonelists[gfp_zone(flags)];
+	zonelist = &NODE_DATA(
		slab_node(current->mempolicy))->node_zonelists[gfp_zone(flags)];
 	for (z = zonelist->zones; *z; z++) {
 		struct kmem_cache_node *n;
 
@@ -1337,17 +1373,22 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
 static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 {
 	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
+	struct kmem_cache_cpu *c = get_cpu_slab(s, smp_processor_id());
 
 	ClearSlabFrozen(page);
 	if (page->inuse) {
 
-		if (page->freelist)
+		if (page->freelist != page->end) {
 			add_partial(n, page, tail);
-		else if (SlabDebug(page) && (s->flags & SLAB_STORE_USER))
-			add_full(n, page);
+			stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
+		} else {
+			stat(c, DEACTIVATE_FULL);
+			if (SlabDebug(page) && (s->flags & SLAB_STORE_USER))
+				add_full(n, page);
+		}
 		slab_unlock(page);
-
 	} else {
+		stat(c, DEACTIVATE_EMPTY);
 		if (n->nr_partial < MIN_PARTIAL) {
 			/*
 			 * Adding an empty slab to the partial slabs in order
@@ -1361,6 +1402,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 			slab_unlock(page);
 		} else {
 			slab_unlock(page);
+			stat(get_cpu_slab(s, raw_smp_processor_id()), FREE_SLAB);
 			discard_slab(s, page);
 		}
 	}
@@ -1373,12 +1415,19 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 {
 	struct page *page = c->page;
 	int tail = 1;
+
+	if (c->freelist)
+		stat(c, DEACTIVATE_REMOTE_FREES);
 	/*
 	 * Merge cpu freelist into freelist. Typically we get here
 	 * because both freelists are empty. So this is unlikely
 	 * to occur.
+	 *
+	 * We need to use _is_end here because deactivate slab may
+	 * be called for a debug slab. Then c->freelist may contain
+	 * a dummy pointer.
 	 */
-	while (unlikely(c->freelist)) {
+	while (unlikely(!is_end(c->freelist))) {
 		void **object;
 
 		tail = 0;	/* Hot objects. Put the slab first */
@@ -1398,6 +1447,7 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 
 static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
 {
+	stat(c, CPUSLAB_FLUSH);
 	slab_lock(c->page);
 	deactivate_slab(s, c);
 }
@@ -1469,16 +1519,21 @@ static void *__slab_alloc(struct kmem_cache *s,
 {
 	void **object;
 	struct page *new;
+#ifdef SLUB_FASTPATH
+	unsigned long flags;
 
+	local_irq_save(flags);
+#endif
 	if (!c->page)
 		goto new_slab;
 
 	slab_lock(c->page);
 	if (unlikely(!node_match(c, node)))
 		goto another_slab;
+	stat(c, ALLOC_REFILL);
 load_freelist:
 	object = c->page->freelist;
-	if (unlikely(!object))
+	if (unlikely(object == c->page->end))
 		goto another_slab;
 	if (unlikely(SlabDebug(c->page)))
 		goto debug;
@@ -1486,9 +1541,15 @@ load_freelist:
 	object = c->page->freelist;
 	c->freelist = object[c->offset];
 	c->page->inuse = s->objects;
-	c->page->freelist = NULL;
+	c->page->freelist = c->page->end;
 	c->node = page_to_nid(c->page);
+unlock_out:
 	slab_unlock(c->page);
+	stat(c, ALLOC_SLOWPATH);
+out:
+#ifdef SLUB_FASTPATH
+	local_irq_restore(flags);
+#endif
 	return object;
 
 another_slab:
@@ -1498,6 +1559,7 @@ new_slab:
 	new = get_partial(s, gfpflags, node);
 	if (new) {
 		c->page = new;
+		stat(c, ALLOC_FROM_PARTIAL);
 		goto load_freelist;
 	}
 
@@ -1511,6 +1573,7 @@ new_slab:
 
 	if (new) {
 		c = get_cpu_slab(s, smp_processor_id());
+		stat(c, ALLOC_SLAB);
 		if (c->page)
 			flush_slab(s, c);
 		slab_lock(new);
@@ -1518,7 +1581,8 @@ new_slab:
 		c->page = new;
 		goto load_freelist;
 	}
-	return NULL;
+	object = NULL;
+	goto out;
 debug:
 	object = c->page->freelist;
 	if (!alloc_debug_processing(s, c->page, object, addr))
@@ -1527,8 +1591,7 @@ debug:
 	c->page->inuse++;
 	c->page->freelist = object[c->offset];
 	c->node = -1;
-	slab_unlock(c->page);
-	return object;
+	goto unlock_out;
 }
 
 /*
@@ -1545,20 +1608,50 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 		gfp_t gfpflags, int node, void *addr)
 {
 	void **object;
-	unsigned long flags;
 	struct kmem_cache_cpu *c;
 
+/*
+ * The SLUB_FASTPATH path is provisional and is currently disabled if the
+ * kernel is compiled with preemption or if the arch does not support
+ * fast cmpxchg operations. There are a couple of coming changes that will
+ * simplify matters and allow preemption. Ultimately we may end up making
+ * SLUB_FASTPATH the default.
+ *
+ * 1. The introduction of the per cpu allocator will avoid array lookups
+ *    through get_cpu_slab(). A special register can be used instead.
+ *
+ * 2. The introduction of per cpu atomic operations (cpu_ops) means that
+ *    we can realize the logic here entirely with per cpu atomics. The
+ *    per cpu atomic ops will take care of the preemption issues.
+ */
+
+#ifdef SLUB_FASTPATH
+	c = get_cpu_slab(s, raw_smp_processor_id());
+	do {
+		object = c->freelist;
+		if (unlikely(is_end(object) || !node_match(c, node))) {
+			object = __slab_alloc(s, gfpflags, node, addr, c);
+			break;
+		}
+		stat(c, ALLOC_FASTPATH);
+	} while (cmpxchg_local(&c->freelist, object, object[c->offset])
+								!= object);
+#else
+	unsigned long flags;
+
 	local_irq_save(flags);
 	c = get_cpu_slab(s, smp_processor_id());
-	if (unlikely(!c->freelist || !node_match(c, node)))
+	if (unlikely(is_end(c->freelist) || !node_match(c, node)))
 
 		object = __slab_alloc(s, gfpflags, node, addr, c);
 
 	else {
 		object = c->freelist;
 		c->freelist = object[c->offset];
+		stat(c, ALLOC_FASTPATH);
 	}
 	local_irq_restore(flags);
+#endif
 
 	if (unlikely((gfpflags & __GFP_ZERO) && object))
 		memset(object, 0, c->objsize);
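
To make the do/while above easier to follow: it is the classic lock-free list pop, retried whenever the freelist head changed between the read and the compare-and-swap. Here is a rough user-space sketch of the same pattern, not from the patch, with the GCC __sync builtin standing in for cmpxchg_local (which the patch prefers because it only has to be atomic against interrupts on the local CPU and is therefore cheaper than a fully locked cmpxchg).

#include <stdio.h>

struct object {
	struct object *next;		/* free pointer, like object[c->offset] */
};

static struct object *freelist;		/* stands in for c->freelist */

/* Pop the head object; retry if another path replaced the head meanwhile. */
static struct object *pop(void)
{
	struct object *object;

	do {
		object = freelist;
		if (!object)		/* the kernel code tests is_end() here */
			return NULL;	/* ...and falls back to __slab_alloc() */
	} while (__sync_val_compare_and_swap(&freelist, object, object->next)
			!= object);
	return object;
}

int main(void)
{
	struct object a = { NULL };
	struct object b = { &a };

	freelist = &b;
	printf("popped %p then %p then %p\n",
	       (void *)pop(), (void *)pop(), (void *)pop());
	return 0;
}
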
@@ -1593,7 +1686,15 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 {
 	void *prior;
 	void **object = (void *)x;
+	struct kmem_cache_cpu *c;
+
+#ifdef SLUB_FASTPATH
+	unsigned long flags;
 
+	local_irq_save(flags);
+#endif
+	c = get_cpu_slab(s, raw_smp_processor_id());
+	stat(c, FREE_SLOWPATH);
 	slab_lock(page);
 
 	if (unlikely(SlabDebug(page)))
@@ -1603,8 +1704,10 @@ checks_ok:
 	page->freelist = object;
 	page->inuse--;
 
-	if (unlikely(SlabFrozen(page)))
+	if (unlikely(SlabFrozen(page))) {
+		stat(c, FREE_FROZEN);
 		goto out_unlock;
+	}
 
 	if (unlikely(!page->inuse))
 		goto slab_empty;
@@ -1614,21 +1717,31 @@ checks_ok:
 	 * was not on the partial list before
 	 * then add it.
 	 */
-	if (unlikely(!prior))
+	if (unlikely(prior == page->end)) {
 		add_partial(get_node(s, page_to_nid(page)), page, 1);
+		stat(c, FREE_ADD_PARTIAL);
+	}
 
 out_unlock:
 	slab_unlock(page);
+#ifdef SLUB_FASTPATH
+	local_irq_restore(flags);
+#endif
 	return;
 
 slab_empty:
-	if (prior)
+	if (prior != page->end) {
 		/*
 		 * Slab still on the partial list.
 		 */
 		remove_partial(s, page);
-
+		stat(c, FREE_REMOVE_PARTIAL);
+	}
 	slab_unlock(page);
+	stat(c, FREE_SLAB);
+#ifdef SLUB_FASTPATH
+	local_irq_restore(flags);
+#endif
 	discard_slab(s, page);
 	return;
 
@@ -1653,19 +1766,49 @@ static __always_inline void slab_free(struct kmem_cache *s,
 			struct page *page, void *x, void *addr)
 {
 	void **object = (void *)x;
-	unsigned long flags;
 	struct kmem_cache_cpu *c;
 
+#ifdef SLUB_FASTPATH
+	void **freelist;
+
+	c = get_cpu_slab(s, raw_smp_processor_id());
+	debug_check_no_locks_freed(object, s->objsize);
+	do {
+		freelist = c->freelist;
+		barrier();
+		/*
+		 * If the compiler would reorder the retrieval of c->page to
+		 * come before c->freelist then an interrupt could
+		 * change the cpu slab before we retrieve c->freelist. We
+		 * could be matching on a page no longer active and put the
+		 * object onto the freelist of the wrong slab.
+		 *
+		 * On the other hand: If we already have the freelist pointer
+		 * then any change of cpu_slab will cause the cmpxchg to fail
+		 * since the freelist pointers are unique per slab.
+		 */
+		if (unlikely(page != c->page || c->node < 0)) {
+			__slab_free(s, page, x, addr, c->offset);
+			break;
+		}
+		object[c->offset] = freelist;
+		stat(c, FREE_FASTPATH);
+	} while (cmpxchg_local(&c->freelist, freelist, object) != freelist);
+#else
+	unsigned long flags;
+
 	local_irq_save(flags);
 	debug_check_no_locks_freed(object, s->objsize);
 	c = get_cpu_slab(s, smp_processor_id());
 	if (likely(page == c->page && c->node >= 0)) {
 		object[c->offset] = c->freelist;
 		c->freelist = object;
+		stat(c, FREE_FASTPATH);
 	} else
 		__slab_free(s, page, x, addr, c->offset);
 
 	local_irq_restore(flags);
+#endif
 }
 
 void kmem_cache_free(struct kmem_cache *s, void *x)
@@ -1842,7 +1985,7 @@ static void init_kmem_cache_cpu(struct kmem_cache *s,
 			struct kmem_cache_cpu *c)
 {
 	c->page = NULL;
-	c->freelist = NULL;
+	c->freelist = (void *)PAGE_MAPPING_ANON;
 	c->node = 0;
 	c->offset = s->offset / sizeof(void *);
 	c->objsize = s->objsize;
@@ -2446,7 +2589,8 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
 		goto unlock_out;
 
 	realsize = kmalloc_caches[index].objsize;
-	text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d", (unsigned int)realsize),
+	text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d",
			 (unsigned int)realsize);
 	s = kmalloc(kmem_size, flags & ~SLUB_DMA);
 
 	if (!s || !text || !kmem_cache_open(s, flags, text,
@@ -2601,6 +2745,7 @@ EXPORT_SYMBOL(ksize);
 void kfree(const void *x)
 {
 	struct page *page;
+	void *object = (void *)x;
 
 	if (unlikely(ZERO_OR_NULL_PTR(x)))
 		return;
@@ -2610,7 +2755,7 @@ void kfree(const void *x)
 		put_page(page);
 		return;
 	}
-	slab_free(page->slab, page, (void *)x, __builtin_return_address(0));
+	slab_free(page->slab, page, object, __builtin_return_address(0));
 }
 EXPORT_SYMBOL(kfree);
 
@@ -2896,7 +3041,8 @@ void __init kmem_cache_init(void)
 #endif
 
 
-	printk(KERN_INFO "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
+	printk(KERN_INFO
		"SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
 		" CPUs=%d, Nodes=%d\n",
 		caches, cache_line_size(),
 		slub_min_order, slub_max_order, slub_min_objects,
@@ -3063,7 +3209,7 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
 }
 
 static struct notifier_block __cpuinitdata slab_notifier = {
-	&slab_cpuup_callback, NULL, 0
+	.notifier_call = slab_cpuup_callback
 };
 
 #endif
@@ -3104,7 +3250,7 @@ static int validate_slab(struct kmem_cache *s, struct page *page,
 						unsigned long *map)
 {
 	void *p;
-	void *addr = page_address(page);
+	void *addr = slab_address(page);
 
 	if (!check_slab(s, page) ||
 			!on_freelist(s, page, NULL))
@@ -3221,8 +3367,9 @@ static void resiliency_test(void)
 	p = kzalloc(32, GFP_KERNEL);
 	p[32 + sizeof(void *)] = 0x34;
 	printk(KERN_ERR "\n2. kmalloc-32: Clobber next pointer/next slab"
-		 	" 0x34 -> -0x%p\n", p);
-	printk(KERN_ERR "If allocated object is overwritten then not detectable\n\n");
+			" 0x34 -> -0x%p\n", p);
+	printk(KERN_ERR
		"If allocated object is overwritten then not detectable\n\n");
 
 	validate_slab_cache(kmalloc_caches + 5);
 	p = kzalloc(64, GFP_KERNEL);
@@ -3230,7 +3377,8 @@ static void resiliency_test(void)
 	*p = 0x56;
 	printk(KERN_ERR "\n3. kmalloc-64: corrupting random byte 0x56->0x%p\n",
									p);
-	printk(KERN_ERR "If allocated object is overwritten then not detectable\n\n");
+	printk(KERN_ERR
		"If allocated object is overwritten then not detectable\n\n");
 	validate_slab_cache(kmalloc_caches + 6);
 
 	printk(KERN_ERR "\nB. Corruption after free\n");
@@ -3243,7 +3391,8 @@ static void resiliency_test(void)
 	p = kzalloc(256, GFP_KERNEL);
 	kfree(p);
 	p[50] = 0x9a;
-	printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n", p);
+	printk(KERN_ERR "\n2. kmalloc-256: Clobber 50th byte 0x9a->0x%p\n\n",
			p);
 	validate_slab_cache(kmalloc_caches + 8);
 
 	p = kzalloc(512, GFP_KERNEL);
@@ -3384,7 +3533,7 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
 static void process_slab(struct loc_track *t, struct kmem_cache *s,
 		struct page *page, enum track_item alloc)
 {
-	void *addr = page_address(page);
+	void *addr = slab_address(page);
 	DECLARE_BITMAP(map, s->objects);
 	void *p;
 
@@ -3872,6 +4021,62 @@ static ssize_t remote_node_defrag_ratio_store(struct kmem_cache *s,
 SLAB_ATTR(remote_node_defrag_ratio);
 #endif
 
+#ifdef CONFIG_SLUB_STATS
+
+static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
+{
+	unsigned long sum = 0;
+	int cpu;
+	int len;
+	int *data = kmalloc(nr_cpu_ids * sizeof(int), GFP_KERNEL);
+
+	if (!data)
+		return -ENOMEM;
+
+	for_each_online_cpu(cpu) {
+		unsigned x = get_cpu_slab(s, cpu)->stat[si];
+
+		data[cpu] = x;
+		sum += x;
+	}
+
+	len = sprintf(buf, "%lu", sum);
+
+	for_each_online_cpu(cpu) {
+		if (data[cpu] && len < PAGE_SIZE - 20)
+			len += sprintf(buf + len, " c%d=%u", cpu, data[cpu]);
+	}
+	kfree(data);
+	return len + sprintf(buf + len, "\n");
+}
+
+#define STAT_ATTR(si, text) 					\
+static ssize_t text##_show(struct kmem_cache *s, char *buf)	\
+{								\
+	return show_stat(s, buf, si);				\
+}								\
+SLAB_ATTR_RO(text);						\
+
+STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
+STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
+STAT_ATTR(FREE_FASTPATH, free_fastpath);
+STAT_ATTR(FREE_SLOWPATH, free_slowpath);
+STAT_ATTR(FREE_FROZEN, free_frozen);
+STAT_ATTR(FREE_ADD_PARTIAL, free_add_partial);
+STAT_ATTR(FREE_REMOVE_PARTIAL, free_remove_partial);
+STAT_ATTR(ALLOC_FROM_PARTIAL, alloc_from_partial);
+STAT_ATTR(ALLOC_SLAB, alloc_slab);
+STAT_ATTR(ALLOC_REFILL, alloc_refill);
+STAT_ATTR(FREE_SLAB, free_slab);
+STAT_ATTR(CPUSLAB_FLUSH, cpuslab_flush);
+STAT_ATTR(DEACTIVATE_FULL, deactivate_full);
+STAT_ATTR(DEACTIVATE_EMPTY, deactivate_empty);
+STAT_ATTR(DEACTIVATE_TO_HEAD, deactivate_to_head);
+STAT_ATTR(DEACTIVATE_TO_TAIL, deactivate_to_tail);
+STAT_ATTR(DEACTIVATE_REMOTE_FREES, deactivate_remote_frees);
+
+#endif
+
 static struct attribute *slab_attrs[] = {
 	&slab_size_attr.attr,
 	&object_size_attr.attr,
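
The show_stat() routine added above prints one total followed by a " cN=count" entry for every CPU with a non-zero counter, so a per-cache sysfs file such as /sys/kernel/slab/<cache>/alloc_fastpath (the path is assumed from the existing SLUB sysfs layout) would read something like "42 c0=30 c2=12". Below is a small user-space sketch of that formatting, with made-up per-CPU numbers; it mirrors the kernel function but is not part of the patch.

#include <stdio.h>

int main(void)
{
	unsigned int counts[] = { 30, 0, 12 };	/* made-up per-cpu counters */
	int ncpus = 3;
	char buf[128];
	unsigned long sum = 0;
	int cpu, len;

	for (cpu = 0; cpu < ncpus; cpu++)
		sum += counts[cpu];

	/* Same layout as show_stat(): total first, then per-cpu entries. */
	len = sprintf(buf, "%lu", sum);
	for (cpu = 0; cpu < ncpus; cpu++)
		if (counts[cpu])
			len += sprintf(buf + len, " c%d=%u", cpu, counts[cpu]);
	sprintf(buf + len, "\n");

	fputs(buf, stdout);	/* prints "42 c0=30 c2=12" */
	return 0;
}
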
@@ -3902,6 +4107,25 @@ static struct attribute *slab_attrs[] = {
 #ifdef CONFIG_NUMA
 	&remote_node_defrag_ratio_attr.attr,
 #endif
+#ifdef CONFIG_SLUB_STATS
+	&alloc_fastpath_attr.attr,
+	&alloc_slowpath_attr.attr,
+	&free_fastpath_attr.attr,
+	&free_slowpath_attr.attr,
+	&free_frozen_attr.attr,
+	&free_add_partial_attr.attr,
+	&free_remove_partial_attr.attr,
+	&alloc_from_partial_attr.attr,
+	&alloc_slab_attr.attr,
+	&alloc_refill_attr.attr,
+	&free_slab_attr.attr,
+	&cpuslab_flush_attr.attr,
+	&deactivate_full_attr.attr,
+	&deactivate_empty_attr.attr,
+	&deactivate_to_head_attr.attr,
+	&deactivate_to_tail_attr.attr,
+	&deactivate_remote_frees_attr.attr,
+#endif
 	NULL
 };
 