aboutsummaryrefslogtreecommitdiffstats
path: root/mm/slub.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/slub.c')
-rw-r--r--mm/slub.c217
1 files changed, 97 insertions, 120 deletions
diff --git a/mm/slub.c b/mm/slub.c
index 74c65af0a54f..96d63eb3ab17 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -291,32 +291,16 @@ static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu)
291#endif 291#endif
292} 292}
293 293
294/* 294/* Verify that a pointer has an address that is valid within a slab page */
295 * The end pointer in a slab is special. It points to the first object in the
296 * slab but has bit 0 set to mark it.
297 *
298 * Note that SLUB relies on page_mapping returning NULL for pages with bit 0
299 * in the mapping set.
300 */
301static inline int is_end(void *addr)
302{
303 return (unsigned long)addr & PAGE_MAPPING_ANON;
304}
305
306static void *slab_address(struct page *page)
307{
308 return page->end - PAGE_MAPPING_ANON;
309}
310
311static inline int check_valid_pointer(struct kmem_cache *s, 295static inline int check_valid_pointer(struct kmem_cache *s,
312 struct page *page, const void *object) 296 struct page *page, const void *object)
313{ 297{
314 void *base; 298 void *base;
315 299
316 if (object == page->end) 300 if (!object)
317 return 1; 301 return 1;
318 302
319 base = slab_address(page); 303 base = page_address(page);
320 if (object < base || object >= base + s->objects * s->size || 304 if (object < base || object >= base + s->objects * s->size ||
321 (object - base) % s->size) { 305 (object - base) % s->size) {
322 return 0; 306 return 0;
@@ -349,8 +333,7 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
349 333
350/* Scan freelist */ 334/* Scan freelist */
351#define for_each_free_object(__p, __s, __free) \ 335#define for_each_free_object(__p, __s, __free) \
352 for (__p = (__free); (__p) != page->end; __p = get_freepointer((__s),\ 336 for (__p = (__free); __p; __p = get_freepointer((__s), __p))
353 __p))
354 337
355/* Determine object index from a given position */ 338/* Determine object index from a given position */
356static inline int slab_index(void *p, struct kmem_cache *s, void *addr) 339static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
@@ -502,7 +485,7 @@ static void slab_fix(struct kmem_cache *s, char *fmt, ...)
502static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) 485static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
503{ 486{
504 unsigned int off; /* Offset of last byte */ 487 unsigned int off; /* Offset of last byte */
505 u8 *addr = slab_address(page); 488 u8 *addr = page_address(page);
506 489
507 print_tracking(s, p); 490 print_tracking(s, p);
508 491
@@ -637,7 +620,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
637 * A. Free pointer (if we cannot overwrite object on free) 620 * A. Free pointer (if we cannot overwrite object on free)
638 * B. Tracking data for SLAB_STORE_USER 621 * B. Tracking data for SLAB_STORE_USER
639 * C. Padding to reach required alignment boundary or at mininum 622 * C. Padding to reach required alignment boundary or at mininum
640 * one word if debuggin is on to be able to detect writes 623 * one word if debugging is on to be able to detect writes
641 * before the word boundary. 624 * before the word boundary.
642 * 625 *
643 * Padding is done using 0x5a (POISON_INUSE) 626 * Padding is done using 0x5a (POISON_INUSE)
@@ -680,7 +663,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
680 if (!(s->flags & SLAB_POISON)) 663 if (!(s->flags & SLAB_POISON))
681 return 1; 664 return 1;
682 665
683 start = slab_address(page); 666 start = page_address(page);
684 end = start + (PAGE_SIZE << s->order); 667 end = start + (PAGE_SIZE << s->order);
685 length = s->objects * s->size; 668 length = s->objects * s->size;
686 remainder = end - (start + length); 669 remainder = end - (start + length);
@@ -748,7 +731,7 @@ static int check_object(struct kmem_cache *s, struct page *page,
748 * of the free objects in this slab. May cause 731 * of the free objects in this slab. May cause
749 * another error because the object count is now wrong. 732 * another error because the object count is now wrong.
750 */ 733 */
751 set_freepointer(s, p, page->end); 734 set_freepointer(s, p, NULL);
752 return 0; 735 return 0;
753 } 736 }
754 return 1; 737 return 1;
@@ -782,18 +765,18 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
782 void *fp = page->freelist; 765 void *fp = page->freelist;
783 void *object = NULL; 766 void *object = NULL;
784 767
785 while (fp != page->end && nr <= s->objects) { 768 while (fp && nr <= s->objects) {
786 if (fp == search) 769 if (fp == search)
787 return 1; 770 return 1;
788 if (!check_valid_pointer(s, page, fp)) { 771 if (!check_valid_pointer(s, page, fp)) {
789 if (object) { 772 if (object) {
790 object_err(s, page, object, 773 object_err(s, page, object,
791 "Freechain corrupt"); 774 "Freechain corrupt");
792 set_freepointer(s, object, page->end); 775 set_freepointer(s, object, NULL);
793 break; 776 break;
794 } else { 777 } else {
795 slab_err(s, page, "Freepointer corrupt"); 778 slab_err(s, page, "Freepointer corrupt");
796 page->freelist = page->end; 779 page->freelist = NULL;
797 page->inuse = s->objects; 780 page->inuse = s->objects;
798 slab_fix(s, "Freelist cleared"); 781 slab_fix(s, "Freelist cleared");
799 return 0; 782 return 0;
@@ -870,7 +853,7 @@ static int alloc_debug_processing(struct kmem_cache *s, struct page *page,
870 if (!check_slab(s, page)) 853 if (!check_slab(s, page))
871 goto bad; 854 goto bad;
872 855
873 if (object && !on_freelist(s, page, object)) { 856 if (!on_freelist(s, page, object)) {
874 object_err(s, page, object, "Object already allocated"); 857 object_err(s, page, object, "Object already allocated");
875 goto bad; 858 goto bad;
876 } 859 }
@@ -880,7 +863,7 @@ static int alloc_debug_processing(struct kmem_cache *s, struct page *page,
880 goto bad; 863 goto bad;
881 } 864 }
882 865
883 if (object && !check_object(s, page, object, 0)) 866 if (!check_object(s, page, object, 0))
884 goto bad; 867 goto bad;
885 868
886 /* Success perform special debug activities for allocs */ 869 /* Success perform special debug activities for allocs */
@@ -899,7 +882,7 @@ bad:
899 */ 882 */
900 slab_fix(s, "Marking all objects used"); 883 slab_fix(s, "Marking all objects used");
901 page->inuse = s->objects; 884 page->inuse = s->objects;
902 page->freelist = page->end; 885 page->freelist = NULL;
903 } 886 }
904 return 0; 887 return 0;
905} 888}
@@ -939,7 +922,7 @@ static int free_debug_processing(struct kmem_cache *s, struct page *page,
939 } 922 }
940 923
941 /* Special debug activities for freeing objects */ 924 /* Special debug activities for freeing objects */
942 if (!SlabFrozen(page) && page->freelist == page->end) 925 if (!SlabFrozen(page) && !page->freelist)
943 remove_full(s, page); 926 remove_full(s, page);
944 if (s->flags & SLAB_STORE_USER) 927 if (s->flags & SLAB_STORE_USER)
945 set_track(s, object, TRACK_FREE, addr); 928 set_track(s, object, TRACK_FREE, addr);
@@ -1015,30 +998,11 @@ static unsigned long kmem_cache_flags(unsigned long objsize,
1015 void (*ctor)(struct kmem_cache *, void *)) 998 void (*ctor)(struct kmem_cache *, void *))
1016{ 999{
1017 /* 1000 /*
1018 * The page->offset field is only 16 bit wide. This is an offset 1001 * Enable debugging if selected on the kernel commandline.
1019 * in units of words from the beginning of an object. If the slab
1020 * size is bigger then we cannot move the free pointer behind the
1021 * object anymore.
1022 *
1023 * On 32 bit platforms the limit is 256k. On 64bit platforms
1024 * the limit is 512k.
1025 *
1026 * Debugging or ctor may create a need to move the free
1027 * pointer. Fail if this happens.
1028 */ 1002 */
1029 if (objsize >= 65535 * sizeof(void *)) { 1003 if (slub_debug && (!slub_debug_slabs ||
1030 BUG_ON(flags & (SLAB_RED_ZONE | SLAB_POISON | 1004 strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)) == 0))
1031 SLAB_STORE_USER | SLAB_DESTROY_BY_RCU)); 1005 flags |= slub_debug;
1032 BUG_ON(ctor);
1033 } else {
1034 /*
1035 * Enable debugging if selected on the kernel commandline.
1036 */
1037 if (slub_debug && (!slub_debug_slabs ||
1038 strncmp(slub_debug_slabs, name,
1039 strlen(slub_debug_slabs)) == 0))
1040 flags |= slub_debug;
1041 }
1042 1006
1043 return flags; 1007 return flags;
1044} 1008}
@@ -1124,7 +1088,6 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1124 SetSlabDebug(page); 1088 SetSlabDebug(page);
1125 1089
1126 start = page_address(page); 1090 start = page_address(page);
1127 page->end = start + 1;
1128 1091
1129 if (unlikely(s->flags & SLAB_POISON)) 1092 if (unlikely(s->flags & SLAB_POISON))
1130 memset(start, POISON_INUSE, PAGE_SIZE << s->order); 1093 memset(start, POISON_INUSE, PAGE_SIZE << s->order);
@@ -1136,7 +1099,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1136 last = p; 1099 last = p;
1137 } 1100 }
1138 setup_object(s, page, last); 1101 setup_object(s, page, last);
1139 set_freepointer(s, last, page->end); 1102 set_freepointer(s, last, NULL);
1140 1103
1141 page->freelist = start; 1104 page->freelist = start;
1142 page->inuse = 0; 1105 page->inuse = 0;
@@ -1152,7 +1115,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
1152 void *p; 1115 void *p;
1153 1116
1154 slab_pad_check(s, page); 1117 slab_pad_check(s, page);
1155 for_each_object(p, s, slab_address(page)) 1118 for_each_object(p, s, page_address(page))
1156 check_object(s, page, p, 0); 1119 check_object(s, page, p, 0);
1157 ClearSlabDebug(page); 1120 ClearSlabDebug(page);
1158 } 1121 }
@@ -1162,7 +1125,6 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
1162 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, 1125 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1163 -pages); 1126 -pages);
1164 1127
1165 page->mapping = NULL;
1166 __free_pages(page, s->order); 1128 __free_pages(page, s->order);
1167} 1129}
1168 1130
@@ -1307,7 +1269,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
1307 * may return off node objects because partial slabs are obtained 1269 * may return off node objects because partial slabs are obtained
1308 * from other nodes and filled up. 1270 * from other nodes and filled up.
1309 * 1271 *
1310 * If /sys/slab/xx/defrag_ratio is set to 100 (which makes 1272 * If /sys/kernel/slab/xx/defrag_ratio is set to 100 (which makes
1311 * defrag_ratio = 1000) then every (well almost) allocation will 1273 * defrag_ratio = 1000) then every (well almost) allocation will
1312 * first attempt to defrag slab caches on other nodes. This means 1274 * first attempt to defrag slab caches on other nodes. This means
1313 * scanning over all nodes to look for partial slabs which may be 1275 * scanning over all nodes to look for partial slabs which may be
@@ -1366,7 +1328,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
1366 ClearSlabFrozen(page); 1328 ClearSlabFrozen(page);
1367 if (page->inuse) { 1329 if (page->inuse) {
1368 1330
1369 if (page->freelist != page->end) { 1331 if (page->freelist) {
1370 add_partial(n, page, tail); 1332 add_partial(n, page, tail);
1371 stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); 1333 stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
1372 } else { 1334 } else {
@@ -1382,9 +1344,11 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
1382 * Adding an empty slab to the partial slabs in order 1344 * Adding an empty slab to the partial slabs in order
1383 * to avoid page allocator overhead. This slab needs 1345 * to avoid page allocator overhead. This slab needs
1384 * to come after the other slabs with objects in 1346 * to come after the other slabs with objects in
1385 * order to fill them up. That way the size of the 1347 * so that the others get filled first. That way the
1386 * partial list stays small. kmem_cache_shrink can 1348 * size of the partial list stays small.
1387 * reclaim empty slabs from the partial list. 1349 *
1350 * kmem_cache_shrink can reclaim any empty slabs from the
1351 * partial list.
1388 */ 1352 */
1389 add_partial(n, page, 1); 1353 add_partial(n, page, 1);
1390 slab_unlock(page); 1354 slab_unlock(page);
@@ -1404,18 +1368,14 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1404 struct page *page = c->page; 1368 struct page *page = c->page;
1405 int tail = 1; 1369 int tail = 1;
1406 1370
1407 if (c->freelist) 1371 if (page->freelist)
1408 stat(c, DEACTIVATE_REMOTE_FREES); 1372 stat(c, DEACTIVATE_REMOTE_FREES);
1409 /* 1373 /*
1410 * Merge cpu freelist into freelist. Typically we get here 1374 * Merge cpu freelist into slab freelist. Typically we get here
1411 * because both freelists are empty. So this is unlikely 1375 * because both freelists are empty. So this is unlikely
1412 * to occur. 1376 * to occur.
1413 *
1414 * We need to use _is_end here because deactivate slab may
1415 * be called for a debug slab. Then c->freelist may contain
1416 * a dummy pointer.
1417 */ 1377 */
1418 while (unlikely(!is_end(c->freelist))) { 1378 while (unlikely(c->freelist)) {
1419 void **object; 1379 void **object;
1420 1380
1421 tail = 0; /* Hot objects. Put the slab first */ 1381 tail = 0; /* Hot objects. Put the slab first */
@@ -1442,6 +1402,7 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1442 1402
1443/* 1403/*
1444 * Flush cpu slab. 1404 * Flush cpu slab.
1405 *
1445 * Called from IPI handler with interrupts disabled. 1406 * Called from IPI handler with interrupts disabled.
1446 */ 1407 */
1447static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) 1408static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
@@ -1500,7 +1461,8 @@ static inline int node_match(struct kmem_cache_cpu *c, int node)
1500 * rest of the freelist to the lockless freelist. 1461 * rest of the freelist to the lockless freelist.
1501 * 1462 *
1502 * And if we were unable to get a new slab from the partial slab lists then 1463 * And if we were unable to get a new slab from the partial slab lists then
1503 * we need to allocate a new slab. This is slowest path since we may sleep. 1464 * we need to allocate a new slab. This is the slowest path since it involves
1465 * a call to the page allocator and the setup of a new slab.
1504 */ 1466 */
1505static void *__slab_alloc(struct kmem_cache *s, 1467static void *__slab_alloc(struct kmem_cache *s,
1506 gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c) 1468 gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c)
@@ -1514,18 +1476,19 @@ static void *__slab_alloc(struct kmem_cache *s,
1514 slab_lock(c->page); 1476 slab_lock(c->page);
1515 if (unlikely(!node_match(c, node))) 1477 if (unlikely(!node_match(c, node)))
1516 goto another_slab; 1478 goto another_slab;
1479
1517 stat(c, ALLOC_REFILL); 1480 stat(c, ALLOC_REFILL);
1481
1518load_freelist: 1482load_freelist:
1519 object = c->page->freelist; 1483 object = c->page->freelist;
1520 if (unlikely(object == c->page->end)) 1484 if (unlikely(!object))
1521 goto another_slab; 1485 goto another_slab;
1522 if (unlikely(SlabDebug(c->page))) 1486 if (unlikely(SlabDebug(c->page)))
1523 goto debug; 1487 goto debug;
1524 1488
1525 object = c->page->freelist;
1526 c->freelist = object[c->offset]; 1489 c->freelist = object[c->offset];
1527 c->page->inuse = s->objects; 1490 c->page->inuse = s->objects;
1528 c->page->freelist = c->page->end; 1491 c->page->freelist = NULL;
1529 c->node = page_to_nid(c->page); 1492 c->node = page_to_nid(c->page);
1530unlock_out: 1493unlock_out:
1531 slab_unlock(c->page); 1494 slab_unlock(c->page);
@@ -1578,7 +1541,6 @@ new_slab:
1578 1541
1579 return NULL; 1542 return NULL;
1580debug: 1543debug:
1581 object = c->page->freelist;
1582 if (!alloc_debug_processing(s, c->page, object, addr)) 1544 if (!alloc_debug_processing(s, c->page, object, addr))
1583 goto another_slab; 1545 goto another_slab;
1584 1546
@@ -1607,7 +1569,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
1607 1569
1608 local_irq_save(flags); 1570 local_irq_save(flags);
1609 c = get_cpu_slab(s, smp_processor_id()); 1571 c = get_cpu_slab(s, smp_processor_id());
1610 if (unlikely(is_end(c->freelist) || !node_match(c, node))) 1572 if (unlikely(!c->freelist || !node_match(c, node)))
1611 1573
1612 object = __slab_alloc(s, gfpflags, node, addr, c); 1574 object = __slab_alloc(s, gfpflags, node, addr, c);
1613 1575
@@ -1659,6 +1621,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
1659 1621
1660 if (unlikely(SlabDebug(page))) 1622 if (unlikely(SlabDebug(page)))
1661 goto debug; 1623 goto debug;
1624
1662checks_ok: 1625checks_ok:
1663 prior = object[offset] = page->freelist; 1626 prior = object[offset] = page->freelist;
1664 page->freelist = object; 1627 page->freelist = object;
@@ -1673,11 +1636,10 @@ checks_ok:
1673 goto slab_empty; 1636 goto slab_empty;
1674 1637
1675 /* 1638 /*
1676 * Objects left in the slab. If it 1639 * Objects left in the slab. If it was not on the partial list before
1677 * was not on the partial list before
1678 * then add it. 1640 * then add it.
1679 */ 1641 */
1680 if (unlikely(prior == page->end)) { 1642 if (unlikely(!prior)) {
1681 add_partial(get_node(s, page_to_nid(page)), page, 1); 1643 add_partial(get_node(s, page_to_nid(page)), page, 1);
1682 stat(c, FREE_ADD_PARTIAL); 1644 stat(c, FREE_ADD_PARTIAL);
1683 } 1645 }
@@ -1687,7 +1649,7 @@ out_unlock:
1687 return; 1649 return;
1688 1650
1689slab_empty: 1651slab_empty:
1690 if (prior != page->end) { 1652 if (prior) {
1691 /* 1653 /*
1692 * Slab still on the partial list. 1654 * Slab still on the partial list.
1693 */ 1655 */
@@ -1724,8 +1686,8 @@ static __always_inline void slab_free(struct kmem_cache *s,
1724 unsigned long flags; 1686 unsigned long flags;
1725 1687
1726 local_irq_save(flags); 1688 local_irq_save(flags);
1727 debug_check_no_locks_freed(object, s->objsize);
1728 c = get_cpu_slab(s, smp_processor_id()); 1689 c = get_cpu_slab(s, smp_processor_id());
1690 debug_check_no_locks_freed(object, c->objsize);
1729 if (likely(page == c->page && c->node >= 0)) { 1691 if (likely(page == c->page && c->node >= 0)) {
1730 object[c->offset] = c->freelist; 1692 object[c->offset] = c->freelist;
1731 c->freelist = object; 1693 c->freelist = object;
@@ -1888,20 +1850,21 @@ static unsigned long calculate_alignment(unsigned long flags,
1888 unsigned long align, unsigned long size) 1850 unsigned long align, unsigned long size)
1889{ 1851{
1890 /* 1852 /*
1891 * If the user wants hardware cache aligned objects then 1853 * If the user wants hardware cache aligned objects then follow that
1892 * follow that suggestion if the object is sufficiently 1854 * suggestion if the object is sufficiently large.
1893 * large.
1894 * 1855 *
1895 * The hardware cache alignment cannot override the 1856 * The hardware cache alignment cannot override the specified
1896 * specified alignment though. If that is greater 1857 * alignment though. If that is greater then use it.
1897 * then use it.
1898 */ 1858 */
1899 if ((flags & SLAB_HWCACHE_ALIGN) && 1859 if (flags & SLAB_HWCACHE_ALIGN) {
1900 size > cache_line_size() / 2) 1860 unsigned long ralign = cache_line_size();
1901 return max_t(unsigned long, align, cache_line_size()); 1861 while (size <= ralign / 2)
1862 ralign /= 2;
1863 align = max(align, ralign);
1864 }
1902 1865
1903 if (align < ARCH_SLAB_MINALIGN) 1866 if (align < ARCH_SLAB_MINALIGN)
1904 return ARCH_SLAB_MINALIGN; 1867 align = ARCH_SLAB_MINALIGN;
1905 1868
1906 return ALIGN(align, sizeof(void *)); 1869 return ALIGN(align, sizeof(void *));
1907} 1870}
@@ -1910,7 +1873,7 @@ static void init_kmem_cache_cpu(struct kmem_cache *s,
1910 struct kmem_cache_cpu *c) 1873 struct kmem_cache_cpu *c)
1911{ 1874{
1912 c->page = NULL; 1875 c->page = NULL;
1913 c->freelist = (void *)PAGE_MAPPING_ANON; 1876 c->freelist = NULL;
1914 c->node = 0; 1877 c->node = 0;
1915 c->offset = s->offset / sizeof(void *); 1878 c->offset = s->offset / sizeof(void *);
1916 c->objsize = s->objsize; 1879 c->objsize = s->objsize;
@@ -2092,6 +2055,7 @@ static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags,
2092#endif 2055#endif
2093 init_kmem_cache_node(n); 2056 init_kmem_cache_node(n);
2094 atomic_long_inc(&n->nr_slabs); 2057 atomic_long_inc(&n->nr_slabs);
2058
2095 /* 2059 /*
2096 * lockdep requires consistent irq usage for each lock 2060 * lockdep requires consistent irq usage for each lock
2097 * so even though there cannot be a race this early in 2061 * so even though there cannot be a race this early in
@@ -2173,6 +2137,14 @@ static int calculate_sizes(struct kmem_cache *s)
2173 unsigned long align = s->align; 2137 unsigned long align = s->align;
2174 2138
2175 /* 2139 /*
2140 * Round up object size to the next word boundary. We can only
2141 * place the free pointer at word boundaries and this determines
2142 * the possible location of the free pointer.
2143 */
2144 size = ALIGN(size, sizeof(void *));
2145
2146#ifdef CONFIG_SLUB_DEBUG
2147 /*
2176 * Determine if we can poison the object itself. If the user of 2148 * Determine if we can poison the object itself. If the user of
2177 * the slab may touch the object after free or before allocation 2149 * the slab may touch the object after free or before allocation
2178 * then we should never poison the object itself. 2150 * then we should never poison the object itself.
@@ -2183,14 +2155,7 @@ static int calculate_sizes(struct kmem_cache *s)
2183 else 2155 else
2184 s->flags &= ~__OBJECT_POISON; 2156 s->flags &= ~__OBJECT_POISON;
2185 2157
2186 /*
2187 * Round up object size to the next word boundary. We can only
2188 * place the free pointer at word boundaries and this determines
2189 * the possible location of the free pointer.
2190 */
2191 size = ALIGN(size, sizeof(void *));
2192 2158
2193#ifdef CONFIG_SLUB_DEBUG
2194 /* 2159 /*
2195 * If we are Redzoning then check if there is some space between the 2160 * If we are Redzoning then check if there is some space between the
2196 * end of the object and the free pointer. If not then add an 2161 * end of the object and the free pointer. If not then add an
@@ -2343,7 +2308,7 @@ int kmem_ptr_validate(struct kmem_cache *s, const void *object)
2343 /* 2308 /*
2344 * We could also check if the object is on the slabs freelist. 2309 * We could also check if the object is on the slabs freelist.
2345 * But this would be too expensive and it seems that the main 2310 * But this would be too expensive and it seems that the main
2346 * purpose of kmem_ptr_valid is to check if the object belongs 2311 * purpose of kmem_ptr_valid() is to check if the object belongs
2347 * to a certain slab. 2312 * to a certain slab.
2348 */ 2313 */
2349 return 1; 2314 return 1;
@@ -2630,13 +2595,24 @@ void *__kmalloc(size_t size, gfp_t flags)
2630} 2595}
2631EXPORT_SYMBOL(__kmalloc); 2596EXPORT_SYMBOL(__kmalloc);
2632 2597
2598static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
2599{
2600 struct page *page = alloc_pages_node(node, flags | __GFP_COMP,
2601 get_order(size));
2602
2603 if (page)
2604 return page_address(page);
2605 else
2606 return NULL;
2607}
2608
2633#ifdef CONFIG_NUMA 2609#ifdef CONFIG_NUMA
2634void *__kmalloc_node(size_t size, gfp_t flags, int node) 2610void *__kmalloc_node(size_t size, gfp_t flags, int node)
2635{ 2611{
2636 struct kmem_cache *s; 2612 struct kmem_cache *s;
2637 2613
2638 if (unlikely(size > PAGE_SIZE)) 2614 if (unlikely(size > PAGE_SIZE))
2639 return kmalloc_large(size, flags); 2615 return kmalloc_large_node(size, flags, node);
2640 2616
2641 s = get_slab(size, flags); 2617 s = get_slab(size, flags);
2642 2618
@@ -2653,19 +2629,17 @@ size_t ksize(const void *object)
2653 struct page *page; 2629 struct page *page;
2654 struct kmem_cache *s; 2630 struct kmem_cache *s;
2655 2631
2656 BUG_ON(!object);
2657 if (unlikely(object == ZERO_SIZE_PTR)) 2632 if (unlikely(object == ZERO_SIZE_PTR))
2658 return 0; 2633 return 0;
2659 2634
2660 page = virt_to_head_page(object); 2635 page = virt_to_head_page(object);
2661 BUG_ON(!page);
2662 2636
2663 if (unlikely(!PageSlab(page))) 2637 if (unlikely(!PageSlab(page)))
2664 return PAGE_SIZE << compound_order(page); 2638 return PAGE_SIZE << compound_order(page);
2665 2639
2666 s = page->slab; 2640 s = page->slab;
2667 BUG_ON(!s);
2668 2641
2642#ifdef CONFIG_SLUB_DEBUG
2669 /* 2643 /*
2670 * Debugging requires use of the padding between object 2644 * Debugging requires use of the padding between object
2671 * and whatever may come after it. 2645 * and whatever may come after it.
@@ -2673,6 +2647,7 @@ size_t ksize(const void *object)
2673 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) 2647 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
2674 return s->objsize; 2648 return s->objsize;
2675 2649
2650#endif
2676 /* 2651 /*
2677 * If we have the need to store the freelist pointer 2652 * If we have the need to store the freelist pointer
2678 * back there or track user information then we can 2653 * back there or track user information then we can
@@ -2680,7 +2655,6 @@ size_t ksize(const void *object)
2680 */ 2655 */
2681 if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) 2656 if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
2682 return s->inuse; 2657 return s->inuse;
2683
2684 /* 2658 /*
2685 * Else we can use all the padding etc for the allocation 2659 * Else we can use all the padding etc for the allocation
2686 */ 2660 */
@@ -2957,7 +2931,7 @@ void __init kmem_cache_init(void)
2957 /* 2931 /*
2958 * Patch up the size_index table if we have strange large alignment 2932 * Patch up the size_index table if we have strange large alignment
2959 * requirements for the kmalloc array. This is only the case for 2933 * requirements for the kmalloc array. This is only the case for
2960 * mips it seems. The standard arches will not generate any code here. 2934 * MIPS it seems. The standard arches will not generate any code here.
2961 * 2935 *
2962 * Largest permitted alignment is 256 bytes due to the way we 2936 * Largest permitted alignment is 256 bytes due to the way we
2963 * handle the index determination for the smaller caches. 2937 * handle the index determination for the smaller caches.
@@ -2986,7 +2960,6 @@ void __init kmem_cache_init(void)
2986 kmem_size = sizeof(struct kmem_cache); 2960 kmem_size = sizeof(struct kmem_cache);
2987#endif 2961#endif
2988 2962
2989
2990 printk(KERN_INFO 2963 printk(KERN_INFO
2991 "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," 2964 "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
2992 " CPUs=%d, Nodes=%d\n", 2965 " CPUs=%d, Nodes=%d\n",
@@ -3083,12 +3056,15 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
3083 */ 3056 */
3084 for_each_online_cpu(cpu) 3057 for_each_online_cpu(cpu)
3085 get_cpu_slab(s, cpu)->objsize = s->objsize; 3058 get_cpu_slab(s, cpu)->objsize = s->objsize;
3059
3086 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); 3060 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
3087 up_write(&slub_lock); 3061 up_write(&slub_lock);
3062
3088 if (sysfs_slab_alias(s, name)) 3063 if (sysfs_slab_alias(s, name))
3089 goto err; 3064 goto err;
3090 return s; 3065 return s;
3091 } 3066 }
3067
3092 s = kmalloc(kmem_size, GFP_KERNEL); 3068 s = kmalloc(kmem_size, GFP_KERNEL);
3093 if (s) { 3069 if (s) {
3094 if (kmem_cache_open(s, GFP_KERNEL, name, 3070 if (kmem_cache_open(s, GFP_KERNEL, name,
@@ -3184,7 +3160,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
3184 struct kmem_cache *s; 3160 struct kmem_cache *s;
3185 3161
3186 if (unlikely(size > PAGE_SIZE)) 3162 if (unlikely(size > PAGE_SIZE))
3187 return kmalloc_large(size, gfpflags); 3163 return kmalloc_large_node(size, gfpflags, node);
3188 3164
3189 s = get_slab(size, gfpflags); 3165 s = get_slab(size, gfpflags);
3190 3166
@@ -3199,7 +3175,7 @@ static int validate_slab(struct kmem_cache *s, struct page *page,
3199 unsigned long *map) 3175 unsigned long *map)
3200{ 3176{
3201 void *p; 3177 void *p;
3202 void *addr = slab_address(page); 3178 void *addr = page_address(page);
3203 3179
3204 if (!check_slab(s, page) || 3180 if (!check_slab(s, page) ||
3205 !on_freelist(s, page, NULL)) 3181 !on_freelist(s, page, NULL))
@@ -3482,7 +3458,7 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
3482static void process_slab(struct loc_track *t, struct kmem_cache *s, 3458static void process_slab(struct loc_track *t, struct kmem_cache *s,
3483 struct page *page, enum track_item alloc) 3459 struct page *page, enum track_item alloc)
3484{ 3460{
3485 void *addr = slab_address(page); 3461 void *addr = page_address(page);
3486 DECLARE_BITMAP(map, s->objects); 3462 DECLARE_BITMAP(map, s->objects);
3487 void *p; 3463 void *p;
3488 3464
@@ -3591,8 +3567,8 @@ enum slab_stat_type {
3591#define SO_CPU (1 << SL_CPU) 3567#define SO_CPU (1 << SL_CPU)
3592#define SO_OBJECTS (1 << SL_OBJECTS) 3568#define SO_OBJECTS (1 << SL_OBJECTS)
3593 3569
3594static unsigned long slab_objects(struct kmem_cache *s, 3570static ssize_t show_slab_objects(struct kmem_cache *s,
3595 char *buf, unsigned long flags) 3571 char *buf, unsigned long flags)
3596{ 3572{
3597 unsigned long total = 0; 3573 unsigned long total = 0;
3598 int cpu; 3574 int cpu;
@@ -3602,6 +3578,8 @@ static unsigned long slab_objects(struct kmem_cache *s,
3602 unsigned long *per_cpu; 3578 unsigned long *per_cpu;
3603 3579
3604 nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL); 3580 nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
3581 if (!nodes)
3582 return -ENOMEM;
3605 per_cpu = nodes + nr_node_ids; 3583 per_cpu = nodes + nr_node_ids;
3606 3584
3607 for_each_possible_cpu(cpu) { 3585 for_each_possible_cpu(cpu) {
@@ -3754,25 +3732,25 @@ SLAB_ATTR_RO(aliases);
3754 3732
3755static ssize_t slabs_show(struct kmem_cache *s, char *buf) 3733static ssize_t slabs_show(struct kmem_cache *s, char *buf)
3756{ 3734{
3757 return slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU); 3735 return show_slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU);
3758} 3736}
3759SLAB_ATTR_RO(slabs); 3737SLAB_ATTR_RO(slabs);
3760 3738
3761static ssize_t partial_show(struct kmem_cache *s, char *buf) 3739static ssize_t partial_show(struct kmem_cache *s, char *buf)
3762{ 3740{
3763 return slab_objects(s, buf, SO_PARTIAL); 3741 return show_slab_objects(s, buf, SO_PARTIAL);
3764} 3742}
3765SLAB_ATTR_RO(partial); 3743SLAB_ATTR_RO(partial);
3766 3744
3767static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf) 3745static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
3768{ 3746{
3769 return slab_objects(s, buf, SO_CPU); 3747 return show_slab_objects(s, buf, SO_CPU);
3770} 3748}
3771SLAB_ATTR_RO(cpu_slabs); 3749SLAB_ATTR_RO(cpu_slabs);
3772 3750
3773static ssize_t objects_show(struct kmem_cache *s, char *buf) 3751static ssize_t objects_show(struct kmem_cache *s, char *buf)
3774{ 3752{
3775 return slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU|SO_OBJECTS); 3753 return show_slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU|SO_OBJECTS);
3776} 3754}
3777SLAB_ATTR_RO(objects); 3755SLAB_ATTR_RO(objects);
3778 3756
@@ -3971,7 +3949,6 @@ SLAB_ATTR(remote_node_defrag_ratio);
3971#endif 3949#endif
3972 3950
3973#ifdef CONFIG_SLUB_STATS 3951#ifdef CONFIG_SLUB_STATS
3974
3975static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) 3952static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
3976{ 3953{
3977 unsigned long sum = 0; 3954 unsigned long sum = 0;
@@ -4155,8 +4132,8 @@ static struct kset *slab_kset;
4155#define ID_STR_LENGTH 64 4132#define ID_STR_LENGTH 64
4156 4133
4157/* Create a unique string id for a slab cache: 4134/* Create a unique string id for a slab cache:
4158 * format 4135 *
4159 * :[flags-]size:[memory address of kmemcache] 4136 * Format :[flags-]size
4160 */ 4137 */
4161static char *create_unique_id(struct kmem_cache *s) 4138static char *create_unique_id(struct kmem_cache *s)
4162{ 4139{