aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/mm_types.h5
-rw-r--r--include/linux/slub_def.h4
-rw-r--r--mm/slub.c204
3 files changed, 92 insertions, 121 deletions
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index bfee0bd1d435..34023c65d466 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -64,10 +64,7 @@ struct page {
64#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS 64#if NR_CPUS >= CONFIG_SPLIT_PTLOCK_CPUS
65 spinlock_t ptl; 65 spinlock_t ptl;
66#endif 66#endif
67 struct { 67 struct kmem_cache *slab; /* SLUB: Pointer to slab */
68 struct kmem_cache *slab; /* SLUB: Pointer to slab */
69 void *end; /* SLUB: end marker */
70 };
71 struct page *first_page; /* Compound tail pages */ 68 struct page *first_page; /* Compound tail pages */
72 }; 69 };
73 union { 70 union {
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 57deecc79d52..b00c1c73eb0a 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -61,7 +61,7 @@ struct kmem_cache {
61 int size; /* The size of an object including meta data */ 61 int size; /* The size of an object including meta data */
62 int objsize; /* The size of an object without meta data */ 62 int objsize; /* The size of an object without meta data */
63 int offset; /* Free pointer offset. */ 63 int offset; /* Free pointer offset. */
64 int order; 64 int order; /* Current preferred allocation order */
65 65
66 /* 66 /*
67 * Avoid an extra cache line for UP, SMP and for the node local to 67 * Avoid an extra cache line for UP, SMP and for the node local to
@@ -138,11 +138,11 @@ static __always_inline int kmalloc_index(size_t size)
138 if (size <= 512) return 9; 138 if (size <= 512) return 9;
139 if (size <= 1024) return 10; 139 if (size <= 1024) return 10;
140 if (size <= 2 * 1024) return 11; 140 if (size <= 2 * 1024) return 11;
141 if (size <= 4 * 1024) return 12;
141/* 142/*
142 * The following is only needed to support architectures with a larger page 143 * The following is only needed to support architectures with a larger page
143 * size than 4k. 144 * size than 4k.
144 */ 145 */
145 if (size <= 4 * 1024) return 12;
146 if (size <= 8 * 1024) return 13; 146 if (size <= 8 * 1024) return 13;
147 if (size <= 16 * 1024) return 14; 147 if (size <= 16 * 1024) return 14;
148 if (size <= 32 * 1024) return 15; 148 if (size <= 32 * 1024) return 15;
diff --git a/mm/slub.c b/mm/slub.c
index 74c65af0a54f..0863fd38a5ce 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -291,32 +291,16 @@ static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu)
291#endif 291#endif
292} 292}
293 293
294/* 294/* Verify that a pointer has an address that is valid within a slab page */
295 * The end pointer in a slab is special. It points to the first object in the
296 * slab but has bit 0 set to mark it.
297 *
298 * Note that SLUB relies on page_mapping returning NULL for pages with bit 0
299 * in the mapping set.
300 */
301static inline int is_end(void *addr)
302{
303 return (unsigned long)addr & PAGE_MAPPING_ANON;
304}
305
306static void *slab_address(struct page *page)
307{
308 return page->end - PAGE_MAPPING_ANON;
309}
310
311static inline int check_valid_pointer(struct kmem_cache *s, 295static inline int check_valid_pointer(struct kmem_cache *s,
312 struct page *page, const void *object) 296 struct page *page, const void *object)
313{ 297{
314 void *base; 298 void *base;
315 299
316 if (object == page->end) 300 if (!object)
317 return 1; 301 return 1;
318 302
319 base = slab_address(page); 303 base = page_address(page);
320 if (object < base || object >= base + s->objects * s->size || 304 if (object < base || object >= base + s->objects * s->size ||
321 (object - base) % s->size) { 305 (object - base) % s->size) {
322 return 0; 306 return 0;
@@ -349,8 +333,7 @@ static inline void set_freepointer(struct kmem_cache *s, void *object, void *fp)
349 333
350/* Scan freelist */ 334/* Scan freelist */
351#define for_each_free_object(__p, __s, __free) \ 335#define for_each_free_object(__p, __s, __free) \
352 for (__p = (__free); (__p) != page->end; __p = get_freepointer((__s),\ 336 for (__p = (__free); __p; __p = get_freepointer((__s), __p))
353 __p))
354 337
355/* Determine object index from a given position */ 338/* Determine object index from a given position */
356static inline int slab_index(void *p, struct kmem_cache *s, void *addr) 339static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
@@ -502,7 +485,7 @@ static void slab_fix(struct kmem_cache *s, char *fmt, ...)
502static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p) 485static void print_trailer(struct kmem_cache *s, struct page *page, u8 *p)
503{ 486{
504 unsigned int off; /* Offset of last byte */ 487 unsigned int off; /* Offset of last byte */
505 u8 *addr = slab_address(page); 488 u8 *addr = page_address(page);
506 489
507 print_tracking(s, p); 490 print_tracking(s, p);
508 491
@@ -637,7 +620,7 @@ static int check_bytes_and_report(struct kmem_cache *s, struct page *page,
637 * A. Free pointer (if we cannot overwrite object on free) 620 * A. Free pointer (if we cannot overwrite object on free)
638 * B. Tracking data for SLAB_STORE_USER 621 * B. Tracking data for SLAB_STORE_USER
639 * C. Padding to reach required alignment boundary or at mininum 622 * C. Padding to reach required alignment boundary or at mininum
640 * one word if debuggin is on to be able to detect writes 623 * one word if debugging is on to be able to detect writes
641 * before the word boundary. 624 * before the word boundary.
642 * 625 *
643 * Padding is done using 0x5a (POISON_INUSE) 626 * Padding is done using 0x5a (POISON_INUSE)
@@ -680,7 +663,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
680 if (!(s->flags & SLAB_POISON)) 663 if (!(s->flags & SLAB_POISON))
681 return 1; 664 return 1;
682 665
683 start = slab_address(page); 666 start = page_address(page);
684 end = start + (PAGE_SIZE << s->order); 667 end = start + (PAGE_SIZE << s->order);
685 length = s->objects * s->size; 668 length = s->objects * s->size;
686 remainder = end - (start + length); 669 remainder = end - (start + length);
@@ -748,7 +731,7 @@ static int check_object(struct kmem_cache *s, struct page *page,
748 * of the free objects in this slab. May cause 731 * of the free objects in this slab. May cause
749 * another error because the object count is now wrong. 732 * another error because the object count is now wrong.
750 */ 733 */
751 set_freepointer(s, p, page->end); 734 set_freepointer(s, p, NULL);
752 return 0; 735 return 0;
753 } 736 }
754 return 1; 737 return 1;
@@ -782,18 +765,18 @@ static int on_freelist(struct kmem_cache *s, struct page *page, void *search)
782 void *fp = page->freelist; 765 void *fp = page->freelist;
783 void *object = NULL; 766 void *object = NULL;
784 767
785 while (fp != page->end && nr <= s->objects) { 768 while (fp && nr <= s->objects) {
786 if (fp == search) 769 if (fp == search)
787 return 1; 770 return 1;
788 if (!check_valid_pointer(s, page, fp)) { 771 if (!check_valid_pointer(s, page, fp)) {
789 if (object) { 772 if (object) {
790 object_err(s, page, object, 773 object_err(s, page, object,
791 "Freechain corrupt"); 774 "Freechain corrupt");
792 set_freepointer(s, object, page->end); 775 set_freepointer(s, object, NULL);
793 break; 776 break;
794 } else { 777 } else {
795 slab_err(s, page, "Freepointer corrupt"); 778 slab_err(s, page, "Freepointer corrupt");
796 page->freelist = page->end; 779 page->freelist = NULL;
797 page->inuse = s->objects; 780 page->inuse = s->objects;
798 slab_fix(s, "Freelist cleared"); 781 slab_fix(s, "Freelist cleared");
799 return 0; 782 return 0;
@@ -870,7 +853,7 @@ static int alloc_debug_processing(struct kmem_cache *s, struct page *page,
870 if (!check_slab(s, page)) 853 if (!check_slab(s, page))
871 goto bad; 854 goto bad;
872 855
873 if (object && !on_freelist(s, page, object)) { 856 if (!on_freelist(s, page, object)) {
874 object_err(s, page, object, "Object already allocated"); 857 object_err(s, page, object, "Object already allocated");
875 goto bad; 858 goto bad;
876 } 859 }
@@ -880,7 +863,7 @@ static int alloc_debug_processing(struct kmem_cache *s, struct page *page,
880 goto bad; 863 goto bad;
881 } 864 }
882 865
883 if (object && !check_object(s, page, object, 0)) 866 if (!check_object(s, page, object, 0))
884 goto bad; 867 goto bad;
885 868
886 /* Success perform special debug activities for allocs */ 869 /* Success perform special debug activities for allocs */
@@ -899,7 +882,7 @@ bad:
899 */ 882 */
900 slab_fix(s, "Marking all objects used"); 883 slab_fix(s, "Marking all objects used");
901 page->inuse = s->objects; 884 page->inuse = s->objects;
902 page->freelist = page->end; 885 page->freelist = NULL;
903 } 886 }
904 return 0; 887 return 0;
905} 888}
@@ -939,7 +922,7 @@ static int free_debug_processing(struct kmem_cache *s, struct page *page,
939 } 922 }
940 923
941 /* Special debug activities for freeing objects */ 924 /* Special debug activities for freeing objects */
942 if (!SlabFrozen(page) && page->freelist == page->end) 925 if (!SlabFrozen(page) && !page->freelist)
943 remove_full(s, page); 926 remove_full(s, page);
944 if (s->flags & SLAB_STORE_USER) 927 if (s->flags & SLAB_STORE_USER)
945 set_track(s, object, TRACK_FREE, addr); 928 set_track(s, object, TRACK_FREE, addr);
@@ -1015,30 +998,11 @@ static unsigned long kmem_cache_flags(unsigned long objsize,
1015 void (*ctor)(struct kmem_cache *, void *)) 998 void (*ctor)(struct kmem_cache *, void *))
1016{ 999{
1017 /* 1000 /*
1018 * The page->offset field is only 16 bit wide. This is an offset 1001 * Enable debugging if selected on the kernel commandline.
1019 * in units of words from the beginning of an object. If the slab
1020 * size is bigger then we cannot move the free pointer behind the
1021 * object anymore.
1022 *
1023 * On 32 bit platforms the limit is 256k. On 64bit platforms
1024 * the limit is 512k.
1025 *
1026 * Debugging or ctor may create a need to move the free
1027 * pointer. Fail if this happens.
1028 */ 1002 */
1029 if (objsize >= 65535 * sizeof(void *)) { 1003 if (slub_debug && (!slub_debug_slabs ||
1030 BUG_ON(flags & (SLAB_RED_ZONE | SLAB_POISON | 1004 strncmp(slub_debug_slabs, name, strlen(slub_debug_slabs)) == 0))
1031 SLAB_STORE_USER | SLAB_DESTROY_BY_RCU)); 1005 flags |= slub_debug;
1032 BUG_ON(ctor);
1033 } else {
1034 /*
1035 * Enable debugging if selected on the kernel commandline.
1036 */
1037 if (slub_debug && (!slub_debug_slabs ||
1038 strncmp(slub_debug_slabs, name,
1039 strlen(slub_debug_slabs)) == 0))
1040 flags |= slub_debug;
1041 }
1042 1006
1043 return flags; 1007 return flags;
1044} 1008}
@@ -1124,7 +1088,6 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1124 SetSlabDebug(page); 1088 SetSlabDebug(page);
1125 1089
1126 start = page_address(page); 1090 start = page_address(page);
1127 page->end = start + 1;
1128 1091
1129 if (unlikely(s->flags & SLAB_POISON)) 1092 if (unlikely(s->flags & SLAB_POISON))
1130 memset(start, POISON_INUSE, PAGE_SIZE << s->order); 1093 memset(start, POISON_INUSE, PAGE_SIZE << s->order);
@@ -1136,7 +1099,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
1136 last = p; 1099 last = p;
1137 } 1100 }
1138 setup_object(s, page, last); 1101 setup_object(s, page, last);
1139 set_freepointer(s, last, page->end); 1102 set_freepointer(s, last, NULL);
1140 1103
1141 page->freelist = start; 1104 page->freelist = start;
1142 page->inuse = 0; 1105 page->inuse = 0;
@@ -1152,7 +1115,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
1152 void *p; 1115 void *p;
1153 1116
1154 slab_pad_check(s, page); 1117 slab_pad_check(s, page);
1155 for_each_object(p, s, slab_address(page)) 1118 for_each_object(p, s, page_address(page))
1156 check_object(s, page, p, 0); 1119 check_object(s, page, p, 0);
1157 ClearSlabDebug(page); 1120 ClearSlabDebug(page);
1158 } 1121 }
@@ -1162,7 +1125,6 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
1162 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE, 1125 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
1163 -pages); 1126 -pages);
1164 1127
1165 page->mapping = NULL;
1166 __free_pages(page, s->order); 1128 __free_pages(page, s->order);
1167} 1129}
1168 1130
@@ -1307,7 +1269,7 @@ static struct page *get_any_partial(struct kmem_cache *s, gfp_t flags)
1307 * may return off node objects because partial slabs are obtained 1269 * may return off node objects because partial slabs are obtained
1308 * from other nodes and filled up. 1270 * from other nodes and filled up.
1309 * 1271 *
1310 * If /sys/slab/xx/defrag_ratio is set to 100 (which makes 1272 * If /sys/kernel/slab/xx/defrag_ratio is set to 100 (which makes
1311 * defrag_ratio = 1000) then every (well almost) allocation will 1273 * defrag_ratio = 1000) then every (well almost) allocation will
1312 * first attempt to defrag slab caches on other nodes. This means 1274 * first attempt to defrag slab caches on other nodes. This means
1313 * scanning over all nodes to look for partial slabs which may be 1275 * scanning over all nodes to look for partial slabs which may be
@@ -1366,7 +1328,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
1366 ClearSlabFrozen(page); 1328 ClearSlabFrozen(page);
1367 if (page->inuse) { 1329 if (page->inuse) {
1368 1330
1369 if (page->freelist != page->end) { 1331 if (page->freelist) {
1370 add_partial(n, page, tail); 1332 add_partial(n, page, tail);
1371 stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD); 1333 stat(c, tail ? DEACTIVATE_TO_TAIL : DEACTIVATE_TO_HEAD);
1372 } else { 1334 } else {
@@ -1382,9 +1344,11 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
1382 * Adding an empty slab to the partial slabs in order 1344 * Adding an empty slab to the partial slabs in order
1383 * to avoid page allocator overhead. This slab needs 1345 * to avoid page allocator overhead. This slab needs
1384 * to come after the other slabs with objects in 1346 * to come after the other slabs with objects in
1385 * order to fill them up. That way the size of the 1347 * so that the others get filled first. That way the
1386 * partial list stays small. kmem_cache_shrink can 1348 * size of the partial list stays small.
1387 * reclaim empty slabs from the partial list. 1349 *
1350 * kmem_cache_shrink can reclaim any empty slabs from the
1351 * partial list.
1388 */ 1352 */
1389 add_partial(n, page, 1); 1353 add_partial(n, page, 1);
1390 slab_unlock(page); 1354 slab_unlock(page);
@@ -1407,15 +1371,11 @@ static void deactivate_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1407 if (c->freelist) 1371 if (c->freelist)
1408 stat(c, DEACTIVATE_REMOTE_FREES); 1372 stat(c, DEACTIVATE_REMOTE_FREES);
1409 /* 1373 /*
1410 * Merge cpu freelist into freelist. Typically we get here 1374 * Merge cpu freelist into slab freelist. Typically we get here
1411 * because both freelists are empty. So this is unlikely 1375 * because both freelists are empty. So this is unlikely
1412 * to occur. 1376 * to occur.
1413 *
1414 * We need to use _is_end here because deactivate slab may
1415 * be called for a debug slab. Then c->freelist may contain
1416 * a dummy pointer.
1417 */ 1377 */
1418 while (unlikely(!is_end(c->freelist))) { 1378 while (unlikely(c->freelist)) {
1419 void **object; 1379 void **object;
1420 1380
1421 tail = 0; /* Hot objects. Put the slab first */ 1381 tail = 0; /* Hot objects. Put the slab first */
@@ -1442,6 +1402,7 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
1442 1402
1443/* 1403/*
1444 * Flush cpu slab. 1404 * Flush cpu slab.
1405 *
1445 * Called from IPI handler with interrupts disabled. 1406 * Called from IPI handler with interrupts disabled.
1446 */ 1407 */
1447static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu) 1408static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
@@ -1500,7 +1461,8 @@ static inline int node_match(struct kmem_cache_cpu *c, int node)
1500 * rest of the freelist to the lockless freelist. 1461 * rest of the freelist to the lockless freelist.
1501 * 1462 *
1502 * And if we were unable to get a new slab from the partial slab lists then 1463 * And if we were unable to get a new slab from the partial slab lists then
1503 * we need to allocate a new slab. This is slowest path since we may sleep. 1464 * we need to allocate a new slab. This is the slowest path since it involves
1465 * a call to the page allocator and the setup of a new slab.
1504 */ 1466 */
1505static void *__slab_alloc(struct kmem_cache *s, 1467static void *__slab_alloc(struct kmem_cache *s,
1506 gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c) 1468 gfp_t gfpflags, int node, void *addr, struct kmem_cache_cpu *c)
@@ -1514,18 +1476,19 @@ static void *__slab_alloc(struct kmem_cache *s,
1514 slab_lock(c->page); 1476 slab_lock(c->page);
1515 if (unlikely(!node_match(c, node))) 1477 if (unlikely(!node_match(c, node)))
1516 goto another_slab; 1478 goto another_slab;
1479
1517 stat(c, ALLOC_REFILL); 1480 stat(c, ALLOC_REFILL);
1481
1518load_freelist: 1482load_freelist:
1519 object = c->page->freelist; 1483 object = c->page->freelist;
1520 if (unlikely(object == c->page->end)) 1484 if (unlikely(!object))
1521 goto another_slab; 1485 goto another_slab;
1522 if (unlikely(SlabDebug(c->page))) 1486 if (unlikely(SlabDebug(c->page)))
1523 goto debug; 1487 goto debug;
1524 1488
1525 object = c->page->freelist;
1526 c->freelist = object[c->offset]; 1489 c->freelist = object[c->offset];
1527 c->page->inuse = s->objects; 1490 c->page->inuse = s->objects;
1528 c->page->freelist = c->page->end; 1491 c->page->freelist = NULL;
1529 c->node = page_to_nid(c->page); 1492 c->node = page_to_nid(c->page);
1530unlock_out: 1493unlock_out:
1531 slab_unlock(c->page); 1494 slab_unlock(c->page);
@@ -1578,7 +1541,6 @@ new_slab:
1578 1541
1579 return NULL; 1542 return NULL;
1580debug: 1543debug:
1581 object = c->page->freelist;
1582 if (!alloc_debug_processing(s, c->page, object, addr)) 1544 if (!alloc_debug_processing(s, c->page, object, addr))
1583 goto another_slab; 1545 goto another_slab;
1584 1546
@@ -1607,7 +1569,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
1607 1569
1608 local_irq_save(flags); 1570 local_irq_save(flags);
1609 c = get_cpu_slab(s, smp_processor_id()); 1571 c = get_cpu_slab(s, smp_processor_id());
1610 if (unlikely(is_end(c->freelist) || !node_match(c, node))) 1572 if (unlikely(!c->freelist || !node_match(c, node)))
1611 1573
1612 object = __slab_alloc(s, gfpflags, node, addr, c); 1574 object = __slab_alloc(s, gfpflags, node, addr, c);
1613 1575
@@ -1659,6 +1621,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
1659 1621
1660 if (unlikely(SlabDebug(page))) 1622 if (unlikely(SlabDebug(page)))
1661 goto debug; 1623 goto debug;
1624
1662checks_ok: 1625checks_ok:
1663 prior = object[offset] = page->freelist; 1626 prior = object[offset] = page->freelist;
1664 page->freelist = object; 1627 page->freelist = object;
@@ -1673,11 +1636,10 @@ checks_ok:
1673 goto slab_empty; 1636 goto slab_empty;
1674 1637
1675 /* 1638 /*
1676 * Objects left in the slab. If it 1639 * Objects left in the slab. If it was not on the partial list before
1677 * was not on the partial list before
1678 * then add it. 1640 * then add it.
1679 */ 1641 */
1680 if (unlikely(prior == page->end)) { 1642 if (unlikely(!prior)) {
1681 add_partial(get_node(s, page_to_nid(page)), page, 1); 1643 add_partial(get_node(s, page_to_nid(page)), page, 1);
1682 stat(c, FREE_ADD_PARTIAL); 1644 stat(c, FREE_ADD_PARTIAL);
1683 } 1645 }
@@ -1687,7 +1649,7 @@ out_unlock:
1687 return; 1649 return;
1688 1650
1689slab_empty: 1651slab_empty:
1690 if (prior != page->end) { 1652 if (prior) {
1691 /* 1653 /*
1692 * Slab still on the partial list. 1654 * Slab still on the partial list.
1693 */ 1655 */
@@ -1724,8 +1686,8 @@ static __always_inline void slab_free(struct kmem_cache *s,
1724 unsigned long flags; 1686 unsigned long flags;
1725 1687
1726 local_irq_save(flags); 1688 local_irq_save(flags);
1727 debug_check_no_locks_freed(object, s->objsize);
1728 c = get_cpu_slab(s, smp_processor_id()); 1689 c = get_cpu_slab(s, smp_processor_id());
1690 debug_check_no_locks_freed(object, c->objsize);
1729 if (likely(page == c->page && c->node >= 0)) { 1691 if (likely(page == c->page && c->node >= 0)) {
1730 object[c->offset] = c->freelist; 1692 object[c->offset] = c->freelist;
1731 c->freelist = object; 1693 c->freelist = object;
@@ -1888,13 +1850,11 @@ static unsigned long calculate_alignment(unsigned long flags,
1888 unsigned long align, unsigned long size) 1850 unsigned long align, unsigned long size)
1889{ 1851{
1890 /* 1852 /*
1891 * If the user wants hardware cache aligned objects then 1853 * If the user wants hardware cache aligned objects then follow that
1892 * follow that suggestion if the object is sufficiently 1854 * suggestion if the object is sufficiently large.
1893 * large.
1894 * 1855 *
1895 * The hardware cache alignment cannot override the 1856 * The hardware cache alignment cannot override the specified
1896 * specified alignment though. If that is greater 1857 * alignment though. If that is greater then use it.
1897 * then use it.
1898 */ 1858 */
1899 if ((flags & SLAB_HWCACHE_ALIGN) && 1859 if ((flags & SLAB_HWCACHE_ALIGN) &&
1900 size > cache_line_size() / 2) 1860 size > cache_line_size() / 2)
@@ -1910,7 +1870,7 @@ static void init_kmem_cache_cpu(struct kmem_cache *s,
1910 struct kmem_cache_cpu *c) 1870 struct kmem_cache_cpu *c)
1911{ 1871{
1912 c->page = NULL; 1872 c->page = NULL;
1913 c->freelist = (void *)PAGE_MAPPING_ANON; 1873 c->freelist = NULL;
1914 c->node = 0; 1874 c->node = 0;
1915 c->offset = s->offset / sizeof(void *); 1875 c->offset = s->offset / sizeof(void *);
1916 c->objsize = s->objsize; 1876 c->objsize = s->objsize;
@@ -2092,6 +2052,7 @@ static struct kmem_cache_node *early_kmem_cache_node_alloc(gfp_t gfpflags,
2092#endif 2052#endif
2093 init_kmem_cache_node(n); 2053 init_kmem_cache_node(n);
2094 atomic_long_inc(&n->nr_slabs); 2054 atomic_long_inc(&n->nr_slabs);
2055
2095 /* 2056 /*
2096 * lockdep requires consistent irq usage for each lock 2057 * lockdep requires consistent irq usage for each lock
2097 * so even though there cannot be a race this early in 2058 * so even though there cannot be a race this early in
@@ -2173,6 +2134,14 @@ static int calculate_sizes(struct kmem_cache *s)
2173 unsigned long align = s->align; 2134 unsigned long align = s->align;
2174 2135
2175 /* 2136 /*
2137 * Round up object size to the next word boundary. We can only
2138 * place the free pointer at word boundaries and this determines
2139 * the possible location of the free pointer.
2140 */
2141 size = ALIGN(size, sizeof(void *));
2142
2143#ifdef CONFIG_SLUB_DEBUG
2144 /*
2176 * Determine if we can poison the object itself. If the user of 2145 * Determine if we can poison the object itself. If the user of
2177 * the slab may touch the object after free or before allocation 2146 * the slab may touch the object after free or before allocation
2178 * then we should never poison the object itself. 2147 * then we should never poison the object itself.
@@ -2183,14 +2152,7 @@ static int calculate_sizes(struct kmem_cache *s)
2183 else 2152 else
2184 s->flags &= ~__OBJECT_POISON; 2153 s->flags &= ~__OBJECT_POISON;
2185 2154
2186 /*
2187 * Round up object size to the next word boundary. We can only
2188 * place the free pointer at word boundaries and this determines
2189 * the possible location of the free pointer.
2190 */
2191 size = ALIGN(size, sizeof(void *));
2192 2155
2193#ifdef CONFIG_SLUB_DEBUG
2194 /* 2156 /*
2195 * If we are Redzoning then check if there is some space between the 2157 * If we are Redzoning then check if there is some space between the
2196 * end of the object and the free pointer. If not then add an 2158 * end of the object and the free pointer. If not then add an
@@ -2343,7 +2305,7 @@ int kmem_ptr_validate(struct kmem_cache *s, const void *object)
2343 /* 2305 /*
2344 * We could also check if the object is on the slabs freelist. 2306 * We could also check if the object is on the slabs freelist.
2345 * But this would be too expensive and it seems that the main 2307 * But this would be too expensive and it seems that the main
2346 * purpose of kmem_ptr_valid is to check if the object belongs 2308 * purpose of kmem_ptr_valid() is to check if the object belongs
2347 * to a certain slab. 2309 * to a certain slab.
2348 */ 2310 */
2349 return 1; 2311 return 1;
@@ -2630,13 +2592,24 @@ void *__kmalloc(size_t size, gfp_t flags)
2630} 2592}
2631EXPORT_SYMBOL(__kmalloc); 2593EXPORT_SYMBOL(__kmalloc);
2632 2594
2595static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
2596{
2597 struct page *page = alloc_pages_node(node, flags | __GFP_COMP,
2598 get_order(size));
2599
2600 if (page)
2601 return page_address(page);
2602 else
2603 return NULL;
2604}
2605
2633#ifdef CONFIG_NUMA 2606#ifdef CONFIG_NUMA
2634void *__kmalloc_node(size_t size, gfp_t flags, int node) 2607void *__kmalloc_node(size_t size, gfp_t flags, int node)
2635{ 2608{
2636 struct kmem_cache *s; 2609 struct kmem_cache *s;
2637 2610
2638 if (unlikely(size > PAGE_SIZE)) 2611 if (unlikely(size > PAGE_SIZE))
2639 return kmalloc_large(size, flags); 2612 return kmalloc_large_node(size, flags, node);
2640 2613
2641 s = get_slab(size, flags); 2614 s = get_slab(size, flags);
2642 2615
@@ -2653,19 +2626,17 @@ size_t ksize(const void *object)
2653 struct page *page; 2626 struct page *page;
2654 struct kmem_cache *s; 2627 struct kmem_cache *s;
2655 2628
2656 BUG_ON(!object);
2657 if (unlikely(object == ZERO_SIZE_PTR)) 2629 if (unlikely(object == ZERO_SIZE_PTR))
2658 return 0; 2630 return 0;
2659 2631
2660 page = virt_to_head_page(object); 2632 page = virt_to_head_page(object);
2661 BUG_ON(!page);
2662 2633
2663 if (unlikely(!PageSlab(page))) 2634 if (unlikely(!PageSlab(page)))
2664 return PAGE_SIZE << compound_order(page); 2635 return PAGE_SIZE << compound_order(page);
2665 2636
2666 s = page->slab; 2637 s = page->slab;
2667 BUG_ON(!s);
2668 2638
2639#ifdef CONFIG_SLUB_DEBUG
2669 /* 2640 /*
2670 * Debugging requires use of the padding between object 2641 * Debugging requires use of the padding between object
2671 * and whatever may come after it. 2642 * and whatever may come after it.
@@ -2673,6 +2644,7 @@ size_t ksize(const void *object)
2673 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON)) 2644 if (s->flags & (SLAB_RED_ZONE | SLAB_POISON))
2674 return s->objsize; 2645 return s->objsize;
2675 2646
2647#endif
2676 /* 2648 /*
2677 * If we have the need to store the freelist pointer 2649 * If we have the need to store the freelist pointer
2678 * back there or track user information then we can 2650 * back there or track user information then we can
@@ -2680,7 +2652,6 @@ size_t ksize(const void *object)
2680 */ 2652 */
2681 if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER)) 2653 if (s->flags & (SLAB_DESTROY_BY_RCU | SLAB_STORE_USER))
2682 return s->inuse; 2654 return s->inuse;
2683
2684 /* 2655 /*
2685 * Else we can use all the padding etc for the allocation 2656 * Else we can use all the padding etc for the allocation
2686 */ 2657 */
@@ -2957,7 +2928,7 @@ void __init kmem_cache_init(void)
2957 /* 2928 /*
2958 * Patch up the size_index table if we have strange large alignment 2929 * Patch up the size_index table if we have strange large alignment
2959 * requirements for the kmalloc array. This is only the case for 2930 * requirements for the kmalloc array. This is only the case for
2960 * mips it seems. The standard arches will not generate any code here. 2931 * MIPS it seems. The standard arches will not generate any code here.
2961 * 2932 *
2962 * Largest permitted alignment is 256 bytes due to the way we 2933 * Largest permitted alignment is 256 bytes due to the way we
2963 * handle the index determination for the smaller caches. 2934 * handle the index determination for the smaller caches.
@@ -2986,7 +2957,6 @@ void __init kmem_cache_init(void)
2986 kmem_size = sizeof(struct kmem_cache); 2957 kmem_size = sizeof(struct kmem_cache);
2987#endif 2958#endif
2988 2959
2989
2990 printk(KERN_INFO 2960 printk(KERN_INFO
2991 "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," 2961 "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d,"
2992 " CPUs=%d, Nodes=%d\n", 2962 " CPUs=%d, Nodes=%d\n",
@@ -3083,12 +3053,15 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
3083 */ 3053 */
3084 for_each_online_cpu(cpu) 3054 for_each_online_cpu(cpu)
3085 get_cpu_slab(s, cpu)->objsize = s->objsize; 3055 get_cpu_slab(s, cpu)->objsize = s->objsize;
3056
3086 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *))); 3057 s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
3087 up_write(&slub_lock); 3058 up_write(&slub_lock);
3059
3088 if (sysfs_slab_alias(s, name)) 3060 if (sysfs_slab_alias(s, name))
3089 goto err; 3061 goto err;
3090 return s; 3062 return s;
3091 } 3063 }
3064
3092 s = kmalloc(kmem_size, GFP_KERNEL); 3065 s = kmalloc(kmem_size, GFP_KERNEL);
3093 if (s) { 3066 if (s) {
3094 if (kmem_cache_open(s, GFP_KERNEL, name, 3067 if (kmem_cache_open(s, GFP_KERNEL, name,
@@ -3184,7 +3157,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
3184 struct kmem_cache *s; 3157 struct kmem_cache *s;
3185 3158
3186 if (unlikely(size > PAGE_SIZE)) 3159 if (unlikely(size > PAGE_SIZE))
3187 return kmalloc_large(size, gfpflags); 3160 return kmalloc_large_node(size, gfpflags, node);
3188 3161
3189 s = get_slab(size, gfpflags); 3162 s = get_slab(size, gfpflags);
3190 3163
@@ -3199,7 +3172,7 @@ static int validate_slab(struct kmem_cache *s, struct page *page,
3199 unsigned long *map) 3172 unsigned long *map)
3200{ 3173{
3201 void *p; 3174 void *p;
3202 void *addr = slab_address(page); 3175 void *addr = page_address(page);
3203 3176
3204 if (!check_slab(s, page) || 3177 if (!check_slab(s, page) ||
3205 !on_freelist(s, page, NULL)) 3178 !on_freelist(s, page, NULL))
@@ -3482,7 +3455,7 @@ static int add_location(struct loc_track *t, struct kmem_cache *s,
3482static void process_slab(struct loc_track *t, struct kmem_cache *s, 3455static void process_slab(struct loc_track *t, struct kmem_cache *s,
3483 struct page *page, enum track_item alloc) 3456 struct page *page, enum track_item alloc)
3484{ 3457{
3485 void *addr = slab_address(page); 3458 void *addr = page_address(page);
3486 DECLARE_BITMAP(map, s->objects); 3459 DECLARE_BITMAP(map, s->objects);
3487 void *p; 3460 void *p;
3488 3461
@@ -3591,8 +3564,8 @@ enum slab_stat_type {
3591#define SO_CPU (1 << SL_CPU) 3564#define SO_CPU (1 << SL_CPU)
3592#define SO_OBJECTS (1 << SL_OBJECTS) 3565#define SO_OBJECTS (1 << SL_OBJECTS)
3593 3566
3594static unsigned long slab_objects(struct kmem_cache *s, 3567static ssize_t show_slab_objects(struct kmem_cache *s,
3595 char *buf, unsigned long flags) 3568 char *buf, unsigned long flags)
3596{ 3569{
3597 unsigned long total = 0; 3570 unsigned long total = 0;
3598 int cpu; 3571 int cpu;
@@ -3602,6 +3575,8 @@ static unsigned long slab_objects(struct kmem_cache *s,
3602 unsigned long *per_cpu; 3575 unsigned long *per_cpu;
3603 3576
3604 nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL); 3577 nodes = kzalloc(2 * sizeof(unsigned long) * nr_node_ids, GFP_KERNEL);
3578 if (!nodes)
3579 return -ENOMEM;
3605 per_cpu = nodes + nr_node_ids; 3580 per_cpu = nodes + nr_node_ids;
3606 3581
3607 for_each_possible_cpu(cpu) { 3582 for_each_possible_cpu(cpu) {
@@ -3754,25 +3729,25 @@ SLAB_ATTR_RO(aliases);
3754 3729
3755static ssize_t slabs_show(struct kmem_cache *s, char *buf) 3730static ssize_t slabs_show(struct kmem_cache *s, char *buf)
3756{ 3731{
3757 return slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU); 3732 return show_slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU);
3758} 3733}
3759SLAB_ATTR_RO(slabs); 3734SLAB_ATTR_RO(slabs);
3760 3735
3761static ssize_t partial_show(struct kmem_cache *s, char *buf) 3736static ssize_t partial_show(struct kmem_cache *s, char *buf)
3762{ 3737{
3763 return slab_objects(s, buf, SO_PARTIAL); 3738 return show_slab_objects(s, buf, SO_PARTIAL);
3764} 3739}
3765SLAB_ATTR_RO(partial); 3740SLAB_ATTR_RO(partial);
3766 3741
3767static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf) 3742static ssize_t cpu_slabs_show(struct kmem_cache *s, char *buf)
3768{ 3743{
3769 return slab_objects(s, buf, SO_CPU); 3744 return show_slab_objects(s, buf, SO_CPU);
3770} 3745}
3771SLAB_ATTR_RO(cpu_slabs); 3746SLAB_ATTR_RO(cpu_slabs);
3772 3747
3773static ssize_t objects_show(struct kmem_cache *s, char *buf) 3748static ssize_t objects_show(struct kmem_cache *s, char *buf)
3774{ 3749{
3775 return slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU|SO_OBJECTS); 3750 return show_slab_objects(s, buf, SO_FULL|SO_PARTIAL|SO_CPU|SO_OBJECTS);
3776} 3751}
3777SLAB_ATTR_RO(objects); 3752SLAB_ATTR_RO(objects);
3778 3753
@@ -3971,7 +3946,6 @@ SLAB_ATTR(remote_node_defrag_ratio);
3971#endif 3946#endif
3972 3947
3973#ifdef CONFIG_SLUB_STATS 3948#ifdef CONFIG_SLUB_STATS
3974
3975static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) 3949static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
3976{ 3950{
3977 unsigned long sum = 0; 3951 unsigned long sum = 0;
@@ -4155,8 +4129,8 @@ static struct kset *slab_kset;
4155#define ID_STR_LENGTH 64 4129#define ID_STR_LENGTH 64
4156 4130
4157/* Create a unique string id for a slab cache: 4131/* Create a unique string id for a slab cache:
4158 * format 4132 *
4159 * :[flags-]size:[memory address of kmemcache] 4133 * Format :[flags-]size
4160 */ 4134 */
4161static char *create_unique_id(struct kmem_cache *s) 4135static char *create_unique_id(struct kmem_cache *s)
4162{ 4136{