path: root/mm/slab.c
Diffstat (limited to 'mm/slab.c')
-rw-r--r--  mm/slab.c  269
1 file changed, 128 insertions, 141 deletions
diff --git a/mm/slab.c b/mm/slab.c
index 9a90b00d2f91..e74a16e4ced6 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -102,17 +102,19 @@
 #include <linux/cpu.h>
 #include <linux/sysctl.h>
 #include <linux/module.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 #include <linux/rcupdate.h>
 #include <linux/string.h>
 #include <linux/uaccess.h>
 #include <linux/nodemask.h>
+#include <linux/kmemleak.h>
 #include <linux/mempolicy.h>
 #include <linux/mutex.h>
 #include <linux/fault-inject.h>
 #include <linux/rtmutex.h>
 #include <linux/reciprocal_div.h>
 #include <linux/debugobjects.h>
+#include <linux/kmemcheck.h>
 
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
@@ -178,13 +180,13 @@
                          SLAB_STORE_USER | \
                          SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
                          SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
-                         SLAB_DEBUG_OBJECTS)
+                         SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK)
 #else
 # define CREATE_MASK    (SLAB_HWCACHE_ALIGN | \
                          SLAB_CACHE_DMA | \
                          SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
                          SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
-                         SLAB_DEBUG_OBJECTS)
+                         SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK)
 #endif
 
 /*
@@ -315,7 +317,7 @@ static int drain_freelist(struct kmem_cache *cache,
                         struct kmem_list3 *l3, int tofree);
 static void free_block(struct kmem_cache *cachep, void **objpp, int len,
                         int node);
-static int enable_cpucache(struct kmem_cache *cachep);
+static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
 static void cache_reap(struct work_struct *unused);
 
 /*
@@ -373,87 +375,6 @@ static void kmem_list3_init(struct kmem_list3 *parent)
         MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \
         } while (0)
 
-/*
- * struct kmem_cache
- *
- * manages a cache.
- */
-
-struct kmem_cache {
-/* 1) per-cpu data, touched during every alloc/free */
-        struct array_cache *array[NR_CPUS];
-/* 2) Cache tunables. Protected by cache_chain_mutex */
-        unsigned int batchcount;
-        unsigned int limit;
-        unsigned int shared;
-
-        unsigned int buffer_size;
-        u32 reciprocal_buffer_size;
-/* 3) touched by every alloc & free from the backend */
-
-        unsigned int flags;             /* constant flags */
-        unsigned int num;               /* # of objs per slab */
-
-/* 4) cache_grow/shrink */
-        /* order of pgs per slab (2^n) */
-        unsigned int gfporder;
-
-        /* force GFP flags, e.g. GFP_DMA */
-        gfp_t gfpflags;
-
-        size_t colour;                  /* cache colouring range */
-        unsigned int colour_off;        /* colour offset */
-        struct kmem_cache *slabp_cache;
-        unsigned int slab_size;
-        unsigned int dflags;            /* dynamic flags */
-
-        /* constructor func */
-        void (*ctor)(void *obj);
-
-/* 5) cache creation/removal */
-        const char *name;
-        struct list_head next;
-
-/* 6) statistics */
-#if STATS
-        unsigned long num_active;
-        unsigned long num_allocations;
-        unsigned long high_mark;
-        unsigned long grown;
-        unsigned long reaped;
-        unsigned long errors;
-        unsigned long max_freeable;
-        unsigned long node_allocs;
-        unsigned long node_frees;
-        unsigned long node_overflow;
-        atomic_t allochit;
-        atomic_t allocmiss;
-        atomic_t freehit;
-        atomic_t freemiss;
-#endif
-#if DEBUG
-        /*
-         * If debugging is enabled, then the allocator can add additional
-         * fields and/or padding to every object. buffer_size contains the total
-         * object size including these internal fields, the following two
-         * variables contain the offset to the user object and its size.
-         */
-        int obj_offset;
-        int obj_size;
-#endif
-        /*
-         * We put nodelists[] at the end of kmem_cache, because we want to size
-         * this array to nr_node_ids slots instead of MAX_NUMNODES
-         * (see kmem_cache_init())
-         * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache
-         * is statically defined, so we reserve the max number of nodes.
-         */
-        struct kmem_list3 *nodelists[MAX_NUMNODES];
-        /*
-         * Do not add fields after nodelists[]
-         */
-};
-
 #define CFLGS_OFF_SLAB          (0x80000000UL)
 #define OFF_SLAB(x)     ((x)->flags & CFLGS_OFF_SLAB)
 
@@ -752,6 +673,7 @@ static enum {
         NONE,
         PARTIAL_AC,
         PARTIAL_L3,
+        EARLY,
         FULL
 } g_cpucache_up;
 
@@ -760,7 +682,7 @@ static enum {
  */
 int slab_is_available(void)
 {
-        return g_cpucache_up == FULL;
+        return g_cpucache_up >= EARLY;
 }
 
 static DEFINE_PER_CPU(struct delayed_work, reap_work);
@@ -890,7 +812,6 @@ static void __slab_error(const char *function, struct kmem_cache *cachep,
  */
 
 static int use_alien_caches __read_mostly = 1;
-static int numa_platform __read_mostly = 1;
 static int __init noaliencache_setup(char *s)
 {
         use_alien_caches = 0;
@@ -958,12 +879,20 @@ static void __cpuinit start_cpu_timer(int cpu)
 }
 
 static struct array_cache *alloc_arraycache(int node, int entries,
-                                            int batchcount)
+                                            int batchcount, gfp_t gfp)
 {
         int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
         struct array_cache *nc = NULL;
 
-        nc = kmalloc_node(memsize, GFP_KERNEL, node);
+        nc = kmalloc_node(memsize, gfp, node);
+        /*
+         * The array_cache structures contain pointers to free object.
+         * However, when such objects are allocated or transfered to another
+         * cache the pointers are not cleared and they could be counted as
+         * valid references during a kmemleak scan. Therefore, kmemleak must
+         * not scan such objects.
+         */
+        kmemleak_no_scan(nc);
         if (nc) {
                 nc->avail = 0;
                 nc->limit = entries;
@@ -1003,7 +932,7 @@ static int transfer_objects(struct array_cache *to,
 #define drain_alien_cache(cachep, alien) do { } while (0)
 #define reap_alien(cachep, l3) do { } while (0)
 
-static inline struct array_cache **alloc_alien_cache(int node, int limit)
+static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
 {
         return (struct array_cache **)BAD_ALIEN_MAGIC;
 }
@@ -1034,7 +963,7 @@ static inline void *____cache_alloc_node(struct kmem_cache *cachep,
 static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
 static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
 
-static struct array_cache **alloc_alien_cache(int node, int limit)
+static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
 {
         struct array_cache **ac_ptr;
         int memsize = sizeof(void *) * nr_node_ids;
@@ -1042,14 +971,14 @@ static struct array_cache **alloc_alien_cache(int node, int limit)
 
         if (limit > 1)
                 limit = 12;
-        ac_ptr = kmalloc_node(memsize, GFP_KERNEL, node);
+        ac_ptr = kmalloc_node(memsize, gfp, node);
         if (ac_ptr) {
                 for_each_node(i) {
                         if (i == node || !node_online(i)) {
                                 ac_ptr[i] = NULL;
                                 continue;
                         }
-                        ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d);
+                        ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp);
                         if (!ac_ptr[i]) {
                                 for (i--; i >= 0; i--)
                                         kfree(ac_ptr[i]);
@@ -1282,20 +1211,20 @@ static int __cpuinit cpuup_prepare(long cpu)
                 struct array_cache **alien = NULL;
 
                 nc = alloc_arraycache(node, cachep->limit,
-                                        cachep->batchcount);
+                                        cachep->batchcount, GFP_KERNEL);
                 if (!nc)
                         goto bad;
                 if (cachep->shared) {
                         shared = alloc_arraycache(node,
                                 cachep->shared * cachep->batchcount,
-                                0xbaadf00d);
+                                0xbaadf00d, GFP_KERNEL);
                         if (!shared) {
                                 kfree(nc);
                                 goto bad;
                         }
                 }
                 if (use_alien_caches) {
-                        alien = alloc_alien_cache(node, cachep->limit);
+                        alien = alloc_alien_cache(node, cachep->limit, GFP_KERNEL);
                         if (!alien) {
                                 kfree(shared);
                                 kfree(nc);
@@ -1399,10 +1328,9 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
 {
         struct kmem_list3 *ptr;
 
-        ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, nodeid);
+        ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_NOWAIT, nodeid);
         BUG_ON(!ptr);
 
-        local_irq_disable();
         memcpy(ptr, list, sizeof(struct kmem_list3));
         /*
          * Do not assume that spinlocks can be initialized via memcpy:
@@ -1411,7 +1339,6 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
 
         MAKE_ALL_LISTS(cachep, ptr, nodeid);
         cachep->nodelists[nodeid] = ptr;
-        local_irq_enable();
 }
 
 /*
@@ -1443,10 +1370,8 @@ void __init kmem_cache_init(void)
         int order;
         int node;
 
-        if (num_possible_nodes() == 1) {
+        if (num_possible_nodes() == 1)
                 use_alien_caches = 0;
-                numa_platform = 0;
-        }
 
         for (i = 0; i < NUM_INIT_LISTS; i++) {
                 kmem_list3_init(&initkmem_list3[i]);
@@ -1575,9 +1500,8 @@ void __init kmem_cache_init(void)
         {
                 struct array_cache *ptr;
 
-                ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
+                ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
 
-                local_irq_disable();
                 BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
                 memcpy(ptr, cpu_cache_get(&cache_cache),
                        sizeof(struct arraycache_init));
@@ -1587,11 +1511,9 @@ void __init kmem_cache_init(void)
                 spin_lock_init(&ptr->lock);
 
                 cache_cache.array[smp_processor_id()] = ptr;
-                local_irq_enable();
 
-                ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
+                ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
 
-                local_irq_disable();
                 BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)
                        != &initarray_generic.cache);
                 memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
@@ -1603,7 +1525,6 @@ void __init kmem_cache_init(void)
 
                 malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
                     ptr;
-                local_irq_enable();
         }
         /* 5) Replace the bootstrap kmem_list3's */
         {
@@ -1622,19 +1543,22 @@
                 }
         }
 
-        /* 6) resize the head arrays to their final sizes */
-        {
-                struct kmem_cache *cachep;
-                mutex_lock(&cache_chain_mutex);
-                list_for_each_entry(cachep, &cache_chain, next)
-                        if (enable_cpucache(cachep))
-                                BUG();
-                mutex_unlock(&cache_chain_mutex);
-        }
+        g_cpucache_up = EARLY;
 
         /* Annotate slab for lockdep -- annotate the malloc caches */
         init_lock_keys();
+}
+
+void __init kmem_cache_init_late(void)
+{
+        struct kmem_cache *cachep;
 
+        /* 6) resize the head arrays to their final sizes */
+        mutex_lock(&cache_chain_mutex);
+        list_for_each_entry(cachep, &cache_chain, next)
+                if (enable_cpucache(cachep, GFP_NOWAIT))
+                        BUG();
+        mutex_unlock(&cache_chain_mutex);
 
         /* Done! */
         g_cpucache_up = FULL;
@@ -1689,7 +1613,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
         if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
                 flags |= __GFP_RECLAIMABLE;
 
-        page = alloc_pages_node(nodeid, flags, cachep->gfporder);
+        page = alloc_pages_exact_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder);
         if (!page)
                 return NULL;
 
@@ -1702,6 +1626,16 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
                         NR_SLAB_UNRECLAIMABLE, nr_pages);
         for (i = 0; i < nr_pages; i++)
                 __SetPageSlab(page + i);
+
+        if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
+                kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);
+
+                if (cachep->ctor)
+                        kmemcheck_mark_uninitialized_pages(page, nr_pages);
+                else
+                        kmemcheck_mark_unallocated_pages(page, nr_pages);
+        }
+
         return page_address(page);
 }
 
@@ -1714,6 +1648,8 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
         struct page *page = virt_to_page(addr);
         const unsigned long nr_freed = i;
 
+        kmemcheck_free_shadow(page, cachep->gfporder);
+
         if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
                 sub_zone_page_state(page_zone(page),
                                 NR_SLAB_RECLAIMABLE, nr_freed);
@@ -2064,10 +2000,10 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
         return left_over;
 }
 
-static int __init_refok setup_cpu_cache(struct kmem_cache *cachep)
+static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
 {
         if (g_cpucache_up == FULL)
-                return enable_cpucache(cachep);
+                return enable_cpucache(cachep, gfp);
 
         if (g_cpucache_up == NONE) {
                 /*
@@ -2089,7 +2025,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep)
                 g_cpucache_up = PARTIAL_AC;
         } else {
                 cachep->array[smp_processor_id()] =
-                        kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
+                        kmalloc(sizeof(struct arraycache_init), gfp);
 
                 if (g_cpucache_up == PARTIAL_AC) {
                         set_up_list3s(cachep, SIZE_L3);
@@ -2099,7 +2035,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep)
                         for_each_online_node(node) {
                                 cachep->nodelists[node] =
                                         kmalloc_node(sizeof(struct kmem_list3),
-                                                GFP_KERNEL, node);
+                                                gfp, node);
                                 BUG_ON(!cachep->nodelists[node]);
                                 kmem_list3_init(cachep->nodelists[node]);
                         }
@@ -2153,6 +2089,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 {
         size_t left_over, slab_size, ralign;
         struct kmem_cache *cachep = NULL, *pc;
+        gfp_t gfp;
 
         /*
          * Sanity checks... these are all serious usage bugs.
@@ -2168,8 +2105,10 @@ kmem_cache_create (const char *name, size_t size, size_t align,
          * We use cache_chain_mutex to ensure a consistent view of
          * cpu_online_mask as well. Please see cpuup_callback
          */
-        get_online_cpus();
-        mutex_lock(&cache_chain_mutex);
+        if (slab_is_available()) {
+                get_online_cpus();
+                mutex_lock(&cache_chain_mutex);
+        }
 
         list_for_each_entry(pc, &cache_chain, next) {
                 char tmp;
@@ -2278,8 +2217,13 @@ kmem_cache_create (const char *name, size_t size, size_t align,
          */
         align = ralign;
 
+        if (slab_is_available())
+                gfp = GFP_KERNEL;
+        else
+                gfp = GFP_NOWAIT;
+
         /* Get cache's description obj. */
-        cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL);
+        cachep = kmem_cache_zalloc(&cache_cache, gfp);
         if (!cachep)
                 goto oops;
 
@@ -2353,6 +2297,15 @@ kmem_cache_create (const char *name, size_t size, size_t align,
                 /* really off slab. No need for manual alignment */
                 slab_size =
                     cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);
+
+#ifdef CONFIG_PAGE_POISONING
+                /* If we're going to use the generic kernel_map_pages()
+                 * poisoning, then it's going to smash the contents of
+                 * the redzone and userword anyhow, so switch them off.
+                 */
+                if (size % PAGE_SIZE == 0 && flags & SLAB_POISON)
+                        flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
+#endif
         }
 
         cachep->colour_off = cache_line_size();
@@ -2382,7 +2335,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
         cachep->ctor = ctor;
         cachep->name = name;
 
-        if (setup_cpu_cache(cachep)) {
+        if (setup_cpu_cache(cachep, gfp)) {
                 __kmem_cache_destroy(cachep);
                 cachep = NULL;
                 goto oops;
@@ -2394,8 +2347,10 @@ oops:
         if (!cachep && (flags & SLAB_PANIC))
                 panic("kmem_cache_create(): failed to create slab `%s'\n",
                       name);
-        mutex_unlock(&cache_chain_mutex);
-        put_online_cpus();
+        if (slab_is_available()) {
+                mutex_unlock(&cache_chain_mutex);
+                put_online_cpus();
+        }
         return cachep;
 }
 EXPORT_SYMBOL(kmem_cache_create);
@@ -2621,6 +2576,14 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
                 /* Slab management obj is off-slab. */
                 slabp = kmem_cache_alloc_node(cachep->slabp_cache,
                                               local_flags, nodeid);
+                /*
+                 * If the first object in the slab is leaked (it's allocated
+                 * but no one has a reference to it), we want to make sure
+                 * kmemleak does not treat the ->s_mem pointer as a reference
+                 * to the object. Otherwise we will not report the leak.
+                 */
+                kmemleak_scan_area(slabp, offsetof(struct slab, list),
+                                   sizeof(struct list_head), local_flags);
                 if (!slabp)
                         return NULL;
         } else {
@@ -3141,6 +3104,12 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
                 STATS_INC_ALLOCMISS(cachep);
                 objp = cache_alloc_refill(cachep, flags);
         }
+        /*
+         * To avoid a false negative, if an object that is in one of the
+         * per-CPU caches is leaked, we need to make sure kmemleak doesn't
+         * treat the array pointers as a reference to the object.
+         */
+        kmemleak_erase(&ac->entry[ac->avail]);
         return objp;
 }
 
@@ -3219,7 +3188,7 @@ retry:
         if (local_flags & __GFP_WAIT)
                 local_irq_enable();
         kmem_flagcheck(cache, flags);
-        obj = kmem_getpages(cache, local_flags, -1);
+        obj = kmem_getpages(cache, local_flags, numa_node_id());
         if (local_flags & __GFP_WAIT)
                 local_irq_disable();
         if (obj) {
@@ -3327,6 +3296,8 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
         unsigned long save_flags;
         void *ptr;
 
+        flags &= gfp_allowed_mask;
+
         lockdep_trace_alloc(flags);
 
         if (slab_should_failslab(cachep, flags))
@@ -3360,6 +3331,11 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
   out:
         local_irq_restore(save_flags);
         ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
+        kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags,
+                                 flags);
+
+        if (likely(ptr))
+                kmemcheck_slab_alloc(cachep, flags, ptr, obj_size(cachep));
 
         if (unlikely((flags & __GFP_ZERO) && ptr))
                 memset(ptr, 0, obj_size(cachep));
@@ -3405,6 +3381,8 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
         unsigned long save_flags;
         void *objp;
 
+        flags &= gfp_allowed_mask;
+
         lockdep_trace_alloc(flags);
 
         if (slab_should_failslab(cachep, flags))
@@ -3415,8 +3393,13 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
         objp = __do_cache_alloc(cachep, flags);
         local_irq_restore(save_flags);
         objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
+        kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags,
+                                 flags);
         prefetchw(objp);
 
+        if (likely(objp))
+                kmemcheck_slab_alloc(cachep, flags, objp, obj_size(cachep));
+
         if (unlikely((flags & __GFP_ZERO) && objp))
                 memset(objp, 0, obj_size(cachep));
 
@@ -3530,8 +3513,11 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
         struct array_cache *ac = cpu_cache_get(cachep);
 
         check_irq_off();
+        kmemleak_free_recursive(objp, cachep->flags);
         objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
 
+        kmemcheck_slab_free(cachep, objp, obj_size(cachep));
+
         /*
          * Skip calling cache_free_alien() when the platform is not numa.
          * This will avoid cache misses that happen while accessing slabp (which
@@ -3539,7 +3525,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
          * variable to skip the call, which is mostly likely to be present in
          * the cache.
          */
-        if (numa_platform && cache_free_alien(cachep, objp))
+        if (nr_online_nodes > 1 && cache_free_alien(cachep, objp))
                 return;
 
         if (likely(ac->avail < ac->limit)) {
@@ -3802,7 +3788,7 @@ EXPORT_SYMBOL_GPL(kmem_cache_name);
 /*
  * This initializes kmem_list3 or resizes various caches for all nodes.
  */
-static int alloc_kmemlist(struct kmem_cache *cachep)
+static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
 {
         int node;
         struct kmem_list3 *l3;
@@ -3812,7 +3798,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep)
         for_each_online_node(node) {
 
                 if (use_alien_caches) {
-                        new_alien = alloc_alien_cache(node, cachep->limit);
+                        new_alien = alloc_alien_cache(node, cachep->limit, gfp);
                         if (!new_alien)
                                 goto fail;
                 }
@@ -3821,7 +3807,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep)
                 if (cachep->shared) {
                         new_shared = alloc_arraycache(node,
                                 cachep->shared*cachep->batchcount,
-                                0xbaadf00d);
+                                0xbaadf00d, gfp);
                         if (!new_shared) {
                                 free_alien_cache(new_alien);
                                 goto fail;
@@ -3850,7 +3836,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep)
                         free_alien_cache(new_alien);
                         continue;
                 }
-                l3 = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, node);
+                l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node);
                 if (!l3) {
                         free_alien_cache(new_alien);
                         kfree(new_shared);
@@ -3906,18 +3892,18 @@ static void do_ccupdate_local(void *info)
 
 /* Always called with the cache_chain_mutex held */
 static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
-                            int batchcount, int shared)
+                            int batchcount, int shared, gfp_t gfp)
 {
         struct ccupdate_struct *new;
         int i;
 
-        new = kzalloc(sizeof(*new), GFP_KERNEL);
+        new = kzalloc(sizeof(*new), gfp);
         if (!new)
                 return -ENOMEM;
 
         for_each_online_cpu(i) {
                 new->new[i] = alloc_arraycache(cpu_to_node(i), limit,
-                                                batchcount);
+                                                batchcount, gfp);
                 if (!new->new[i]) {
                         for (i--; i >= 0; i--)
                                 kfree(new->new[i]);
@@ -3944,11 +3930,11 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
                 kfree(ccold);
         }
         kfree(new);
-        return alloc_kmemlist(cachep);
+        return alloc_kmemlist(cachep, gfp);
 }
 
 /* Called with cache_chain_mutex held always */
-static int enable_cpucache(struct kmem_cache *cachep)
+static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
 {
         int err;
         int limit, shared;
@@ -3994,7 +3980,7 @@ static int enable_cpucache(struct kmem_cache *cachep)
         if (limit > 32)
                 limit = 32;
 #endif
-        err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared);
+        err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp);
         if (err)
                 printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
                        cachep->name, -err);
@@ -4300,7 +4286,8 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer,
                         res = 0;
                 } else {
                         res = do_tune_cpucache(cachep, limit,
-                                               batchcount, shared);
+                                               batchcount, shared,
+                                               GFP_KERNEL);
                 }
                 break;
         }