Diffstat (limited to 'mm/slab.c')
-rw-r--r--	mm/slab.c	280
1 file changed, 139 insertions(+), 141 deletions(-)
diff --git a/mm/slab.c b/mm/slab.c
index 9a90b00d2f91..d08692303f6e 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -102,17 +102,19 @@
 #include <linux/cpu.h>
 #include <linux/sysctl.h>
 #include <linux/module.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
 #include <linux/rcupdate.h>
 #include <linux/string.h>
 #include <linux/uaccess.h>
 #include <linux/nodemask.h>
+#include <linux/kmemleak.h>
 #include <linux/mempolicy.h>
 #include <linux/mutex.h>
 #include <linux/fault-inject.h>
 #include <linux/rtmutex.h>
 #include <linux/reciprocal_div.h>
 #include <linux/debugobjects.h>
+#include <linux/kmemcheck.h>
 
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
@@ -178,13 +180,13 @@
 			 SLAB_STORE_USER | \
 			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
 			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
-			 SLAB_DEBUG_OBJECTS)
+			 SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK)
 #else
 # define CREATE_MASK	(SLAB_HWCACHE_ALIGN | \
 			 SLAB_CACHE_DMA | \
 			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
 			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
-			 SLAB_DEBUG_OBJECTS)
+			 SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK)
 #endif
 
 /*
@@ -303,6 +305,12 @@ struct kmem_list3 {
 };
 
 /*
+ * The slab allocator is initialized with interrupts disabled. Therefore, make
+ * sure early boot allocations don't accidentally enable interrupts.
+ */
+static gfp_t slab_gfp_mask __read_mostly = SLAB_GFP_BOOT_MASK;
+
+/*
  * Need this for bootstrapping a per node allocator.
  */
 #define NUM_INIT_LISTS (3 * MAX_NUMNODES)
@@ -315,7 +323,7 @@ static int drain_freelist(struct kmem_cache *cache,
 			struct kmem_list3 *l3, int tofree);
 static void free_block(struct kmem_cache *cachep, void **objpp, int len,
 			int node);
-static int enable_cpucache(struct kmem_cache *cachep);
+static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
 static void cache_reap(struct work_struct *unused);
 
 /*
@@ -373,87 +381,6 @@ static void kmem_list3_init(struct kmem_list3 *parent)
 	MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid);	\
 	} while (0)
 
-/*
- * struct kmem_cache
- *
- * manages a cache.
- */
-
-struct kmem_cache {
-/* 1) per-cpu data, touched during every alloc/free */
-	struct array_cache *array[NR_CPUS];
-/* 2) Cache tunables. Protected by cache_chain_mutex */
-	unsigned int batchcount;
-	unsigned int limit;
-	unsigned int shared;
-
-	unsigned int buffer_size;
-	u32 reciprocal_buffer_size;
-/* 3) touched by every alloc & free from the backend */
-
-	unsigned int flags;		/* constant flags */
-	unsigned int num;		/* # of objs per slab */
-
-/* 4) cache_grow/shrink */
-	/* order of pgs per slab (2^n) */
-	unsigned int gfporder;
-
-	/* force GFP flags, e.g. GFP_DMA */
-	gfp_t gfpflags;
-
-	size_t colour;			/* cache colouring range */
-	unsigned int colour_off;	/* colour offset */
-	struct kmem_cache *slabp_cache;
-	unsigned int slab_size;
-	unsigned int dflags;		/* dynamic flags */
-
-	/* constructor func */
-	void (*ctor)(void *obj);
-
-/* 5) cache creation/removal */
-	const char *name;
-	struct list_head next;
-
-/* 6) statistics */
-#if STATS
-	unsigned long num_active;
-	unsigned long num_allocations;
-	unsigned long high_mark;
-	unsigned long grown;
-	unsigned long reaped;
-	unsigned long errors;
-	unsigned long max_freeable;
-	unsigned long node_allocs;
-	unsigned long node_frees;
-	unsigned long node_overflow;
-	atomic_t allochit;
-	atomic_t allocmiss;
-	atomic_t freehit;
-	atomic_t freemiss;
-#endif
-#if DEBUG
-	/*
-	 * If debugging is enabled, then the allocator can add additional
-	 * fields and/or padding to every object. buffer_size contains the total
-	 * object size including these internal fields, the following two
-	 * variables contain the offset to the user object and its size.
-	 */
-	int obj_offset;
-	int obj_size;
-#endif
-	/*
-	 * We put nodelists[] at the end of kmem_cache, because we want to size
-	 * this array to nr_node_ids slots instead of MAX_NUMNODES
-	 * (see kmem_cache_init())
-	 * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache
-	 * is statically defined, so we reserve the max number of nodes.
-	 */
-	struct kmem_list3 *nodelists[MAX_NUMNODES];
-	/*
-	 * Do not add fields after nodelists[]
-	 */
-};
-
 #define CFLGS_OFF_SLAB		(0x80000000UL)
 #define	OFF_SLAB(x)	((x)->flags & CFLGS_OFF_SLAB)
 
@@ -752,6 +679,7 @@ static enum {
 	NONE,
 	PARTIAL_AC,
 	PARTIAL_L3,
+	EARLY,
 	FULL
 } g_cpucache_up;
 
@@ -760,7 +688,7 @@ static enum {
  */
 int slab_is_available(void)
 {
-	return g_cpucache_up == FULL;
+	return g_cpucache_up >= EARLY;
 }
 
 static DEFINE_PER_CPU(struct delayed_work, reap_work);
@@ -890,7 +818,6 @@ static void __slab_error(const char *function, struct kmem_cache *cachep,
  */
 
 static int use_alien_caches __read_mostly = 1;
-static int numa_platform __read_mostly = 1;
 static int __init noaliencache_setup(char *s)
 {
 	use_alien_caches = 0;
@@ -958,12 +885,20 @@ static void __cpuinit start_cpu_timer(int cpu)
 }
 
 static struct array_cache *alloc_arraycache(int node, int entries,
-					    int batchcount)
+					    int batchcount, gfp_t gfp)
 {
 	int memsize = sizeof(void *) * entries + sizeof(struct array_cache);
 	struct array_cache *nc = NULL;
 
-	nc = kmalloc_node(memsize, GFP_KERNEL, node);
+	nc = kmalloc_node(memsize, gfp, node);
+	/*
+	 * The array_cache structures contain pointers to free object.
+	 * However, when such objects are allocated or transfered to another
+	 * cache the pointers are not cleared and they could be counted as
+	 * valid references during a kmemleak scan. Therefore, kmemleak must
+	 * not scan such objects.
+	 */
+	kmemleak_no_scan(nc);
 	if (nc) {
 		nc->avail = 0;
 		nc->limit = entries;
@@ -1003,7 +938,7 @@ static int transfer_objects(struct array_cache *to,
 #define drain_alien_cache(cachep, alien) do { } while (0)
 #define reap_alien(cachep, l3) do { } while (0)
 
-static inline struct array_cache **alloc_alien_cache(int node, int limit)
+static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
 {
 	return (struct array_cache **)BAD_ALIEN_MAGIC;
 }
@@ -1034,7 +969,7 @@ static inline void *____cache_alloc_node(struct kmem_cache *cachep,
 static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int);
 static void *alternate_node_alloc(struct kmem_cache *, gfp_t);
 
-static struct array_cache **alloc_alien_cache(int node, int limit)
+static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
 {
 	struct array_cache **ac_ptr;
 	int memsize = sizeof(void *) * nr_node_ids;
@@ -1042,14 +977,14 @@ static struct array_cache **alloc_alien_cache(int node, int limit)
 
 	if (limit > 1)
 		limit = 12;
-	ac_ptr = kmalloc_node(memsize, GFP_KERNEL, node);
+	ac_ptr = kmalloc_node(memsize, gfp, node);
 	if (ac_ptr) {
 		for_each_node(i) {
 			if (i == node || !node_online(i)) {
 				ac_ptr[i] = NULL;
 				continue;
 			}
-			ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d);
+			ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp);
 			if (!ac_ptr[i]) {
 				for (i--; i >= 0; i--)
 					kfree(ac_ptr[i]);
@@ -1282,20 +1217,20 @@ static int __cpuinit cpuup_prepare(long cpu)
 		struct array_cache **alien = NULL;
 
 		nc = alloc_arraycache(node, cachep->limit,
-					cachep->batchcount);
+					cachep->batchcount, GFP_KERNEL);
 		if (!nc)
 			goto bad;
 		if (cachep->shared) {
 			shared = alloc_arraycache(node,
 				cachep->shared * cachep->batchcount,
-				0xbaadf00d);
+				0xbaadf00d, GFP_KERNEL);
 			if (!shared) {
 				kfree(nc);
 				goto bad;
 			}
 		}
 		if (use_alien_caches) {
-			alien = alloc_alien_cache(node, cachep->limit);
+			alien = alloc_alien_cache(node, cachep->limit, GFP_KERNEL);
 			if (!alien) {
 				kfree(shared);
 				kfree(nc);
@@ -1399,10 +1334,9 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
 {
 	struct kmem_list3 *ptr;
 
-	ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, nodeid);
+	ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_NOWAIT, nodeid);
 	BUG_ON(!ptr);
 
-	local_irq_disable();
 	memcpy(ptr, list, sizeof(struct kmem_list3));
 	/*
 	 * Do not assume that spinlocks can be initialized via memcpy:
@@ -1411,7 +1345,6 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
 
 	MAKE_ALL_LISTS(cachep, ptr, nodeid);
 	cachep->nodelists[nodeid] = ptr;
-	local_irq_enable();
 }
 
 /*
@@ -1443,10 +1376,8 @@ void __init kmem_cache_init(void)
 	int order;
 	int node;
 
-	if (num_possible_nodes() == 1) {
+	if (num_possible_nodes() == 1)
 		use_alien_caches = 0;
-		numa_platform = 0;
-	}
 
 	for (i = 0; i < NUM_INIT_LISTS; i++) {
 		kmem_list3_init(&initkmem_list3[i]);
@@ -1575,9 +1506,8 @@ void __init kmem_cache_init(void)
 	{
 		struct array_cache *ptr;
 
-		ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
+		ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
 
-		local_irq_disable();
 		BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache);
 		memcpy(ptr, cpu_cache_get(&cache_cache),
 		       sizeof(struct arraycache_init));
@@ -1587,11 +1517,9 @@ void __init kmem_cache_init(void)
 		spin_lock_init(&ptr->lock);
 
 		cache_cache.array[smp_processor_id()] = ptr;
-		local_irq_enable();
 
-		ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
+		ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
 
-		local_irq_disable();
 		BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep)
 		       != &initarray_generic.cache);
 		memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep),
@@ -1603,7 +1531,6 @@ void __init kmem_cache_init(void)
 
 		malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] =
 		    ptr;
-		local_irq_enable();
 	}
 	/* 5) Replace the bootstrap kmem_list3's */
 	{
@@ -1622,19 +1549,27 @@ void __init kmem_cache_init(void)
 		}
 	}
 
-	/* 6) resize the head arrays to their final sizes */
-	{
-		struct kmem_cache *cachep;
-		mutex_lock(&cache_chain_mutex);
-		list_for_each_entry(cachep, &cache_chain, next)
-			if (enable_cpucache(cachep))
-				BUG();
-		mutex_unlock(&cache_chain_mutex);
-	}
+	g_cpucache_up = EARLY;
 
 	/* Annotate slab for lockdep -- annotate the malloc caches */
 	init_lock_keys();
+}
+
+void __init kmem_cache_init_late(void)
+{
+	struct kmem_cache *cachep;
 
+	/*
+	 * Interrupts are enabled now so all GFP allocations are safe.
+	 */
+	slab_gfp_mask = __GFP_BITS_MASK;
+
+	/* 6) resize the head arrays to their final sizes */
+	mutex_lock(&cache_chain_mutex);
+	list_for_each_entry(cachep, &cache_chain, next)
+		if (enable_cpucache(cachep, GFP_NOWAIT))
+			BUG();
+	mutex_unlock(&cache_chain_mutex);
 
 	/* Done! */
 	g_cpucache_up = FULL;
@@ -1689,7 +1624,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
 		flags |= __GFP_RECLAIMABLE;
 
-	page = alloc_pages_node(nodeid, flags, cachep->gfporder);
+	page = alloc_pages_exact_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder);
 	if (!page)
 		return NULL;
 
@@ -1702,6 +1637,16 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 			NR_SLAB_UNRECLAIMABLE, nr_pages);
 	for (i = 0; i < nr_pages; i++)
 		__SetPageSlab(page + i);
+
+	if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
+		kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);
+
+		if (cachep->ctor)
+			kmemcheck_mark_uninitialized_pages(page, nr_pages);
+		else
+			kmemcheck_mark_unallocated_pages(page, nr_pages);
+	}
+
 	return page_address(page);
 }
 
@@ -1714,6 +1659,8 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
 	struct page *page = virt_to_page(addr);
 	const unsigned long nr_freed = i;
 
+	kmemcheck_free_shadow(page, cachep->gfporder);
+
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
 		sub_zone_page_state(page_zone(page),
 				NR_SLAB_RECLAIMABLE, nr_freed);
@@ -2064,10 +2011,10 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
 	return left_over;
 }
 
-static int __init_refok setup_cpu_cache(struct kmem_cache *cachep)
+static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
 {
 	if (g_cpucache_up == FULL)
-		return enable_cpucache(cachep);
+		return enable_cpucache(cachep, gfp);
 
 	if (g_cpucache_up == NONE) {
 		/*
@@ -2089,7 +2036,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep)
 		g_cpucache_up = PARTIAL_AC;
 	} else {
 		cachep->array[smp_processor_id()] =
-			kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
+			kmalloc(sizeof(struct arraycache_init), gfp);
 
 		if (g_cpucache_up == PARTIAL_AC) {
 			set_up_list3s(cachep, SIZE_L3);
@@ -2099,7 +2046,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep)
 			for_each_online_node(node) {
 				cachep->nodelists[node] =
 					kmalloc_node(sizeof(struct kmem_list3),
-							GFP_KERNEL, node);
+							gfp, node);
 				BUG_ON(!cachep->nodelists[node]);
 				kmem_list3_init(cachep->nodelists[node]);
 			}
@@ -2153,6 +2100,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 {
 	size_t left_over, slab_size, ralign;
 	struct kmem_cache *cachep = NULL, *pc;
+	gfp_t gfp;
 
 	/*
 	 * Sanity checks... these are all serious usage bugs.
@@ -2168,8 +2116,10 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	 * We use cache_chain_mutex to ensure a consistent view of
 	 * cpu_online_mask as well. Please see cpuup_callback
 	 */
-	get_online_cpus();
-	mutex_lock(&cache_chain_mutex);
+	if (slab_is_available()) {
+		get_online_cpus();
+		mutex_lock(&cache_chain_mutex);
+	}
 
 	list_for_each_entry(pc, &cache_chain, next) {
 		char tmp;
@@ -2278,8 +2228,13 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	 */
 	align = ralign;
 
+	if (slab_is_available())
+		gfp = GFP_KERNEL;
+	else
+		gfp = GFP_NOWAIT;
+
 	/* Get cache's description obj. */
-	cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL);
+	cachep = kmem_cache_zalloc(&cache_cache, gfp);
 	if (!cachep)
 		goto oops;
 
@@ -2353,6 +2308,15 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 		/* really off slab. No need for manual alignment */
 		slab_size =
 		    cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab);
+
+#ifdef CONFIG_PAGE_POISONING
+		/* If we're going to use the generic kernel_map_pages()
+		 * poisoning, then it's going to smash the contents of
+		 * the redzone and userword anyhow, so switch them off.
+		 */
+		if (size % PAGE_SIZE == 0 && flags & SLAB_POISON)
+			flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
+#endif
 	}
 
 	cachep->colour_off = cache_line_size();
@@ -2382,7 +2346,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	cachep->ctor = ctor;
 	cachep->name = name;
 
-	if (setup_cpu_cache(cachep)) {
+	if (setup_cpu_cache(cachep, gfp)) {
 		__kmem_cache_destroy(cachep);
 		cachep = NULL;
 		goto oops;
@@ -2394,8 +2358,10 @@ oops:
 	if (!cachep && (flags & SLAB_PANIC))
 		panic("kmem_cache_create(): failed to create slab `%s'\n",
 		      name);
-	mutex_unlock(&cache_chain_mutex);
-	put_online_cpus();
+	if (slab_is_available()) {
+		mutex_unlock(&cache_chain_mutex);
+		put_online_cpus();
+	}
 	return cachep;
 }
 EXPORT_SYMBOL(kmem_cache_create);
@@ -2621,6 +2587,14 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
 		/* Slab management obj is off-slab. */
 		slabp = kmem_cache_alloc_node(cachep->slabp_cache,
 					      local_flags, nodeid);
+		/*
+		 * If the first object in the slab is leaked (it's allocated
+		 * but no one has a reference to it), we want to make sure
+		 * kmemleak does not treat the ->s_mem pointer as a reference
+		 * to the object. Otherwise we will not report the leak.
+		 */
+		kmemleak_scan_area(slabp, offsetof(struct slab, list),
+				   sizeof(struct list_head), local_flags);
 		if (!slabp)
 			return NULL;
 	} else {
@@ -3141,6 +3115,12 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 		STATS_INC_ALLOCMISS(cachep);
 		objp = cache_alloc_refill(cachep, flags);
 	}
+	/*
+	 * To avoid a false negative, if an object that is in one of the
+	 * per-CPU caches is leaked, we need to make sure kmemleak doesn't
+	 * treat the array pointers as a reference to the object.
+	 */
+	kmemleak_erase(&ac->entry[ac->avail]);
 	return objp;
 }
 
@@ -3219,7 +3199,7 @@ retry:
 	if (local_flags & __GFP_WAIT)
 		local_irq_enable();
 	kmem_flagcheck(cache, flags);
-	obj = kmem_getpages(cache, local_flags, -1);
+	obj = kmem_getpages(cache, local_flags, numa_node_id());
 	if (local_flags & __GFP_WAIT)
 		local_irq_disable();
 	if (obj) {
@@ -3327,6 +3307,8 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
 	unsigned long save_flags;
 	void *ptr;
 
+	flags &= slab_gfp_mask;
+
 	lockdep_trace_alloc(flags);
 
 	if (slab_should_failslab(cachep, flags))
@@ -3360,6 +3342,11 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
   out:
 	local_irq_restore(save_flags);
 	ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller);
+	kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags,
+				 flags);
+
+	if (likely(ptr))
+		kmemcheck_slab_alloc(cachep, flags, ptr, obj_size(cachep));
 
 	if (unlikely((flags & __GFP_ZERO) && ptr))
 		memset(ptr, 0, obj_size(cachep));
@@ -3405,6 +3392,8 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
 	unsigned long save_flags;
 	void *objp;
 
+	flags &= slab_gfp_mask;
+
 	lockdep_trace_alloc(flags);
 
 	if (slab_should_failslab(cachep, flags))
@@ -3415,8 +3404,13 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
 	objp = __do_cache_alloc(cachep, flags);
 	local_irq_restore(save_flags);
 	objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller);
+	kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags,
+				 flags);
 	prefetchw(objp);
 
+	if (likely(objp))
+		kmemcheck_slab_alloc(cachep, flags, objp, obj_size(cachep));
+
 	if (unlikely((flags & __GFP_ZERO) && objp))
 		memset(objp, 0, obj_size(cachep));
 
@@ -3530,8 +3524,11 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
 	struct array_cache *ac = cpu_cache_get(cachep);
 
 	check_irq_off();
+	kmemleak_free_recursive(objp, cachep->flags);
 	objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
 
+	kmemcheck_slab_free(cachep, objp, obj_size(cachep));
+
 	/*
 	 * Skip calling cache_free_alien() when the platform is not numa.
 	 * This will avoid cache misses that happen while accessing slabp (which
@@ -3539,7 +3536,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
 	 * variable to skip the call, which is mostly likely to be present in
 	 * the cache.
 	 */
-	if (numa_platform && cache_free_alien(cachep, objp))
+	if (nr_online_nodes > 1 && cache_free_alien(cachep, objp))
 		return;
 
 	if (likely(ac->avail < ac->limit)) {
@@ -3802,7 +3799,7 @@ EXPORT_SYMBOL_GPL(kmem_cache_name);
 /*
  * This initializes kmem_list3 or resizes various caches for all nodes.
  */
-static int alloc_kmemlist(struct kmem_cache *cachep)
+static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
 {
 	int node;
 	struct kmem_list3 *l3;
@@ -3812,7 +3809,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep)
 	for_each_online_node(node) {
 
 		if (use_alien_caches) {
-			new_alien = alloc_alien_cache(node, cachep->limit);
+			new_alien = alloc_alien_cache(node, cachep->limit, gfp);
 			if (!new_alien)
 				goto fail;
 		}
@@ -3821,7 +3818,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep)
 		if (cachep->shared) {
 			new_shared = alloc_arraycache(node,
 				cachep->shared*cachep->batchcount,
-					0xbaadf00d);
+					0xbaadf00d, gfp);
 			if (!new_shared) {
 				free_alien_cache(new_alien);
 				goto fail;
@@ -3850,7 +3847,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep)
 			free_alien_cache(new_alien);
 			continue;
 		}
-		l3 = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, node);
+		l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node);
 		if (!l3) {
 			free_alien_cache(new_alien);
 			kfree(new_shared);
@@ -3906,18 +3903,18 @@ static void do_ccupdate_local(void *info)
 
 /* Always called with the cache_chain_mutex held */
 static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
-			    int batchcount, int shared)
+			    int batchcount, int shared, gfp_t gfp)
 {
 	struct ccupdate_struct *new;
 	int i;
 
-	new = kzalloc(sizeof(*new), GFP_KERNEL);
+	new = kzalloc(sizeof(*new), gfp);
 	if (!new)
 		return -ENOMEM;
 
 	for_each_online_cpu(i) {
 		new->new[i] = alloc_arraycache(cpu_to_node(i), limit,
-						batchcount);
+						batchcount, gfp);
 		if (!new->new[i]) {
 			for (i--; i >= 0; i--)
 				kfree(new->new[i]);
@@ -3944,11 +3941,11 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
 		kfree(ccold);
 	}
 	kfree(new);
-	return alloc_kmemlist(cachep);
+	return alloc_kmemlist(cachep, gfp);
 }
 
 /* Called with cache_chain_mutex held always */
-static int enable_cpucache(struct kmem_cache *cachep)
+static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
 {
 	int err;
 	int limit, shared;
@@ -3994,7 +3991,7 @@ static int enable_cpucache(struct kmem_cache *cachep)
 	if (limit > 32)
 		limit = 32;
 #endif
-	err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared);
+	err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp);
 	if (err)
 		printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
 		       cachep->name, -err);
@@ -4300,7 +4297,8 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer,
 			res = 0;
 		} else {
 			res = do_tune_cpucache(cachep, limit,
-					       batchcount, shared);
+					       batchcount, shared,
+					       GFP_KERNEL);
 		}
 		break;
 	}