Diffstat (limited to 'mm/slab.c')
-rw-r--r-- | mm/slab.c | 280 |
1 files changed, 139 insertions, 141 deletions
@@ -102,17 +102,19 @@ | |||
102 | #include <linux/cpu.h> | 102 | #include <linux/cpu.h> |
103 | #include <linux/sysctl.h> | 103 | #include <linux/sysctl.h> |
104 | #include <linux/module.h> | 104 | #include <linux/module.h> |
105 | #include <trace/kmemtrace.h> | 105 | #include <linux/kmemtrace.h> |
106 | #include <linux/rcupdate.h> | 106 | #include <linux/rcupdate.h> |
107 | #include <linux/string.h> | 107 | #include <linux/string.h> |
108 | #include <linux/uaccess.h> | 108 | #include <linux/uaccess.h> |
109 | #include <linux/nodemask.h> | 109 | #include <linux/nodemask.h> |
110 | #include <linux/kmemleak.h> | ||
110 | #include <linux/mempolicy.h> | 111 | #include <linux/mempolicy.h> |
111 | #include <linux/mutex.h> | 112 | #include <linux/mutex.h> |
112 | #include <linux/fault-inject.h> | 113 | #include <linux/fault-inject.h> |
113 | #include <linux/rtmutex.h> | 114 | #include <linux/rtmutex.h> |
114 | #include <linux/reciprocal_div.h> | 115 | #include <linux/reciprocal_div.h> |
115 | #include <linux/debugobjects.h> | 116 | #include <linux/debugobjects.h> |
117 | #include <linux/kmemcheck.h> | ||
116 | 118 | ||
117 | #include <asm/cacheflush.h> | 119 | #include <asm/cacheflush.h> |
118 | #include <asm/tlbflush.h> | 120 | #include <asm/tlbflush.h> |
@@ -178,13 +180,13 @@ | |||
178 | SLAB_STORE_USER | \ | 180 | SLAB_STORE_USER | \ |
179 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ | 181 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ |
180 | SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \ | 182 | SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \ |
181 | SLAB_DEBUG_OBJECTS) | 183 | SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK) |
182 | #else | 184 | #else |
183 | # define CREATE_MASK (SLAB_HWCACHE_ALIGN | \ | 185 | # define CREATE_MASK (SLAB_HWCACHE_ALIGN | \ |
184 | SLAB_CACHE_DMA | \ | 186 | SLAB_CACHE_DMA | \ |
185 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ | 187 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ |
186 | SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \ | 188 | SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \ |
187 | SLAB_DEBUG_OBJECTS) | 189 | SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK) |
188 | #endif | 190 | #endif |
189 | 191 | ||
190 | /* | 192 | /* |
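CREATE_MASK is the whitelist of cache flags that kmem_cache_create() will accept; anything outside it is treated as a caller bug, so the two new debugging flags (SLAB_NOLEAKTRACE for kmemleak, SLAB_NOTRACK for kmemcheck) have to be added to both the debug and non-debug variants before anything may pass them. A minimal userspace sketch of the same whitelist pattern, using invented flag values rather than the real SLAB_* bits:

#include <assert.h>
#include <stdio.h>

/* Invented flag bits standing in for the SLAB_* values. */
#define F_HWCACHE_ALIGN (1u << 0)
#define F_POISON        (1u << 1)
#define F_NOLEAKTRACE   (1u << 2)   /* newly whitelisted */
#define F_NOTRACK       (1u << 3)   /* newly whitelisted */

/* Whitelist of flags a cache may be created with. */
#define CREATE_MASK (F_HWCACHE_ALIGN | F_POISON | F_NOLEAKTRACE | F_NOTRACK)

static void cache_create(const char *name, unsigned int flags)
{
    /* Any bit outside the whitelist is a usage bug. */
    assert((flags & ~CREATE_MASK) == 0);
    printf("created %s with flags %#x\n", name, flags);
}

int main(void)
{
    cache_create("demo", F_HWCACHE_ALIGN | F_NOLEAKTRACE);
    return 0;
}
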
@@ -303,6 +305,12 @@ struct kmem_list3 { | |||
303 | }; | 305 | }; |
304 | 306 | ||
305 | /* | 307 | /* |
308 | * The slab allocator is initialized with interrupts disabled. Therefore, make | ||
309 | * sure early boot allocations don't accidentally enable interrupts. | ||
310 | */ | ||
311 | static gfp_t slab_gfp_mask __read_mostly = SLAB_GFP_BOOT_MASK; | ||
312 | |||
313 | /* | ||
306 | * Need this for bootstrapping a per node allocator. | 314 | * Need this for bootstrapping a per node allocator. |
307 | */ | 315 | */ |
308 | #define NUM_INIT_LISTS (3 * MAX_NUMNODES) | 316 | #define NUM_INIT_LISTS (3 * MAX_NUMNODES) |
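The new slab_gfp_mask is applied in the allocation fast paths further down (flags &= slab_gfp_mask), and kmem_cache_init_late() later widens it to __GFP_BITS_MASK once interrupts are on. SLAB_GFP_BOOT_MASK itself is defined elsewhere and presumably strips the blocking bits such as __GFP_WAIT. A toy model of the clamp-then-widen pattern, with invented bit values:

#include <stdio.h>

/* Invented stand-ins for the GFP bits involved. */
#define GFP_WAIT     (1u << 0)   /* caller may sleep */
#define GFP_IO       (1u << 1)
#define GFP_FS       (1u << 2)
#define GFP_ALL_BITS 0xffu

/* Boot-time mask: everything except the bits that allow blocking. */
static unsigned int slab_gfp_mask = GFP_ALL_BITS & ~(GFP_WAIT | GFP_IO | GFP_FS);

/* Mirrors the "flags &= slab_gfp_mask" clamp in the allocation paths. */
static unsigned int clamp_flags(unsigned int flags)
{
    return flags & slab_gfp_mask;
}

int main(void)
{
    unsigned int req = GFP_WAIT | GFP_IO;      /* a GFP_KERNEL-style request */

    printf("early boot:      %#x -> %#x\n", req, clamp_flags(req));

    slab_gfp_mask = GFP_ALL_BITS;              /* what kmem_cache_init_late() does */
    printf("after late init: %#x -> %#x\n", req, clamp_flags(req));
    return 0;
}
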
@@ -315,7 +323,7 @@ static int drain_freelist(struct kmem_cache *cache, | |||
315 | struct kmem_list3 *l3, int tofree); | 323 | struct kmem_list3 *l3, int tofree); |
316 | static void free_block(struct kmem_cache *cachep, void **objpp, int len, | 324 | static void free_block(struct kmem_cache *cachep, void **objpp, int len, |
317 | int node); | 325 | int node); |
318 | static int enable_cpucache(struct kmem_cache *cachep); | 326 | static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp); |
319 | static void cache_reap(struct work_struct *unused); | 327 | static void cache_reap(struct work_struct *unused); |
320 | 328 | ||
321 | /* | 329 | /* |
@@ -373,87 +381,6 @@ static void kmem_list3_init(struct kmem_list3 *parent) | |||
373 | MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \ | 381 | MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \ |
374 | } while (0) | 382 | } while (0) |
375 | 383 | ||
376 | /* | ||
377 | * struct kmem_cache | ||
378 | * | ||
379 | * manages a cache. | ||
380 | */ | ||
381 | |||
382 | struct kmem_cache { | ||
383 | /* 1) per-cpu data, touched during every alloc/free */ | ||
384 | struct array_cache *array[NR_CPUS]; | ||
385 | /* 2) Cache tunables. Protected by cache_chain_mutex */ | ||
386 | unsigned int batchcount; | ||
387 | unsigned int limit; | ||
388 | unsigned int shared; | ||
389 | |||
390 | unsigned int buffer_size; | ||
391 | u32 reciprocal_buffer_size; | ||
392 | /* 3) touched by every alloc & free from the backend */ | ||
393 | |||
394 | unsigned int flags; /* constant flags */ | ||
395 | unsigned int num; /* # of objs per slab */ | ||
396 | |||
397 | /* 4) cache_grow/shrink */ | ||
398 | /* order of pgs per slab (2^n) */ | ||
399 | unsigned int gfporder; | ||
400 | |||
401 | /* force GFP flags, e.g. GFP_DMA */ | ||
402 | gfp_t gfpflags; | ||
403 | |||
404 | size_t colour; /* cache colouring range */ | ||
405 | unsigned int colour_off; /* colour offset */ | ||
406 | struct kmem_cache *slabp_cache; | ||
407 | unsigned int slab_size; | ||
408 | unsigned int dflags; /* dynamic flags */ | ||
409 | |||
410 | /* constructor func */ | ||
411 | void (*ctor)(void *obj); | ||
412 | |||
413 | /* 5) cache creation/removal */ | ||
414 | const char *name; | ||
415 | struct list_head next; | ||
416 | |||
417 | /* 6) statistics */ | ||
418 | #if STATS | ||
419 | unsigned long num_active; | ||
420 | unsigned long num_allocations; | ||
421 | unsigned long high_mark; | ||
422 | unsigned long grown; | ||
423 | unsigned long reaped; | ||
424 | unsigned long errors; | ||
425 | unsigned long max_freeable; | ||
426 | unsigned long node_allocs; | ||
427 | unsigned long node_frees; | ||
428 | unsigned long node_overflow; | ||
429 | atomic_t allochit; | ||
430 | atomic_t allocmiss; | ||
431 | atomic_t freehit; | ||
432 | atomic_t freemiss; | ||
433 | #endif | ||
434 | #if DEBUG | ||
435 | /* | ||
436 | * If debugging is enabled, then the allocator can add additional | ||
437 | * fields and/or padding to every object. buffer_size contains the total | ||
438 | * object size including these internal fields, the following two | ||
439 | * variables contain the offset to the user object and its size. | ||
440 | */ | ||
441 | int obj_offset; | ||
442 | int obj_size; | ||
443 | #endif | ||
444 | /* | ||
445 | * We put nodelists[] at the end of kmem_cache, because we want to size | ||
446 | * this array to nr_node_ids slots instead of MAX_NUMNODES | ||
447 | * (see kmem_cache_init()) | ||
448 | * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache | ||
449 | * is statically defined, so we reserve the max number of nodes. | ||
450 | */ | ||
451 | struct kmem_list3 *nodelists[MAX_NUMNODES]; | ||
452 | /* | ||
453 | * Do not add fields after nodelists[] | ||
454 | */ | ||
455 | }; | ||
456 | |||
457 | #define CFLGS_OFF_SLAB (0x80000000UL) | 384 | #define CFLGS_OFF_SLAB (0x80000000UL) |
458 | #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) | 385 | #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) |
459 | 386 | ||
@@ -752,6 +679,7 @@ static enum { | |||
752 | NONE, | 679 | NONE, |
753 | PARTIAL_AC, | 680 | PARTIAL_AC, |
754 | PARTIAL_L3, | 681 | PARTIAL_L3, |
682 | EARLY, | ||
755 | FULL | 683 | FULL |
756 | } g_cpucache_up; | 684 | } g_cpucache_up; |
757 | 685 | ||
@@ -760,7 +688,7 @@ static enum { | |||
760 | */ | 688 | */ |
761 | int slab_is_available(void) | 689 | int slab_is_available(void) |
762 | { | 690 | { |
763 | return g_cpucache_up == FULL; | 691 | return g_cpucache_up >= EARLY; |
764 | } | 692 | } |
765 | 693 | ||
766 | static DEFINE_PER_CPU(struct delayed_work, reap_work); | 694 | static DEFINE_PER_CPU(struct delayed_work, reap_work); |
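The new EARLY state sits between PARTIAL_L3 and FULL: it is set at the end of kmem_cache_init(), while FULL now moves to kmem_cache_init_late(). Because slab_is_available() tests for >= EARLY, callers see the allocator as usable as soon as the bootstrap caches work, even though the head arrays have not been resized yet. A small sketch of that state machine:

#include <stdio.h>

enum cpucache_up { NONE, PARTIAL_AC, PARTIAL_L3, EARLY, FULL };

static enum cpucache_up g_cpucache_up = NONE;

static int slab_is_available(void)
{
    /* EARLY is enough: allocations already work, the caches just
     * have not had their head arrays tuned to final sizes yet. */
    return g_cpucache_up >= EARLY;
}

int main(void)
{
    printf("NONE:  %d\n", slab_is_available());
    g_cpucache_up = EARLY;   /* end of kmem_cache_init()      */
    printf("EARLY: %d\n", slab_is_available());
    g_cpucache_up = FULL;    /* end of kmem_cache_init_late() */
    printf("FULL:  %d\n", slab_is_available());
    return 0;
}
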
@@ -890,7 +818,6 @@ static void __slab_error(const char *function, struct kmem_cache *cachep, | |||
890 | */ | 818 | */ |
891 | 819 | ||
892 | static int use_alien_caches __read_mostly = 1; | 820 | static int use_alien_caches __read_mostly = 1; |
893 | static int numa_platform __read_mostly = 1; | ||
894 | static int __init noaliencache_setup(char *s) | 821 | static int __init noaliencache_setup(char *s) |
895 | { | 822 | { |
896 | use_alien_caches = 0; | 823 | use_alien_caches = 0; |
@@ -958,12 +885,20 @@ static void __cpuinit start_cpu_timer(int cpu) | |||
958 | } | 885 | } |
959 | 886 | ||
960 | static struct array_cache *alloc_arraycache(int node, int entries, | 887 | static struct array_cache *alloc_arraycache(int node, int entries, |
961 | int batchcount) | 888 | int batchcount, gfp_t gfp) |
962 | { | 889 | { |
963 | int memsize = sizeof(void *) * entries + sizeof(struct array_cache); | 890 | int memsize = sizeof(void *) * entries + sizeof(struct array_cache); |
964 | struct array_cache *nc = NULL; | 891 | struct array_cache *nc = NULL; |
965 | 892 | ||
966 | nc = kmalloc_node(memsize, GFP_KERNEL, node); | 893 | nc = kmalloc_node(memsize, gfp, node); |
894 | /* | ||
895 | * The array_cache structures contain pointers to free object. | ||
896 | * However, when such objects are allocated or transfered to another | ||
897 | * cache the pointers are not cleared and they could be counted as | ||
898 | * valid references during a kmemleak scan. Therefore, kmemleak must | ||
899 | * not scan such objects. | ||
900 | */ | ||
901 | kmemleak_no_scan(nc); | ||
967 | if (nc) { | 902 | if (nc) { |
968 | nc->avail = 0; | 903 | nc->avail = 0; |
969 | nc->limit = entries; | 904 | nc->limit = entries; |
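The comment added here states the core kmemleak problem with array caches: entry[] is a stack of pointers to free objects, and handing an object out only decrements avail without clearing the slot, so a scanner walking the array would still "see" the object and never report it leaked. A toy userspace model of that stale slot (plain C, not the kernel structures):

#include <stdio.h>
#include <stdlib.h>

/* Toy version of struct array_cache: a small stack of free objects. */
struct array_cache {
    unsigned int avail;
    void *entry[4];
};

static void *cache_pop(struct array_cache *ac)
{
    /* Only avail moves; the slot itself is not cleared. */
    return ac->entry[--ac->avail];
}

int main(void)
{
    struct array_cache ac = { 0, { NULL } };
    void *obj = malloc(32);

    ac.entry[ac.avail++] = obj;   /* object parked in the free stack */
    void *p = cache_pop(&ac);     /* handed out to a caller          */

    /* entry[0] still holds the address of an object that is in use. */
    printf("stale slot: %p, live object: %p\n", ac.entry[0], p);
    free(p);
    return 0;
}
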
@@ -1003,7 +938,7 @@ static int transfer_objects(struct array_cache *to, | |||
1003 | #define drain_alien_cache(cachep, alien) do { } while (0) | 938 | #define drain_alien_cache(cachep, alien) do { } while (0) |
1004 | #define reap_alien(cachep, l3) do { } while (0) | 939 | #define reap_alien(cachep, l3) do { } while (0) |
1005 | 940 | ||
1006 | static inline struct array_cache **alloc_alien_cache(int node, int limit) | 941 | static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) |
1007 | { | 942 | { |
1008 | return (struct array_cache **)BAD_ALIEN_MAGIC; | 943 | return (struct array_cache **)BAD_ALIEN_MAGIC; |
1009 | } | 944 | } |
@@ -1034,7 +969,7 @@ static inline void *____cache_alloc_node(struct kmem_cache *cachep, | |||
1034 | static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); | 969 | static void *____cache_alloc_node(struct kmem_cache *, gfp_t, int); |
1035 | static void *alternate_node_alloc(struct kmem_cache *, gfp_t); | 970 | static void *alternate_node_alloc(struct kmem_cache *, gfp_t); |
1036 | 971 | ||
1037 | static struct array_cache **alloc_alien_cache(int node, int limit) | 972 | static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) |
1038 | { | 973 | { |
1039 | struct array_cache **ac_ptr; | 974 | struct array_cache **ac_ptr; |
1040 | int memsize = sizeof(void *) * nr_node_ids; | 975 | int memsize = sizeof(void *) * nr_node_ids; |
@@ -1042,14 +977,14 @@ static struct array_cache **alloc_alien_cache(int node, int limit) | |||
1042 | 977 | ||
1043 | if (limit > 1) | 978 | if (limit > 1) |
1044 | limit = 12; | 979 | limit = 12; |
1045 | ac_ptr = kmalloc_node(memsize, GFP_KERNEL, node); | 980 | ac_ptr = kmalloc_node(memsize, gfp, node); |
1046 | if (ac_ptr) { | 981 | if (ac_ptr) { |
1047 | for_each_node(i) { | 982 | for_each_node(i) { |
1048 | if (i == node || !node_online(i)) { | 983 | if (i == node || !node_online(i)) { |
1049 | ac_ptr[i] = NULL; | 984 | ac_ptr[i] = NULL; |
1050 | continue; | 985 | continue; |
1051 | } | 986 | } |
1052 | ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d); | 987 | ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp); |
1053 | if (!ac_ptr[i]) { | 988 | if (!ac_ptr[i]) { |
1054 | for (i--; i >= 0; i--) | 989 | for (i--; i >= 0; i--) |
1055 | kfree(ac_ptr[i]); | 990 | kfree(ac_ptr[i]); |
@@ -1282,20 +1217,20 @@ static int __cpuinit cpuup_prepare(long cpu) | |||
1282 | struct array_cache **alien = NULL; | 1217 | struct array_cache **alien = NULL; |
1283 | 1218 | ||
1284 | nc = alloc_arraycache(node, cachep->limit, | 1219 | nc = alloc_arraycache(node, cachep->limit, |
1285 | cachep->batchcount); | 1220 | cachep->batchcount, GFP_KERNEL); |
1286 | if (!nc) | 1221 | if (!nc) |
1287 | goto bad; | 1222 | goto bad; |
1288 | if (cachep->shared) { | 1223 | if (cachep->shared) { |
1289 | shared = alloc_arraycache(node, | 1224 | shared = alloc_arraycache(node, |
1290 | cachep->shared * cachep->batchcount, | 1225 | cachep->shared * cachep->batchcount, |
1291 | 0xbaadf00d); | 1226 | 0xbaadf00d, GFP_KERNEL); |
1292 | if (!shared) { | 1227 | if (!shared) { |
1293 | kfree(nc); | 1228 | kfree(nc); |
1294 | goto bad; | 1229 | goto bad; |
1295 | } | 1230 | } |
1296 | } | 1231 | } |
1297 | if (use_alien_caches) { | 1232 | if (use_alien_caches) { |
1298 | alien = alloc_alien_cache(node, cachep->limit); | 1233 | alien = alloc_alien_cache(node, cachep->limit, GFP_KERNEL); |
1299 | if (!alien) { | 1234 | if (!alien) { |
1300 | kfree(shared); | 1235 | kfree(shared); |
1301 | kfree(nc); | 1236 | kfree(nc); |
@@ -1399,10 +1334,9 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, | |||
1399 | { | 1334 | { |
1400 | struct kmem_list3 *ptr; | 1335 | struct kmem_list3 *ptr; |
1401 | 1336 | ||
1402 | ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, nodeid); | 1337 | ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_NOWAIT, nodeid); |
1403 | BUG_ON(!ptr); | 1338 | BUG_ON(!ptr); |
1404 | 1339 | ||
1405 | local_irq_disable(); | ||
1406 | memcpy(ptr, list, sizeof(struct kmem_list3)); | 1340 | memcpy(ptr, list, sizeof(struct kmem_list3)); |
1407 | /* | 1341 | /* |
1408 | * Do not assume that spinlocks can be initialized via memcpy: | 1342 | * Do not assume that spinlocks can be initialized via memcpy: |
@@ -1411,7 +1345,6 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, | |||
1411 | 1345 | ||
1412 | MAKE_ALL_LISTS(cachep, ptr, nodeid); | 1346 | MAKE_ALL_LISTS(cachep, ptr, nodeid); |
1413 | cachep->nodelists[nodeid] = ptr; | 1347 | cachep->nodelists[nodeid] = ptr; |
1414 | local_irq_enable(); | ||
1415 | } | 1348 | } |
1416 | 1349 | ||
1417 | /* | 1350 | /* |
@@ -1443,10 +1376,8 @@ void __init kmem_cache_init(void) | |||
1443 | int order; | 1376 | int order; |
1444 | int node; | 1377 | int node; |
1445 | 1378 | ||
1446 | if (num_possible_nodes() == 1) { | 1379 | if (num_possible_nodes() == 1) |
1447 | use_alien_caches = 0; | 1380 | use_alien_caches = 0; |
1448 | numa_platform = 0; | ||
1449 | } | ||
1450 | 1381 | ||
1451 | for (i = 0; i < NUM_INIT_LISTS; i++) { | 1382 | for (i = 0; i < NUM_INIT_LISTS; i++) { |
1452 | kmem_list3_init(&initkmem_list3[i]); | 1383 | kmem_list3_init(&initkmem_list3[i]); |
@@ -1575,9 +1506,8 @@ void __init kmem_cache_init(void) | |||
1575 | { | 1506 | { |
1576 | struct array_cache *ptr; | 1507 | struct array_cache *ptr; |
1577 | 1508 | ||
1578 | ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); | 1509 | ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); |
1579 | 1510 | ||
1580 | local_irq_disable(); | ||
1581 | BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache); | 1511 | BUG_ON(cpu_cache_get(&cache_cache) != &initarray_cache.cache); |
1582 | memcpy(ptr, cpu_cache_get(&cache_cache), | 1512 | memcpy(ptr, cpu_cache_get(&cache_cache), |
1583 | sizeof(struct arraycache_init)); | 1513 | sizeof(struct arraycache_init)); |
@@ -1587,11 +1517,9 @@ void __init kmem_cache_init(void) | |||
1587 | spin_lock_init(&ptr->lock); | 1517 | spin_lock_init(&ptr->lock); |
1588 | 1518 | ||
1589 | cache_cache.array[smp_processor_id()] = ptr; | 1519 | cache_cache.array[smp_processor_id()] = ptr; |
1590 | local_irq_enable(); | ||
1591 | 1520 | ||
1592 | ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); | 1521 | ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); |
1593 | 1522 | ||
1594 | local_irq_disable(); | ||
1595 | BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep) | 1523 | BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep) |
1596 | != &initarray_generic.cache); | 1524 | != &initarray_generic.cache); |
1597 | memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep), | 1525 | memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep), |
@@ -1603,7 +1531,6 @@ void __init kmem_cache_init(void) | |||
1603 | 1531 | ||
1604 | malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = | 1532 | malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = |
1605 | ptr; | 1533 | ptr; |
1606 | local_irq_enable(); | ||
1607 | } | 1534 | } |
1608 | /* 5) Replace the bootstrap kmem_list3's */ | 1535 | /* 5) Replace the bootstrap kmem_list3's */ |
1609 | { | 1536 | { |
@@ -1622,19 +1549,27 @@ void __init kmem_cache_init(void) | |||
1622 | } | 1549 | } |
1623 | } | 1550 | } |
1624 | 1551 | ||
1625 | /* 6) resize the head arrays to their final sizes */ | 1552 | g_cpucache_up = EARLY; |
1626 | { | ||
1627 | struct kmem_cache *cachep; | ||
1628 | mutex_lock(&cache_chain_mutex); | ||
1629 | list_for_each_entry(cachep, &cache_chain, next) | ||
1630 | if (enable_cpucache(cachep)) | ||
1631 | BUG(); | ||
1632 | mutex_unlock(&cache_chain_mutex); | ||
1633 | } | ||
1634 | 1553 | ||
1635 | /* Annotate slab for lockdep -- annotate the malloc caches */ | 1554 | /* Annotate slab for lockdep -- annotate the malloc caches */ |
1636 | init_lock_keys(); | 1555 | init_lock_keys(); |
1556 | } | ||
1557 | |||
1558 | void __init kmem_cache_init_late(void) | ||
1559 | { | ||
1560 | struct kmem_cache *cachep; | ||
1637 | 1561 | ||
1562 | /* | ||
1563 | * Interrupts are enabled now so all GFP allocations are safe. | ||
1564 | */ | ||
1565 | slab_gfp_mask = __GFP_BITS_MASK; | ||
1566 | |||
1567 | /* 6) resize the head arrays to their final sizes */ | ||
1568 | mutex_lock(&cache_chain_mutex); | ||
1569 | list_for_each_entry(cachep, &cache_chain, next) | ||
1570 | if (enable_cpucache(cachep, GFP_NOWAIT)) | ||
1571 | BUG(); | ||
1572 | mutex_unlock(&cache_chain_mutex); | ||
1638 | 1573 | ||
1639 | /* Done! */ | 1574 | /* Done! */ |
1640 | g_cpucache_up = FULL; | 1575 | g_cpucache_up = FULL; |
@@ -1689,7 +1624,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
1689 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) | 1624 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) |
1690 | flags |= __GFP_RECLAIMABLE; | 1625 | flags |= __GFP_RECLAIMABLE; |
1691 | 1626 | ||
1692 | page = alloc_pages_node(nodeid, flags, cachep->gfporder); | 1627 | page = alloc_pages_exact_node(nodeid, flags | __GFP_NOTRACK, cachep->gfporder); |
1693 | if (!page) | 1628 | if (!page) |
1694 | return NULL; | 1629 | return NULL; |
1695 | 1630 | ||
@@ -1702,6 +1637,16 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
1702 | NR_SLAB_UNRECLAIMABLE, nr_pages); | 1637 | NR_SLAB_UNRECLAIMABLE, nr_pages); |
1703 | for (i = 0; i < nr_pages; i++) | 1638 | for (i = 0; i < nr_pages; i++) |
1704 | __SetPageSlab(page + i); | 1639 | __SetPageSlab(page + i); |
1640 | |||
1641 | if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) { | ||
1642 | kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid); | ||
1643 | |||
1644 | if (cachep->ctor) | ||
1645 | kmemcheck_mark_uninitialized_pages(page, nr_pages); | ||
1646 | else | ||
1647 | kmemcheck_mark_unallocated_pages(page, nr_pages); | ||
1648 | } | ||
1649 | |||
1705 | return page_address(page); | 1650 | return page_address(page); |
1706 | } | 1651 | } |
1707 | 1652 | ||
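kmemcheck keeps shadow state for slab memory and warns when code reads bytes it never wrote. The hunk above allocates that shadow for each new slab page and marks the pages "uninitialized" when the cache has a constructor (presumably because the constructor is about to write every object) and "unallocated" otherwise; __GFP_NOTRACK on the page allocation presumably keeps the generic page-allocator hook from tracking pages that slab is about to shadow itself. The sketch below is only a toy model of the shadow-byte idea, not the kmemcheck API:

#include <stdio.h>

/* Toy shadow states: one shadow byte per byte of tracked memory. */
enum shadow_state { UNALLOCATED, UNINITIALIZED, INITIALIZED };

#define SIZE 8
static unsigned char mem[SIZE];
static enum shadow_state shadow[SIZE];

static void mark_all(enum shadow_state s)
{
    for (int i = 0; i < SIZE; i++)
        shadow[i] = s;
}

static void write_byte(int i, unsigned char v)
{
    if (shadow[i] == UNALLOCATED)
        printf("BUG: write to unallocated byte %d\n", i);
    mem[i] = v;
    shadow[i] = INITIALIZED;
}

static unsigned char read_byte(int i)
{
    if (shadow[i] != INITIALIZED)
        printf("WARN: read of %s byte %d\n",
               shadow[i] == UNALLOCATED ? "unallocated" : "uninitialized", i);
    return mem[i];
}

int main(void)
{
    mark_all(UNINITIALIZED);   /* fresh pages of a cache with a constructor */
    read_byte(0);              /* flagged: never written                    */
    write_byte(0, 42);
    read_byte(0);              /* fine after initialization                 */
    return 0;
}
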
@@ -1714,6 +1659,8 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) | |||
1714 | struct page *page = virt_to_page(addr); | 1659 | struct page *page = virt_to_page(addr); |
1715 | const unsigned long nr_freed = i; | 1660 | const unsigned long nr_freed = i; |
1716 | 1661 | ||
1662 | kmemcheck_free_shadow(page, cachep->gfporder); | ||
1663 | |||
1717 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) | 1664 | if (cachep->flags & SLAB_RECLAIM_ACCOUNT) |
1718 | sub_zone_page_state(page_zone(page), | 1665 | sub_zone_page_state(page_zone(page), |
1719 | NR_SLAB_RECLAIMABLE, nr_freed); | 1666 | NR_SLAB_RECLAIMABLE, nr_freed); |
@@ -2064,10 +2011,10 @@ static size_t calculate_slab_order(struct kmem_cache *cachep, | |||
2064 | return left_over; | 2011 | return left_over; |
2065 | } | 2012 | } |
2066 | 2013 | ||
2067 | static int __init_refok setup_cpu_cache(struct kmem_cache *cachep) | 2014 | static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) |
2068 | { | 2015 | { |
2069 | if (g_cpucache_up == FULL) | 2016 | if (g_cpucache_up == FULL) |
2070 | return enable_cpucache(cachep); | 2017 | return enable_cpucache(cachep, gfp); |
2071 | 2018 | ||
2072 | if (g_cpucache_up == NONE) { | 2019 | if (g_cpucache_up == NONE) { |
2073 | /* | 2020 | /* |
@@ -2089,7 +2036,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep) | |||
2089 | g_cpucache_up = PARTIAL_AC; | 2036 | g_cpucache_up = PARTIAL_AC; |
2090 | } else { | 2037 | } else { |
2091 | cachep->array[smp_processor_id()] = | 2038 | cachep->array[smp_processor_id()] = |
2092 | kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); | 2039 | kmalloc(sizeof(struct arraycache_init), gfp); |
2093 | 2040 | ||
2094 | if (g_cpucache_up == PARTIAL_AC) { | 2041 | if (g_cpucache_up == PARTIAL_AC) { |
2095 | set_up_list3s(cachep, SIZE_L3); | 2042 | set_up_list3s(cachep, SIZE_L3); |
@@ -2099,7 +2046,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep) | |||
2099 | for_each_online_node(node) { | 2046 | for_each_online_node(node) { |
2100 | cachep->nodelists[node] = | 2047 | cachep->nodelists[node] = |
2101 | kmalloc_node(sizeof(struct kmem_list3), | 2048 | kmalloc_node(sizeof(struct kmem_list3), |
2102 | GFP_KERNEL, node); | 2049 | gfp, node); |
2103 | BUG_ON(!cachep->nodelists[node]); | 2050 | BUG_ON(!cachep->nodelists[node]); |
2104 | kmem_list3_init(cachep->nodelists[node]); | 2051 | kmem_list3_init(cachep->nodelists[node]); |
2105 | } | 2052 | } |
@@ -2153,6 +2100,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
2153 | { | 2100 | { |
2154 | size_t left_over, slab_size, ralign; | 2101 | size_t left_over, slab_size, ralign; |
2155 | struct kmem_cache *cachep = NULL, *pc; | 2102 | struct kmem_cache *cachep = NULL, *pc; |
2103 | gfp_t gfp; | ||
2156 | 2104 | ||
2157 | /* | 2105 | /* |
2158 | * Sanity checks... these are all serious usage bugs. | 2106 | * Sanity checks... these are all serious usage bugs. |
@@ -2168,8 +2116,10 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
2168 | * We use cache_chain_mutex to ensure a consistent view of | 2116 | * We use cache_chain_mutex to ensure a consistent view of |
2169 | * cpu_online_mask as well. Please see cpuup_callback | 2117 | * cpu_online_mask as well. Please see cpuup_callback |
2170 | */ | 2118 | */ |
2171 | get_online_cpus(); | 2119 | if (slab_is_available()) { |
2172 | mutex_lock(&cache_chain_mutex); | 2120 | get_online_cpus(); |
2121 | mutex_lock(&cache_chain_mutex); | ||
2122 | } | ||
2173 | 2123 | ||
2174 | list_for_each_entry(pc, &cache_chain, next) { | 2124 | list_for_each_entry(pc, &cache_chain, next) { |
2175 | char tmp; | 2125 | char tmp; |
@@ -2278,8 +2228,13 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
2278 | */ | 2228 | */ |
2279 | align = ralign; | 2229 | align = ralign; |
2280 | 2230 | ||
2231 | if (slab_is_available()) | ||
2232 | gfp = GFP_KERNEL; | ||
2233 | else | ||
2234 | gfp = GFP_NOWAIT; | ||
2235 | |||
2281 | /* Get cache's description obj. */ | 2236 | /* Get cache's description obj. */ |
2282 | cachep = kmem_cache_zalloc(&cache_cache, GFP_KERNEL); | 2237 | cachep = kmem_cache_zalloc(&cache_cache, gfp); |
2283 | if (!cachep) | 2238 | if (!cachep) |
2284 | goto oops; | 2239 | goto oops; |
2285 | 2240 | ||
@@ -2353,6 +2308,15 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
2353 | /* really off slab. No need for manual alignment */ | 2308 | /* really off slab. No need for manual alignment */ |
2354 | slab_size = | 2309 | slab_size = |
2355 | cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab); | 2310 | cachep->num * sizeof(kmem_bufctl_t) + sizeof(struct slab); |
2311 | |||
2312 | #ifdef CONFIG_PAGE_POISONING | ||
2313 | /* If we're going to use the generic kernel_map_pages() | ||
2314 | * poisoning, then it's going to smash the contents of | ||
2315 | * the redzone and userword anyhow, so switch them off. | ||
2316 | */ | ||
2317 | if (size % PAGE_SIZE == 0 && flags & SLAB_POISON) | ||
2318 | flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); | ||
2319 | #endif | ||
2356 | } | 2320 | } |
2357 | 2321 | ||
2358 | cachep->colour_off = cache_line_size(); | 2322 | cachep->colour_off = cache_line_size(); |
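The new CONFIG_PAGE_POISONING block follows the comment's own reasoning: when objects are an exact multiple of PAGE_SIZE and SLAB_POISON is set, poisoning goes through kernel_map_pages() and smashes the whole page, so keeping SLAB_RED_ZONE or SLAB_STORE_USER there would only record garbage; the two flags are dropped up front instead. A sketch of that flag reconciliation, again with invented flag values:

#include <stddef.h>
#include <stdio.h>

#define PAGE_SIZE    4096u
/* Invented flag bits standing in for the SLAB_* debug flags. */
#define F_POISON     (1u << 0)
#define F_RED_ZONE   (1u << 1)
#define F_STORE_USER (1u << 2)

static unsigned int reconcile_debug_flags(size_t size, unsigned int flags)
{
    /* Page-sized poisoned objects get whole-page poisoning, which would
     * overwrite the red zone and the store-user record anyway, so drop
     * those two options instead of keeping corrupted data around. */
    if (size % PAGE_SIZE == 0 && (flags & F_POISON))
        flags &= ~(F_RED_ZONE | F_STORE_USER);
    return flags;
}

int main(void)
{
    printf("%#x\n", reconcile_debug_flags(PAGE_SIZE, F_POISON | F_RED_ZONE | F_STORE_USER));
    printf("%#x\n", reconcile_debug_flags(128, F_POISON | F_RED_ZONE));
    return 0;
}
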
@@ -2382,7 +2346,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
2382 | cachep->ctor = ctor; | 2346 | cachep->ctor = ctor; |
2383 | cachep->name = name; | 2347 | cachep->name = name; |
2384 | 2348 | ||
2385 | if (setup_cpu_cache(cachep)) { | 2349 | if (setup_cpu_cache(cachep, gfp)) { |
2386 | __kmem_cache_destroy(cachep); | 2350 | __kmem_cache_destroy(cachep); |
2387 | cachep = NULL; | 2351 | cachep = NULL; |
2388 | goto oops; | 2352 | goto oops; |
@@ -2394,8 +2358,10 @@ oops: | |||
2394 | if (!cachep && (flags & SLAB_PANIC)) | 2358 | if (!cachep && (flags & SLAB_PANIC)) |
2395 | panic("kmem_cache_create(): failed to create slab `%s'\n", | 2359 | panic("kmem_cache_create(): failed to create slab `%s'\n", |
2396 | name); | 2360 | name); |
2397 | mutex_unlock(&cache_chain_mutex); | 2361 | if (slab_is_available()) { |
2398 | put_online_cpus(); | 2362 | mutex_unlock(&cache_chain_mutex); |
2363 | put_online_cpus(); | ||
2364 | } | ||
2399 | return cachep; | 2365 | return cachep; |
2400 | } | 2366 | } |
2401 | EXPORT_SYMBOL(kmem_cache_create); | 2367 | EXPORT_SYMBOL(kmem_cache_create); |
@@ -2621,6 +2587,14 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, | |||
2621 | /* Slab management obj is off-slab. */ | 2587 | /* Slab management obj is off-slab. */ |
2622 | slabp = kmem_cache_alloc_node(cachep->slabp_cache, | 2588 | slabp = kmem_cache_alloc_node(cachep->slabp_cache, |
2623 | local_flags, nodeid); | 2589 | local_flags, nodeid); |
2590 | /* | ||
2591 | * If the first object in the slab is leaked (it's allocated | ||
2592 | * but no one has a reference to it), we want to make sure | ||
2593 | * kmemleak does not treat the ->s_mem pointer as a reference | ||
2594 | * to the object. Otherwise we will not report the leak. | ||
2595 | */ | ||
2596 | kmemleak_scan_area(slabp, offsetof(struct slab, list), | ||
2597 | sizeof(struct list_head), local_flags); | ||
2624 | if (!slabp) | 2598 | if (!slabp) |
2625 | return NULL; | 2599 | return NULL; |
2626 | } else { | 2600 | } else { |
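With off-slab management, the struct slab descriptor is itself an object from slabp_cache, and its s_mem field points at the first object of the slab it describes; scanning the whole descriptor would therefore always yield a "reference" to that first object and hide a leak of it. kmemleak_scan_area() narrows scanning to just the embedded list_head, exactly the offsetof/sizeof window passed in the hunk. A sketch of how that window is computed, using a simplified stand-in for struct slab:

#include <stddef.h>
#include <stdio.h>

/* Simplified stand-ins for the kernel structures. */
struct list_head { struct list_head *next, *prev; };

struct slab {
    struct list_head list;   /* linkage on the partial/full/free lists       */
    unsigned long colouroff;
    void *s_mem;             /* first object - must not count as a reference */
    unsigned int inuse;
};

int main(void)
{
    /* The window handed to kmemleak_scan_area() in the hunk above. */
    size_t start = offsetof(struct slab, list);
    size_t len   = sizeof(struct list_head);

    printf("scan bytes [%zu, %zu) of the %zu-byte descriptor; s_mem at offset %zu stays unscanned\n",
           start, start + len, sizeof(struct slab), offsetof(struct slab, s_mem));
    return 0;
}
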
@@ -3141,6 +3115,12 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) | |||
3141 | STATS_INC_ALLOCMISS(cachep); | 3115 | STATS_INC_ALLOCMISS(cachep); |
3142 | objp = cache_alloc_refill(cachep, flags); | 3116 | objp = cache_alloc_refill(cachep, flags); |
3143 | } | 3117 | } |
3118 | /* | ||
3119 | * To avoid a false negative, if an object that is in one of the | ||
3120 | * per-CPU caches is leaked, we need to make sure kmemleak doesn't | ||
3121 | * treat the array pointers as a reference to the object. | ||
3122 | */ | ||
3123 | kmemleak_erase(&ac->entry[ac->avail]); | ||
3144 | return objp; | 3124 | return objp; |
3145 | } | 3125 | } |
3146 | 3126 | ||
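This mirrors the array_cache issue noted at alloc_arraycache(): the object just handed out is still referenced by the entry[] slot it came from, so kmemleak_erase() drops that stale pointer at &ac->entry[ac->avail]. Extending the earlier toy model with the fix:

#include <stdio.h>
#include <stdlib.h>

/* Same toy array_cache as in the earlier sketch. */
struct array_cache {
    unsigned int avail;
    void *entry[4];
};

static void *cache_pop_and_erase(struct array_cache *ac)
{
    void *obj = ac->entry[--ac->avail];
    /* What the kmemleak_erase() call above achieves: the slot no longer
     * points at the object that was just handed out. */
    ac->entry[ac->avail] = NULL;
    return obj;
}

int main(void)
{
    struct array_cache ac = { 0, { NULL } };
    void *obj = malloc(32);

    ac.entry[ac.avail++] = obj;
    void *p = cache_pop_and_erase(&ac);
    printf("slot: %p, object: %p\n", ac.entry[0], p);   /* slot is now NULL */
    free(p);
    return 0;
}
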
@@ -3219,7 +3199,7 @@ retry: | |||
3219 | if (local_flags & __GFP_WAIT) | 3199 | if (local_flags & __GFP_WAIT) |
3220 | local_irq_enable(); | 3200 | local_irq_enable(); |
3221 | kmem_flagcheck(cache, flags); | 3201 | kmem_flagcheck(cache, flags); |
3222 | obj = kmem_getpages(cache, local_flags, -1); | 3202 | obj = kmem_getpages(cache, local_flags, numa_node_id()); |
3223 | if (local_flags & __GFP_WAIT) | 3203 | if (local_flags & __GFP_WAIT) |
3224 | local_irq_disable(); | 3204 | local_irq_disable(); |
3225 | if (obj) { | 3205 | if (obj) { |
@@ -3327,6 +3307,8 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, | |||
3327 | unsigned long save_flags; | 3307 | unsigned long save_flags; |
3328 | void *ptr; | 3308 | void *ptr; |
3329 | 3309 | ||
3310 | flags &= slab_gfp_mask; | ||
3311 | |||
3330 | lockdep_trace_alloc(flags); | 3312 | lockdep_trace_alloc(flags); |
3331 | 3313 | ||
3332 | if (slab_should_failslab(cachep, flags)) | 3314 | if (slab_should_failslab(cachep, flags)) |
@@ -3360,6 +3342,11 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, | |||
3360 | out: | 3342 | out: |
3361 | local_irq_restore(save_flags); | 3343 | local_irq_restore(save_flags); |
3362 | ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); | 3344 | ptr = cache_alloc_debugcheck_after(cachep, flags, ptr, caller); |
3345 | kmemleak_alloc_recursive(ptr, obj_size(cachep), 1, cachep->flags, | ||
3346 | flags); | ||
3347 | |||
3348 | if (likely(ptr)) | ||
3349 | kmemcheck_slab_alloc(cachep, flags, ptr, obj_size(cachep)); | ||
3363 | 3350 | ||
3364 | if (unlikely((flags & __GFP_ZERO) && ptr)) | 3351 | if (unlikely((flags & __GFP_ZERO) && ptr)) |
3365 | memset(ptr, 0, obj_size(cachep)); | 3352 | memset(ptr, 0, obj_size(cachep)); |
@@ -3405,6 +3392,8 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) | |||
3405 | unsigned long save_flags; | 3392 | unsigned long save_flags; |
3406 | void *objp; | 3393 | void *objp; |
3407 | 3394 | ||
3395 | flags &= slab_gfp_mask; | ||
3396 | |||
3408 | lockdep_trace_alloc(flags); | 3397 | lockdep_trace_alloc(flags); |
3409 | 3398 | ||
3410 | if (slab_should_failslab(cachep, flags)) | 3399 | if (slab_should_failslab(cachep, flags)) |
@@ -3415,8 +3404,13 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) | |||
3415 | objp = __do_cache_alloc(cachep, flags); | 3404 | objp = __do_cache_alloc(cachep, flags); |
3416 | local_irq_restore(save_flags); | 3405 | local_irq_restore(save_flags); |
3417 | objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); | 3406 | objp = cache_alloc_debugcheck_after(cachep, flags, objp, caller); |
3407 | kmemleak_alloc_recursive(objp, obj_size(cachep), 1, cachep->flags, | ||
3408 | flags); | ||
3418 | prefetchw(objp); | 3409 | prefetchw(objp); |
3419 | 3410 | ||
3411 | if (likely(objp)) | ||
3412 | kmemcheck_slab_alloc(cachep, flags, objp, obj_size(cachep)); | ||
3413 | |||
3420 | if (unlikely((flags & __GFP_ZERO) && objp)) | 3414 | if (unlikely((flags & __GFP_ZERO) && objp)) |
3421 | memset(objp, 0, obj_size(cachep)); | 3415 | memset(objp, 0, obj_size(cachep)); |
3422 | 3416 | ||
@@ -3530,8 +3524,11 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp) | |||
3530 | struct array_cache *ac = cpu_cache_get(cachep); | 3524 | struct array_cache *ac = cpu_cache_get(cachep); |
3531 | 3525 | ||
3532 | check_irq_off(); | 3526 | check_irq_off(); |
3527 | kmemleak_free_recursive(objp, cachep->flags); | ||
3533 | objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0)); | 3528 | objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0)); |
3534 | 3529 | ||
3530 | kmemcheck_slab_free(cachep, objp, obj_size(cachep)); | ||
3531 | |||
3535 | /* | 3532 | /* |
3536 | * Skip calling cache_free_alien() when the platform is not numa. | 3533 | * Skip calling cache_free_alien() when the platform is not numa. |
3537 | * This will avoid cache misses that happen while accessing slabp (which | 3534 | * This will avoid cache misses that happen while accessing slabp (which |
@@ -3539,7 +3536,7 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp) | |||
3539 | * variable to skip the call, which is mostly likely to be present in | 3536 | * variable to skip the call, which is mostly likely to be present in |
3540 | * the cache. | 3537 | * the cache. |
3541 | */ | 3538 | */ |
3542 | if (numa_platform && cache_free_alien(cachep, objp)) | 3539 | if (nr_online_nodes > 1 && cache_free_alien(cachep, objp)) |
3543 | return; | 3540 | return; |
3544 | 3541 | ||
3545 | if (likely(ac->avail < ac->limit)) { | 3542 | if (likely(ac->avail < ac->limit)) { |
@@ -3802,7 +3799,7 @@ EXPORT_SYMBOL_GPL(kmem_cache_name); | |||
3802 | /* | 3799 | /* |
3803 | * This initializes kmem_list3 or resizes various caches for all nodes. | 3800 | * This initializes kmem_list3 or resizes various caches for all nodes. |
3804 | */ | 3801 | */ |
3805 | static int alloc_kmemlist(struct kmem_cache *cachep) | 3802 | static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) |
3806 | { | 3803 | { |
3807 | int node; | 3804 | int node; |
3808 | struct kmem_list3 *l3; | 3805 | struct kmem_list3 *l3; |
@@ -3812,7 +3809,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep) | |||
3812 | for_each_online_node(node) { | 3809 | for_each_online_node(node) { |
3813 | 3810 | ||
3814 | if (use_alien_caches) { | 3811 | if (use_alien_caches) { |
3815 | new_alien = alloc_alien_cache(node, cachep->limit); | 3812 | new_alien = alloc_alien_cache(node, cachep->limit, gfp); |
3816 | if (!new_alien) | 3813 | if (!new_alien) |
3817 | goto fail; | 3814 | goto fail; |
3818 | } | 3815 | } |
@@ -3821,7 +3818,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep) | |||
3821 | if (cachep->shared) { | 3818 | if (cachep->shared) { |
3822 | new_shared = alloc_arraycache(node, | 3819 | new_shared = alloc_arraycache(node, |
3823 | cachep->shared*cachep->batchcount, | 3820 | cachep->shared*cachep->batchcount, |
3824 | 0xbaadf00d); | 3821 | 0xbaadf00d, gfp); |
3825 | if (!new_shared) { | 3822 | if (!new_shared) { |
3826 | free_alien_cache(new_alien); | 3823 | free_alien_cache(new_alien); |
3827 | goto fail; | 3824 | goto fail; |
@@ -3850,7 +3847,7 @@ static int alloc_kmemlist(struct kmem_cache *cachep) | |||
3850 | free_alien_cache(new_alien); | 3847 | free_alien_cache(new_alien); |
3851 | continue; | 3848 | continue; |
3852 | } | 3849 | } |
3853 | l3 = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, node); | 3850 | l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node); |
3854 | if (!l3) { | 3851 | if (!l3) { |
3855 | free_alien_cache(new_alien); | 3852 | free_alien_cache(new_alien); |
3856 | kfree(new_shared); | 3853 | kfree(new_shared); |
@@ -3906,18 +3903,18 @@ static void do_ccupdate_local(void *info) | |||
3906 | 3903 | ||
3907 | /* Always called with the cache_chain_mutex held */ | 3904 | /* Always called with the cache_chain_mutex held */ |
3908 | static int do_tune_cpucache(struct kmem_cache *cachep, int limit, | 3905 | static int do_tune_cpucache(struct kmem_cache *cachep, int limit, |
3909 | int batchcount, int shared) | 3906 | int batchcount, int shared, gfp_t gfp) |
3910 | { | 3907 | { |
3911 | struct ccupdate_struct *new; | 3908 | struct ccupdate_struct *new; |
3912 | int i; | 3909 | int i; |
3913 | 3910 | ||
3914 | new = kzalloc(sizeof(*new), GFP_KERNEL); | 3911 | new = kzalloc(sizeof(*new), gfp); |
3915 | if (!new) | 3912 | if (!new) |
3916 | return -ENOMEM; | 3913 | return -ENOMEM; |
3917 | 3914 | ||
3918 | for_each_online_cpu(i) { | 3915 | for_each_online_cpu(i) { |
3919 | new->new[i] = alloc_arraycache(cpu_to_node(i), limit, | 3916 | new->new[i] = alloc_arraycache(cpu_to_node(i), limit, |
3920 | batchcount); | 3917 | batchcount, gfp); |
3921 | if (!new->new[i]) { | 3918 | if (!new->new[i]) { |
3922 | for (i--; i >= 0; i--) | 3919 | for (i--; i >= 0; i--) |
3923 | kfree(new->new[i]); | 3920 | kfree(new->new[i]); |
@@ -3944,11 +3941,11 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, | |||
3944 | kfree(ccold); | 3941 | kfree(ccold); |
3945 | } | 3942 | } |
3946 | kfree(new); | 3943 | kfree(new); |
3947 | return alloc_kmemlist(cachep); | 3944 | return alloc_kmemlist(cachep, gfp); |
3948 | } | 3945 | } |
3949 | 3946 | ||
3950 | /* Called with cache_chain_mutex held always */ | 3947 | /* Called with cache_chain_mutex held always */ |
3951 | static int enable_cpucache(struct kmem_cache *cachep) | 3948 | static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp) |
3952 | { | 3949 | { |
3953 | int err; | 3950 | int err; |
3954 | int limit, shared; | 3951 | int limit, shared; |
@@ -3994,7 +3991,7 @@ static int enable_cpucache(struct kmem_cache *cachep) | |||
3994 | if (limit > 32) | 3991 | if (limit > 32) |
3995 | limit = 32; | 3992 | limit = 32; |
3996 | #endif | 3993 | #endif |
3997 | err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared); | 3994 | err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp); |
3998 | if (err) | 3995 | if (err) |
3999 | printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n", | 3996 | printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n", |
4000 | cachep->name, -err); | 3997 | cachep->name, -err); |
@@ -4300,7 +4297,8 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer, | |||
4300 | res = 0; | 4297 | res = 0; |
4301 | } else { | 4298 | } else { |
4302 | res = do_tune_cpucache(cachep, limit, | 4299 | res = do_tune_cpucache(cachep, limit, |
4303 | batchcount, shared); | 4300 | batchcount, shared, |
4301 | GFP_KERNEL); | ||
4304 | } | 4302 | } |
4305 | break; | 4303 | break; |
4306 | } | 4304 | } |