Diffstat (limited to 'mm/slab.c')
-rw-r--r--  mm/slab.c  186
1 file changed, 98 insertions(+), 88 deletions(-)
diff --git a/mm/slab.c b/mm/slab.c
index 7dfa481c96ba..bac0f4fcc216 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -490,7 +490,7 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
 
 #endif
 
-#ifdef CONFIG_KMEMTRACE
+#ifdef CONFIG_TRACING
 size_t slab_buffer_size(struct kmem_cache *cachep)
 {
 	return cachep->buffer_size;
@@ -604,6 +604,26 @@ static struct kmem_cache cache_cache = {
 
 #define BAD_ALIEN_MAGIC 0x01020304ul
 
+/*
+ * chicken and egg problem: delay the per-cpu array allocation
+ * until the general caches are up.
+ */
+static enum {
+	NONE,
+	PARTIAL_AC,
+	PARTIAL_L3,
+	EARLY,
+	FULL
+} g_cpucache_up;
+
+/*
+ * used by boot code to determine if it can use slab based allocator
+ */
+int slab_is_available(void)
+{
+	return g_cpucache_up >= EARLY;
+}
+
 #ifdef CONFIG_LOCKDEP
 
 /*
@@ -620,40 +640,52 @@ static struct kmem_cache cache_cache = {
 static struct lock_class_key on_slab_l3_key;
 static struct lock_class_key on_slab_alc_key;
 
-static inline void init_lock_keys(void)
-
+static void init_node_lock_keys(int q)
 {
-	int q;
 	struct cache_sizes *s = malloc_sizes;
 
-	while (s->cs_size != ULONG_MAX) {
-		for_each_node(q) {
-			struct array_cache **alc;
-			int r;
-			struct kmem_list3 *l3 = s->cs_cachep->nodelists[q];
-			if (!l3 || OFF_SLAB(s->cs_cachep))
-				continue;
-			lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
-			alc = l3->alien;
-			/*
-			 * FIXME: This check for BAD_ALIEN_MAGIC
-			 * should go away when common slab code is taught to
-			 * work even without alien caches.
-			 * Currently, non NUMA code returns BAD_ALIEN_MAGIC
-			 * for alloc_alien_cache,
-			 */
-			if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
-				continue;
-			for_each_node(r) {
-				if (alc[r])
-					lockdep_set_class(&alc[r]->lock,
-							  &on_slab_alc_key);
-			}
+	if (g_cpucache_up != FULL)
+		return;
+
+	for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
+		struct array_cache **alc;
+		struct kmem_list3 *l3;
+		int r;
+
+		l3 = s->cs_cachep->nodelists[q];
+		if (!l3 || OFF_SLAB(s->cs_cachep))
+			continue;
+		lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
+		alc = l3->alien;
+		/*
+		 * FIXME: This check for BAD_ALIEN_MAGIC
+		 * should go away when common slab code is taught to
+		 * work even without alien caches.
+		 * Currently, non NUMA code returns BAD_ALIEN_MAGIC
+		 * for alloc_alien_cache,
+		 */
+		if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
+			continue;
+		for_each_node(r) {
+			if (alc[r])
+				lockdep_set_class(&alc[r]->lock,
+						  &on_slab_alc_key);
 		}
-		s++;
 	}
 }
+
+static inline void init_lock_keys(void)
+{
+	int node;
+
+	for_each_node(node)
+		init_node_lock_keys(node);
+}
 #else
+static void init_node_lock_keys(int q)
+{
+}
+
 static inline void init_lock_keys(void)
 {
 }
@@ -665,27 +697,7 @@ static inline void init_lock_keys(void)
 static DEFINE_MUTEX(cache_chain_mutex);
 static struct list_head cache_chain;
 
-/*
- * chicken and egg problem: delay the per-cpu array allocation
- * until the general caches are up.
- */
-static enum {
-	NONE,
-	PARTIAL_AC,
-	PARTIAL_L3,
-	EARLY,
-	FULL
-} g_cpucache_up;
-
-/*
- * used by boot code to determine if it can use slab based allocator
- */
-int slab_is_available(void)
-{
-	return g_cpucache_up >= EARLY;
-}
-
-static DEFINE_PER_CPU(struct delayed_work, reap_work);
+static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
 
 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
 {
@@ -826,7 +838,7 @@ __setup("noaliencache", noaliencache_setup);
  * objects freed on different nodes from which they were allocated) and the
  * flushing of remote pcps by calling drain_node_pages.
  */
-static DEFINE_PER_CPU(unsigned long, reap_node);
+static DEFINE_PER_CPU(unsigned long, slab_reap_node);
 
 static void init_reap_node(int cpu)
 {
@@ -836,17 +848,17 @@ static void init_reap_node(int cpu)
 	if (node == MAX_NUMNODES)
 		node = first_node(node_online_map);
 
-	per_cpu(reap_node, cpu) = node;
+	per_cpu(slab_reap_node, cpu) = node;
 }
 
 static void next_reap_node(void)
 {
-	int node = __get_cpu_var(reap_node);
+	int node = __get_cpu_var(slab_reap_node);
 
 	node = next_node(node, node_online_map);
 	if (unlikely(node >= MAX_NUMNODES))
 		node = first_node(node_online_map);
-	__get_cpu_var(reap_node) = node;
+	__get_cpu_var(slab_reap_node) = node;
 }
 
 #else
@@ -863,7 +875,7 @@ static void next_reap_node(void)
  */
 static void __cpuinit start_cpu_timer(int cpu)
 {
-	struct delayed_work *reap_work = &per_cpu(reap_work, cpu);
+	struct delayed_work *reap_work = &per_cpu(slab_reap_work, cpu);
 
 	/*
 	 * When this gets called from do_initcalls via cpucache_init(),
@@ -923,7 +935,6 @@ static int transfer_objects(struct array_cache *to,
 
 	from->avail -= nr;
 	to->avail += nr;
-	to->touched = 1;
 	return nr;
 }
 
@@ -971,13 +982,11 @@ static struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
 
 	if (limit > 1)
 		limit = 12;
-	ac_ptr = kmalloc_node(memsize, gfp, node);
+	ac_ptr = kzalloc_node(memsize, gfp, node);
 	if (ac_ptr) {
 		for_each_node(i) {
-			if (i == node || !node_online(i)) {
-				ac_ptr[i] = NULL;
+			if (i == node || !node_online(i))
 				continue;
-			}
 			ac_ptr[i] = alloc_arraycache(node, limit, 0xbaadf00d, gfp);
 			if (!ac_ptr[i]) {
 				for (i--; i >= 0; i--)
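
Since kzalloc_node() hands back already-zeroed memory, every slot of ac_ptr starts out NULL and the explicit ac_ptr[i] = NULL branch for the local and offline nodes becomes redundant, which is all this hunk changes. A minimal sketch of the equivalence (alloc_table() is a hypothetical helper, not part of the patch):

	#include <linux/slab.h>
	#include <linux/string.h>

	static void **alloc_table(int nr, gfp_t gfp, int node)
	{
		void **tbl = kmalloc_node(nr * sizeof(void *), gfp, node);

		if (tbl)
			memset(tbl, 0, nr * sizeof(void *));
		/* behaves like kzalloc_node(nr * sizeof(void *), gfp, node) */
		return tbl;
	}
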
@@ -1027,7 +1036,7 @@ static void __drain_alien_cache(struct kmem_cache *cachep,
  */
 static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
 {
-	int node = __get_cpu_var(reap_node);
+	int node = __get_cpu_var(slab_reap_node);
 
 	if (l3->alien) {
 		struct array_cache *ac = l3->alien[node];
@@ -1120,7 +1129,7 @@ static void __cpuinit cpuup_canceled(long cpu)
 		if (nc)
 			free_block(cachep, nc->entry, nc->avail, node);
 
-		if (!cpus_empty(*mask)) {
+		if (!cpumask_empty(mask)) {
 			spin_unlock_irq(&l3->list_lock);
 			goto free_array_cache;
 		}
@@ -1254,6 +1263,8 @@ static int __cpuinit cpuup_prepare(long cpu)
 		kfree(shared);
 		free_alien_cache(alien);
 	}
+	init_node_lock_keys(node);
+
 	return 0;
 bad:
 	cpuup_canceled(cpu);
@@ -1286,9 +1297,9 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
 		 * anything expensive but will only modify reap_work
 		 * and reschedule the timer.
 		 */
-		cancel_rearming_delayed_work(&per_cpu(reap_work, cpu));
+		cancel_rearming_delayed_work(&per_cpu(slab_reap_work, cpu));
 		/* Now the cache_reaper is guaranteed to be not running. */
-		per_cpu(reap_work, cpu).work.func = NULL;
+		per_cpu(slab_reap_work, cpu).work.func = NULL;
 		break;
 	case CPU_DOWN_FAILED:
 	case CPU_DOWN_FAILED_FROZEN:
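
The reap_work/reap_node variables are only being renamed to slab_reap_work/slab_reap_node in these hunks (giving the per-CPU symbols a slab_ prefix); behaviour is unchanged. The underlying mechanism is a self-rearming per-CPU delayed work item, roughly as in this sketch under made-up names (my_reap_work, my_reap, my_start):

	#include <linux/workqueue.h>
	#include <linux/percpu.h>

	static DEFINE_PER_CPU(struct delayed_work, my_reap_work);

	static void my_reap(struct work_struct *w)
	{
		struct delayed_work *dw = to_delayed_work(w);

		/* ... per-CPU housekeeping ... */
		schedule_delayed_work(dw, HZ);		/* rearm for next round */
	}

	static void my_start(int cpu)
	{
		struct delayed_work *dw = &per_cpu(my_reap_work, cpu);

		INIT_DELAYED_WORK(dw, my_reap);
		schedule_delayed_work_on(cpu, dw, HZ);
	}
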
@@ -2261,9 +2272,11 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	/*
 	 * Determine if the slab management is 'on' or 'off' slab.
 	 * (bootstrapping cannot cope with offslab caches so don't do
-	 * it too early on.)
+	 * it too early on. Always use on-slab management when
+	 * SLAB_NOLEAKTRACE to avoid recursive calls into kmemleak)
 	 */
-	if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init)
+	if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init &&
+	    !(flags & SLAB_NOLEAKTRACE))
 		/*
 		 * Size is large, assume best to place the slab management obj
 		 * off-slab (should allow better packing of objs).
@@ -2582,8 +2595,8 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
 		 * kmemleak does not treat the ->s_mem pointer as a reference
 		 * to the object. Otherwise we will not report the leak.
 		 */
-		kmemleak_scan_area(slabp, offsetof(struct slab, list),
-				   sizeof(struct list_head), local_flags);
+		kmemleak_scan_area(&slabp->list, sizeof(struct list_head),
+				   local_flags);
 		if (!slabp)
 			return NULL;
 	} else {
@@ -2947,8 +2960,10 @@ retry:
 	spin_lock(&l3->list_lock);
 
 	/* See if we can refill from the shared array */
-	if (l3->shared && transfer_objects(ac, l3->shared, batchcount))
+	if (l3->shared && transfer_objects(ac, l3->shared, batchcount)) {
+		l3->shared->touched = 1;
 		goto alloc_done;
+	}
 
 	while (batchcount > 0) {
 		struct list_head *entry;
@@ -3085,7 +3100,7 @@ static bool slab_should_failslab(struct kmem_cache *cachep, gfp_t flags)
 	if (cachep == &cache_cache)
 		return false;
 
-	return should_failslab(obj_size(cachep), flags);
+	return should_failslab(obj_size(cachep), flags, cachep->flags);
 }
 
 static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
@@ -3103,13 +3118,19 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 	} else {
 		STATS_INC_ALLOCMISS(cachep);
 		objp = cache_alloc_refill(cachep, flags);
+		/*
+		 * the 'ac' may be updated by cache_alloc_refill(),
+		 * and kmemleak_erase() requires its correct value.
+		 */
+		ac = cpu_cache_get(cachep);
 	}
 	/*
 	 * To avoid a false negative, if an object that is in one of the
 	 * per-CPU caches is leaked, we need to make sure kmemleak doesn't
 	 * treat the array pointers as a reference to the object.
 	 */
-	kmemleak_erase(&ac->entry[ac->avail]);
+	if (objp)
+		kmemleak_erase(&ac->entry[ac->avail]);
 	return objp;
 }
 
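
Two defensive fixes in one hunk: cache_alloc_refill() can replace the per-CPU array_cache (its arrays may be reallocated), so ac is re-fetched before kmemleak_erase() dereferences ac->entry[ac->avail], and the erase is skipped entirely when the refill produced no object. A stripped-down sketch of the stale-pointer hazard, with hypothetical current_cache()/refill() callbacks standing in for the real helpers:

	struct pcpu_cache { void **entry; unsigned int avail; };

	static void *cache_get(struct pcpu_cache *(*current_cache)(void),
			       void *(*refill)(void))
	{
		struct pcpu_cache *c = current_cache();
		void *obj;

		if (c->avail) {
			obj = c->entry[--c->avail];
		} else {
			obj = refill();
			c = current_cache();	/* re-fetch: refill() may have swapped it */
		}
		if (obj)			/* nothing to scrub when allocation failed */
			c->entry[c->avail] = NULL;
		return obj;
	}
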
@@ -3306,7 +3327,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
 	cache_alloc_debugcheck_before(cachep, flags);
 	local_irq_save(save_flags);
 
-	if (unlikely(nodeid == -1))
+	if (nodeid == -1)
 		nodeid = numa_node_id();
 
 	if (unlikely(!cachep->nodelists[nodeid])) {
@@ -3558,7 +3579,7 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 }
 EXPORT_SYMBOL(kmem_cache_alloc);
 
-#ifdef CONFIG_KMEMTRACE
+#ifdef CONFIG_TRACING
 void *kmem_cache_alloc_notrace(struct kmem_cache *cachep, gfp_t flags)
 {
 	return __cache_alloc(cachep, flags, __builtin_return_address(0));
@@ -3581,21 +3602,10 @@ EXPORT_SYMBOL(kmem_cache_alloc_notrace);
  */
 int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr)
 {
-	unsigned long addr = (unsigned long)ptr;
-	unsigned long min_addr = PAGE_OFFSET;
-	unsigned long align_mask = BYTES_PER_WORD - 1;
 	unsigned long size = cachep->buffer_size;
 	struct page *page;
 
-	if (unlikely(addr < min_addr))
-		goto out;
-	if (unlikely(addr > (unsigned long)high_memory - size))
-		goto out;
-	if (unlikely(addr & align_mask))
-		goto out;
-	if (unlikely(!kern_addr_valid(addr)))
-		goto out;
-	if (unlikely(!kern_addr_valid(addr + size - 1)))
+	if (unlikely(!kern_ptr_validate(ptr, size)))
 		goto out;
 	page = virt_to_page(ptr);
 	if (unlikely(!PageSlab(page)))
@@ -3621,7 +3631,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 }
 EXPORT_SYMBOL(kmem_cache_alloc_node);
 
-#ifdef CONFIG_KMEMTRACE
+#ifdef CONFIG_TRACING
 void *kmem_cache_alloc_node_notrace(struct kmem_cache *cachep,
 				    gfp_t flags,
 				    int nodeid)
@@ -3649,7 +3659,7 @@ __do_kmalloc_node(size_t size, gfp_t flags, int node, void *caller)
 	return ret;
 }
 
-#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_KMEMTRACE)
+#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)
 void *__kmalloc_node(size_t size, gfp_t flags, int node)
 {
 	return __do_kmalloc_node(size, flags, node,
@@ -3669,7 +3679,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
 	return __do_kmalloc_node(size, flags, node, NULL);
 }
 EXPORT_SYMBOL(__kmalloc_node);
-#endif /* CONFIG_DEBUG_SLAB */
+#endif /* CONFIG_DEBUG_SLAB || CONFIG_TRACING */
 #endif /* CONFIG_NUMA */
 
 /**
@@ -3701,7 +3711,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
 }
 
 
-#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_KMEMTRACE)
+#if defined(CONFIG_DEBUG_SLAB) || defined(CONFIG_TRACING)
 void *__kmalloc(size_t size, gfp_t flags)
 {
 	return __do_kmalloc(size, flags, __builtin_return_address(0));