Diffstat (limited to 'mm/slab.c')
-rw-r--r--	mm/slab.c	249
1 files changed, 122 insertions, 127 deletions
diff --git a/mm/slab.c b/mm/slab.c
index f1b644eb39d8..98ac20bc0de9 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -331,6 +331,8 @@ static __always_inline int index_of(const size_t size)
 	return 0;
 }
 
+static int slab_early_init = 1;
+
 #define INDEX_AC index_of(sizeof(struct arraycache_init))
 #define INDEX_L3 index_of(sizeof(struct kmem_list3))
 
@@ -592,6 +594,7 @@ static inline struct kmem_cache *page_get_cache(struct page *page)
 {
 	if (unlikely(PageCompound(page)))
 		page = (struct page *)page_private(page);
+	BUG_ON(!PageSlab(page));
 	return (struct kmem_cache *)page->lru.next;
 }
 
@@ -604,6 +607,7 @@ static inline struct slab *page_get_slab(struct page *page)
 {
 	if (unlikely(PageCompound(page)))
 		page = (struct page *)page_private(page);
+	BUG_ON(!PageSlab(page));
 	return (struct slab *)page->lru.prev;
 }
 
@@ -1024,6 +1028,40 @@ static void drain_alien_cache(struct kmem_cache *cachep,
 		}
 	}
 }
+
+static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
+{
+	struct slab *slabp = virt_to_slab(objp);
+	int nodeid = slabp->nodeid;
+	struct kmem_list3 *l3;
+	struct array_cache *alien = NULL;
+
+	/*
+	 * Make sure we are not freeing a object from another node to the array
+	 * cache on this cpu.
+	 */
+	if (likely(slabp->nodeid == numa_node_id()))
+		return 0;
+
+	l3 = cachep->nodelists[numa_node_id()];
+	STATS_INC_NODEFREES(cachep);
+	if (l3->alien && l3->alien[nodeid]) {
+		alien = l3->alien[nodeid];
+		spin_lock(&alien->lock);
+		if (unlikely(alien->avail == alien->limit)) {
+			STATS_INC_ACOVERFLOW(cachep);
+			__drain_alien_cache(cachep, alien, nodeid);
+		}
+		alien->entry[alien->avail++] = objp;
+		spin_unlock(&alien->lock);
+	} else {
+		spin_lock(&(cachep->nodelists[nodeid])->list_lock);
+		free_block(cachep, &objp, 1, nodeid);
+		spin_unlock(&(cachep->nodelists[nodeid])->list_lock);
+	}
+	return 1;
+}
+
 #else
 
 #define drain_alien_cache(cachep, alien) do { } while (0)
@@ -1038,6 +1076,11 @@ static inline void free_alien_cache(struct array_cache **ac_ptr)
 {
 }
 
+static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
+{
+	return 0;
+}
+
 #endif
 
 static int cpuup_callback(struct notifier_block *nfb,
@@ -1335,6 +1378,8 @@ void __init kmem_cache_init(void)
 				NULL, NULL);
 	}
 
+	slab_early_init = 0;
+
 	while (sizes->cs_size != ULONG_MAX) {
 		/*
 		 * For performance, all the general caches are L1 aligned.
@@ -1450,31 +1495,29 @@ __initcall(cpucache_init);
 static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 {
 	struct page *page;
-	void *addr;
+	int nr_pages;
 	int i;
 
-	flags |= cachep->gfpflags;
 #ifndef CONFIG_MMU
-	/* nommu uses slab's for process anonymous memory allocations, so
-	 * requires __GFP_COMP to properly refcount higher order allocations"
+	/*
+	 * Nommu uses slab's for process anonymous memory allocations, and thus
+	 * requires __GFP_COMP to properly refcount higher order allocations
 	 */
-	page = alloc_pages_node(nodeid, (flags | __GFP_COMP), cachep->gfporder);
-#else
-	page = alloc_pages_node(nodeid, flags, cachep->gfporder);
+	flags |= __GFP_COMP;
 #endif
+	flags |= cachep->gfpflags;
+
+	page = alloc_pages_node(nodeid, flags, cachep->gfporder);
 	if (!page)
 		return NULL;
-	addr = page_address(page);
 
-	i = (1 << cachep->gfporder);
+	nr_pages = (1 << cachep->gfporder);
 	if (cachep->flags & SLAB_RECLAIM_ACCOUNT)
-		atomic_add(i, &slab_reclaim_pages);
-	add_page_state(nr_slab, i);
-	while (i--) {
-		__SetPageSlab(page);
-		page++;
-	}
-	return addr;
+		atomic_add(nr_pages, &slab_reclaim_pages);
+	add_page_state(nr_slab, nr_pages);
+	for (i = 0; i < nr_pages; i++)
+		__SetPageSlab(page + i);
+	return page_address(page);
 }
 
 /*
@@ -1913,8 +1956,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	void (*dtor)(void*, struct kmem_cache *, unsigned long))
 {
 	size_t left_over, slab_size, ralign;
-	struct kmem_cache *cachep = NULL;
-	struct list_head *p;
+	struct kmem_cache *cachep = NULL, *pc;
 
 	/*
 	 * Sanity checks... these are all serious usage bugs.
@@ -1934,8 +1976,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 
 	mutex_lock(&cache_chain_mutex);
 
-	list_for_each(p, &cache_chain) {
-		struct kmem_cache *pc = list_entry(p, struct kmem_cache, next);
+	list_for_each_entry(pc, &cache_chain, next) {
 		mm_segment_t old_fs = get_fs();
 		char tmp;
 		int res;
@@ -2069,8 +2110,12 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 #endif
 #endif
 
-	/* Determine if the slab management is 'on' or 'off' slab. */
-	if (size >= (PAGE_SIZE >> 3))
+	/*
+	 * Determine if the slab management is 'on' or 'off' slab.
+	 * (bootstrapping cannot cope with offslab caches so don't do
+	 * it too early on.)
+	 */
+	if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init)
 		/*
 		 * Size is large, assume best to place the slab management obj
 		 * off-slab (should allow better packing of objs).
@@ -2460,23 +2505,28 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
 	slabp->inuse--;
 }
 
-static void set_slab_attr(struct kmem_cache *cachep, struct slab *slabp,
-			  void *objp)
+/*
+ * Map pages beginning at addr to the given cache and slab. This is required
+ * for the slab allocator to be able to lookup the cache and slab of a
+ * virtual address for kfree, ksize, kmem_ptr_validate, and slab debugging.
+ */
+static void slab_map_pages(struct kmem_cache *cache, struct slab *slab,
+			   void *addr)
 {
-	int i;
+	int nr_pages;
 	struct page *page;
 
-	/* Nasty!!!!!! I hope this is OK. */
-	page = virt_to_page(objp);
+	page = virt_to_page(addr);
 
-	i = 1;
+	nr_pages = 1;
 	if (likely(!PageCompound(page)))
-		i <<= cachep->gfporder;
+		nr_pages <<= cache->gfporder;
+
 	do {
-		page_set_cache(page, cachep);
-		page_set_slab(page, slabp);
+		page_set_cache(page, cache);
+		page_set_slab(page, slab);
 		page++;
-	} while (--i);
+	} while (--nr_pages);
 }
 
 /*
@@ -2548,7 +2598,7 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 		goto opps1;
 
 	slabp->nodeid = nodeid;
-	set_slab_attr(cachep, slabp, objp);
+	slab_map_pages(cachep, slabp, objp);
 
 	cache_init_objs(cachep, slabp, ctor_flags);
 
@@ -2596,6 +2646,28 @@ static void kfree_debugcheck(const void *objp)
 	}
 }
 
+static inline void verify_redzone_free(struct kmem_cache *cache, void *obj)
+{
+	unsigned long redzone1, redzone2;
+
+	redzone1 = *dbg_redzone1(cache, obj);
+	redzone2 = *dbg_redzone2(cache, obj);
+
+	/*
+	 * Redzone is ok.
+	 */
+	if (redzone1 == RED_ACTIVE && redzone2 == RED_ACTIVE)
+		return;
+
+	if (redzone1 == RED_INACTIVE && redzone2 == RED_INACTIVE)
+		slab_error(cache, "double free detected");
+	else
+		slab_error(cache, "memory outside object was overwritten");
+
+	printk(KERN_ERR "%p: redzone 1:0x%lx, redzone 2:0x%lx.\n",
+			obj, redzone1, redzone2);
+}
+
 static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
 				   void *caller)
 {
@@ -2607,27 +2679,10 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
 	kfree_debugcheck(objp);
 	page = virt_to_page(objp);
 
-	if (page_get_cache(page) != cachep) {
-		printk(KERN_ERR "mismatch in kmem_cache_free: expected "
-				"cache %p, got %p\n",
-		       page_get_cache(page), cachep);
-		printk(KERN_ERR "%p is %s.\n", cachep, cachep->name);
-		printk(KERN_ERR "%p is %s.\n", page_get_cache(page),
-		       page_get_cache(page)->name);
-		WARN_ON(1);
-	}
 	slabp = page_get_slab(page);
 
 	if (cachep->flags & SLAB_RED_ZONE) {
-		if (*dbg_redzone1(cachep, objp) != RED_ACTIVE ||
-				*dbg_redzone2(cachep, objp) != RED_ACTIVE) {
-			slab_error(cachep, "double free, or memory outside"
-					" object was overwritten");
-			printk(KERN_ERR "%p: redzone 1:0x%lx, "
-					"redzone 2:0x%lx.\n",
-					objp, *dbg_redzone1(cachep, objp),
-					*dbg_redzone2(cachep, objp));
-		}
+		verify_redzone_free(cachep, objp);
 		*dbg_redzone1(cachep, objp) = RED_INACTIVE;
 		*dbg_redzone2(cachep, objp) = RED_INACTIVE;
 	}
@@ -3087,41 +3142,9 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
 	check_irq_off();
 	objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));
 
-	/* Make sure we are not freeing a object from another
-	 * node to the array cache on this cpu.
-	 */
-#ifdef CONFIG_NUMA
-	{
-		struct slab *slabp;
-		slabp = virt_to_slab(objp);
-		if (unlikely(slabp->nodeid != numa_node_id())) {
-			struct array_cache *alien = NULL;
-			int nodeid = slabp->nodeid;
-			struct kmem_list3 *l3;
-
-			l3 = cachep->nodelists[numa_node_id()];
-			STATS_INC_NODEFREES(cachep);
-			if (l3->alien && l3->alien[nodeid]) {
-				alien = l3->alien[nodeid];
-				spin_lock(&alien->lock);
-				if (unlikely(alien->avail == alien->limit)) {
-					STATS_INC_ACOVERFLOW(cachep);
-					__drain_alien_cache(cachep,
-							    alien, nodeid);
-				}
-				alien->entry[alien->avail++] = objp;
-				spin_unlock(&alien->lock);
-			} else {
-				spin_lock(&(cachep->nodelists[nodeid])->
-					  list_lock);
-				free_block(cachep, &objp, 1, nodeid);
-				spin_unlock(&(cachep->nodelists[nodeid])->
-					    list_lock);
-			}
-			return;
-		}
-	}
-#endif
+	if (cache_free_alien(cachep, objp))
+		return;
+
 	if (likely(ac->avail < ac->limit)) {
 		STATS_INC_FREEHIT(cachep);
 		ac->entry[ac->avail++] = objp;
@@ -3254,26 +3277,10 @@ EXPORT_SYMBOL(kmalloc_node);
 #endif
 
 /**
- * kmalloc - allocate memory
+ * __do_kmalloc - allocate memory
  * @size: how many bytes of memory are required.
- * @flags: the type of memory to allocate.
+ * @flags: the type of memory to allocate (see kmalloc).
  * @caller: function caller for debug tracking of the caller
- *
- * kmalloc is the normal method of allocating memory
- * in the kernel.
- *
- * The @flags argument may be one of:
- *
- * %GFP_USER - Allocate memory on behalf of user. May sleep.
- *
- * %GFP_KERNEL - Allocate normal kernel ram. May sleep.
- *
- * %GFP_ATOMIC - Allocation will not sleep. Use inside interrupt handlers.
- *
- * Additionally, the %GFP_DMA flag may be set to indicate the memory
- * must be suitable for DMA. This can mean different things on different
- * platforms. For example, on i386, it means that the memory must come
- * from the first 16MB.
  */
 static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
 					  void *caller)
@@ -3371,6 +3378,8 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
 {
 	unsigned long flags;
 
+	BUG_ON(virt_to_cache(objp) != cachep);
+
 	local_irq_save(flags);
 	__cache_free(cachep, objp);
 	local_irq_restore(flags);
@@ -3680,7 +3689,7 @@ void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
  */
 static void cache_reap(void *unused)
 {
-	struct list_head *walk;
+	struct kmem_cache *searchp;
 	struct kmem_list3 *l3;
 	int node = numa_node_id();
 
@@ -3691,13 +3700,11 @@ static void cache_reap(void *unused)
 		return;
 	}
 
-	list_for_each(walk, &cache_chain) {
-		struct kmem_cache *searchp;
+	list_for_each_entry(searchp, &cache_chain, next) {
 		struct list_head *p;
 		int tofree;
 		struct slab *slabp;
 
-		searchp = list_entry(walk, struct kmem_cache, next);
 		check_irq_on();
 
 		/*
@@ -3825,7 +3832,6 @@ static void s_stop(struct seq_file *m, void *p)
 static int s_show(struct seq_file *m, void *p)
 {
 	struct kmem_cache *cachep = p;
-	struct list_head *q;
 	struct slab *slabp;
 	unsigned long active_objs;
 	unsigned long num_objs;
@@ -3846,15 +3852,13 @@ static int s_show(struct seq_file *m, void *p)
 	check_irq_on();
 	spin_lock_irq(&l3->list_lock);
 
-	list_for_each(q, &l3->slabs_full) {
-		slabp = list_entry(q, struct slab, list);
+	list_for_each_entry(slabp, &l3->slabs_full, list) {
 		if (slabp->inuse != cachep->num && !error)
 			error = "slabs_full accounting error";
 		active_objs += cachep->num;
 		active_slabs++;
 	}
-	list_for_each(q, &l3->slabs_partial) {
-		slabp = list_entry(q, struct slab, list);
+	list_for_each_entry(slabp, &l3->slabs_partial, list) {
 		if (slabp->inuse == cachep->num && !error)
 			error = "slabs_partial inuse accounting error";
 		if (!slabp->inuse && !error)
@@ -3862,8 +3866,7 @@ static int s_show(struct seq_file *m, void *p)
 		active_objs += slabp->inuse;
 		active_slabs++;
 	}
-	list_for_each(q, &l3->slabs_free) {
-		slabp = list_entry(q, struct slab, list);
+	list_for_each_entry(slabp, &l3->slabs_free, list) {
 		if (slabp->inuse && !error)
 			error = "slabs_free/inuse accounting error";
 		num_slabs++;
@@ -3956,7 +3959,7 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer,
 {
 	char kbuf[MAX_SLABINFO_WRITE + 1], *tmp;
 	int limit, batchcount, shared, res;
-	struct list_head *p;
+	struct kmem_cache *cachep;
 
 	if (count > MAX_SLABINFO_WRITE)
 		return -EINVAL;
@@ -3975,10 +3978,7 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer,
 	/* Find the cache in the chain of caches. */
 	mutex_lock(&cache_chain_mutex);
 	res = -EINVAL;
-	list_for_each(p, &cache_chain) {
-		struct kmem_cache *cachep;
-
-		cachep = list_entry(p, struct kmem_cache, next);
+	list_for_each_entry(cachep, &cache_chain, next) {
 		if (!strcmp(cachep->name, kbuf)) {
 			if (limit < 1 || batchcount < 1 ||
 					batchcount > limit || shared < 0) {
@@ -4080,7 +4080,6 @@ static void show_symbol(struct seq_file *m, unsigned long address)
 static int leaks_show(struct seq_file *m, void *p)
 {
 	struct kmem_cache *cachep = p;
-	struct list_head *q;
 	struct slab *slabp;
 	struct kmem_list3 *l3;
 	const char *name;
@@ -4105,14 +4104,10 @@ static int leaks_show(struct seq_file *m, void *p)
 		check_irq_on();
 		spin_lock_irq(&l3->list_lock);
 
-		list_for_each(q, &l3->slabs_full) {
-			slabp = list_entry(q, struct slab, list);
+		list_for_each_entry(slabp, &l3->slabs_full, list)
 			handle_slab(n, cachep, slabp);
-		}
-		list_for_each(q, &l3->slabs_partial) {
-			slabp = list_entry(q, struct slab, list);
+		list_for_each_entry(slabp, &l3->slabs_partial, list)
 			handle_slab(n, cachep, slabp);
-		}
 		spin_unlock_irq(&l3->list_lock);
 	}
 	name = cachep->name;