path: root/mm/slub.c
Diffstat (limited to 'mm/slub.c')
-rw-r--r--  mm/slub.c  173
1 files changed, 139 insertions, 34 deletions
diff --git a/mm/slub.c b/mm/slub.c
index 7ab54ecbd3f3..b2b0c78ae35d 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -9,6 +9,7 @@
  */
 
 #include <linux/mm.h>
+#include <linux/swap.h> /* struct reclaim_state */
 #include <linux/module.h>
 #include <linux/bit_spinlock.h>
 #include <linux/interrupt.h>
@@ -16,9 +17,11 @@
 #include <linux/slab.h>
 #include <linux/proc_fs.h>
 #include <linux/seq_file.h>
-#include <trace/kmemtrace.h>
+#include <linux/kmemtrace.h>
+#include <linux/kmemcheck.h>
 #include <linux/cpu.h>
 #include <linux/cpuset.h>
+#include <linux/kmemleak.h>
 #include <linux/mempolicy.h>
 #include <linux/ctype.h>
 #include <linux/debugobjects.h>
@@ -142,10 +145,10 @@
  * Set of flags that will prevent slab merging
  */
 #define SLUB_NEVER_MERGE (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
-                SLAB_TRACE | SLAB_DESTROY_BY_RCU)
+                SLAB_TRACE | SLAB_DESTROY_BY_RCU | SLAB_NOLEAKTRACE)
 
 #define SLUB_MERGE_SAME (SLAB_DEBUG_FREE | SLAB_RECLAIM_ACCOUNT | \
-                SLAB_CACHE_DMA)
+                SLAB_CACHE_DMA | SLAB_NOTRACK)
 
 #ifndef ARCH_KMALLOC_MINALIGN
 #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
@@ -176,6 +179,12 @@ static enum {
         SYSFS           /* Sysfs up */
 } slab_state = DOWN;
 
+/*
+ * The slab allocator is initialized with interrupts disabled. Therefore, make
+ * sure early boot allocations don't accidentally enable interrupts.
+ */
+static gfp_t slab_gfp_mask __read_mostly = SLAB_GFP_BOOT_MASK;
+
 /* A list of all slab caches on the system */
 static DECLARE_RWSEM(slub_lock);
 static LIST_HEAD(slab_caches);
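Note on the hunk above: slab_gfp_mask starts out restricted to SLAB_GFP_BOOT_MASK, every allocation is ANDed against it in slab_alloc() (see the "gfpflags &= slab_gfp_mask" hunk further down), and kmem_cache_init_late() later widens it to __GFP_BITS_MASK once interrupts are enabled. The following is a minimal, self-contained sketch of that masking pattern only; the flag values and helper names are illustrative, not the kernel's real GFP bits.

#include <assert.h>
#include <stdio.h>

/* Illustrative flag values; the kernel's GFP bits differ. */
#define GFP_WAIT   0x1u   /* may sleep, unsafe while IRQs are still off */
#define GFP_IO     0x2u
#define GFP_FS     0x4u
#define GFP_HIGH   0x8u

/* Boot-safe mask: nothing that can sleep or recurse into I/O. */
#define GFP_BOOT_MASK  (GFP_HIGH)
#define GFP_ALL_MASK   (GFP_WAIT | GFP_IO | GFP_FS | GFP_HIGH)

static unsigned int gfp_allowed = GFP_BOOT_MASK;

/* Every allocation filters its flags, as slab_alloc() does in this patch. */
static unsigned int filter_gfp(unsigned int flags)
{
        return flags & gfp_allowed;
}

/* Called once interrupts are enabled, like kmem_cache_init_late(). */
static void allow_all_gfp(void)
{
        gfp_allowed = GFP_ALL_MASK;
}

int main(void)
{
        /* Early boot: sleeping flags are silently dropped. */
        assert(filter_gfp(GFP_WAIT | GFP_HIGH) == GFP_HIGH);

        allow_all_gfp();

        /* After late init the caller's flags pass through unchanged. */
        assert(filter_gfp(GFP_WAIT | GFP_HIGH) == (GFP_WAIT | GFP_HIGH));

        printf("boot-time GFP masking behaves as expected\n");
        return 0;
}

The point of the pattern is that early callers keep passing their usual flags and the allocator strips anything that would sleep or re-enable interrupts until it is safe to honour them.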
@@ -831,6 +840,11 @@ static inline unsigned long slabs_node(struct kmem_cache *s, int node)
         return atomic_long_read(&n->nr_slabs);
 }
 
+static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
+{
+        return atomic_long_read(&n->nr_slabs);
+}
+
 static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
 {
         struct kmem_cache_node *n = get_node(s, node);
@@ -1049,6 +1063,8 @@ static inline unsigned long kmem_cache_flags(unsigned long objsize,
 
 static inline unsigned long slabs_node(struct kmem_cache *s, int node)
                                                         { return 0; }
+static inline unsigned long node_nr_slabs(struct kmem_cache_node *n)
+                                                        { return 0; }
 static inline void inc_slabs_node(struct kmem_cache *s, int node,
                                                         int objects) {}
 static inline void dec_slabs_node(struct kmem_cache *s, int node,
@@ -1063,6 +1079,8 @@ static inline struct page *alloc_slab_page(gfp_t flags, int node,
 {
         int order = oo_order(oo);
 
+        flags |= __GFP_NOTRACK;
+
         if (node == -1)
                 return alloc_pages(flags, order);
         else
@@ -1090,6 +1108,24 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 
                 stat(get_cpu_slab(s, raw_smp_processor_id()), ORDER_FALLBACK);
         }
+
+        if (kmemcheck_enabled
+                && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS)))
+        {
+                int pages = 1 << oo_order(oo);
+
+                kmemcheck_alloc_shadow(page, oo_order(oo), flags, node);
+
+                /*
+                 * Objects from caches that have a constructor don't get
+                 * cleared when they're allocated, so we need to do it here.
+                 */
+                if (s->ctor)
+                        kmemcheck_mark_uninitialized_pages(page, pages);
+                else
+                        kmemcheck_mark_unallocated_pages(page, pages);
+        }
+
         page->objects = oo_objects(oo);
         mod_zone_page_state(page_zone(page),
                 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
@@ -1163,6 +1199,8 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
                 __ClearPageSlubDebug(page);
         }
 
+        kmemcheck_free_shadow(page, compound_order(page));
+
         mod_zone_page_state(page_zone(page),
                 (s->flags & SLAB_RECLAIM_ACCOUNT) ?
                 NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
@@ -1170,6 +1208,8 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 
         __ClearPageSlab(page);
         reset_page_mapcount(page);
+        if (current->reclaim_state)
+                current->reclaim_state->reclaimed_slab += pages;
         __free_pages(page, order);
 }
 
@@ -1481,6 +1521,65 @@ static inline int node_match(struct kmem_cache_cpu *c, int node)
         return 1;
 }
 
+static int count_free(struct page *page)
+{
+        return page->objects - page->inuse;
+}
+
+static unsigned long count_partial(struct kmem_cache_node *n,
+                                        int (*get_count)(struct page *))
+{
+        unsigned long flags;
+        unsigned long x = 0;
+        struct page *page;
+
+        spin_lock_irqsave(&n->list_lock, flags);
+        list_for_each_entry(page, &n->partial, lru)
+                x += get_count(page);
+        spin_unlock_irqrestore(&n->list_lock, flags);
+        return x;
+}
+
+static inline unsigned long node_nr_objs(struct kmem_cache_node *n)
+{
+#ifdef CONFIG_SLUB_DEBUG
+        return atomic_long_read(&n->total_objects);
+#else
+        return 0;
+#endif
+}
+
+static noinline void
+slab_out_of_memory(struct kmem_cache *s, gfp_t gfpflags, int nid)
+{
+        int node;
+
+        printk(KERN_WARNING
+                "SLUB: Unable to allocate memory on node %d (gfp=0x%x)\n",
+                nid, gfpflags);
+        printk(KERN_WARNING "  cache: %s, object size: %d, buffer size: %d, "
+                "default order: %d, min order: %d\n", s->name, s->objsize,
+                s->size, oo_order(s->oo), oo_order(s->min));
+
+        for_each_online_node(node) {
+                struct kmem_cache_node *n = get_node(s, node);
+                unsigned long nr_slabs;
+                unsigned long nr_objs;
+                unsigned long nr_free;
+
+                if (!n)
+                        continue;
+
+                nr_free  = count_partial(n, count_free);
+                nr_slabs = node_nr_slabs(n);
+                nr_objs  = node_nr_objs(n);
+
+                printk(KERN_WARNING
+                        "  node %d: slabs: %ld, objs: %ld, free: %ld\n",
+                        node, nr_slabs, nr_objs, nr_free);
+        }
+}
+
 /*
  * Slow path. The lockless freelist is empty or we need to perform
  * debugging duties.
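For reference, the slow-path hunk below calls the new slab_out_of_memory() when an allocation fails and __GFP_NOWARN is not set, so the printk formats above produce a rate-limited report along these lines (the cache name and all numbers here are purely illustrative, not captured output):

SLUB: Unable to allocate memory on node -1 (gfp=0x20)
  cache: kmalloc-2048, object size: 2048, buffer size: 2048, default order: 3, min order: 0
  node 0: slabs: 123, objs: 1968, free: 0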
@@ -1562,6 +1661,8 @@ new_slab:
                 c->page = new;
                 goto load_freelist;
         }
+        if (!(gfpflags & __GFP_NOWARN) && printk_ratelimit())
+                slab_out_of_memory(s, gfpflags, node);
         return NULL;
 debug:
         if (!alloc_debug_processing(s, c->page, object, addr))
@@ -1591,6 +1692,8 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
         unsigned long flags;
         unsigned int objsize;
 
+        gfpflags &= slab_gfp_mask;
+
         lockdep_trace_alloc(gfpflags);
         might_sleep_if(gfpflags & __GFP_WAIT);
 
@@ -1614,6 +1717,9 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
         if (unlikely((gfpflags & __GFP_ZERO) && object))
                 memset(object, 0, objsize);
 
+        kmemcheck_slab_alloc(s, gfpflags, object, c->objsize);
+        kmemleak_alloc_recursive(object, objsize, 1, s->flags, gfpflags);
+
         return object;
 }
 
@@ -1743,8 +1849,10 @@ static __always_inline void slab_free(struct kmem_cache *s,
         struct kmem_cache_cpu *c;
         unsigned long flags;
 
+        kmemleak_free_recursive(x, s->flags);
         local_irq_save(flags);
         c = get_cpu_slab(s, smp_processor_id());
+        kmemcheck_slab_free(s, object, c->objsize);
         debug_check_no_locks_freed(object, c->objsize);
         if (!(s->flags & SLAB_DEBUG_OBJECTS))
                 debug_check_no_obj_freed(object, c->objsize);
@@ -1909,7 +2017,7 @@ static inline int calculate_order(int size)
          * Doh this slab cannot be placed using slub_max_order.
          */
         order = slab_order(size, 1, MAX_ORDER, 1);
-        if (order <= MAX_ORDER)
+        if (order < MAX_ORDER)
                 return order;
         return -ENOSYS;
 }
@@ -2522,6 +2630,7 @@ __setup("slub_min_order=", setup_slub_min_order);
 static int __init setup_slub_max_order(char *str)
 {
         get_option(&str, &slub_max_order);
+        slub_max_order = min(slub_max_order, MAX_ORDER - 1);
 
         return 1;
 }
@@ -2553,13 +2662,16 @@ static struct kmem_cache *create_kmalloc_cache(struct kmem_cache *s,
         if (gfp_flags & SLUB_DMA)
                 flags = SLAB_CACHE_DMA;
 
-        down_write(&slub_lock);
+        /*
+         * This function is called with IRQs disabled during early-boot on
+         * single CPU so there's no need to take slub_lock here.
+         */
         if (!kmem_cache_open(s, gfp_flags, name, size, ARCH_KMALLOC_MINALIGN,
                                                                 flags, NULL))
                 goto panic;
 
         list_add(&s->list, &slab_caches);
-        up_write(&slub_lock);
+
         if (sysfs_slab_add(s))
                 goto panic;
         return s;
@@ -2615,7 +2727,8 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
 
         if (!s || !text || !kmem_cache_open(s, flags, text,
                         realsize, ARCH_KMALLOC_MINALIGN,
-                        SLAB_CACHE_DMA|__SYSFS_ADD_DEFERRED, NULL)) {
+                        SLAB_CACHE_DMA|SLAB_NOTRACK|__SYSFS_ADD_DEFERRED,
+                        NULL)) {
                 kfree(s);
                 kfree(text);
                 goto unlock_out;
@@ -2709,9 +2822,10 @@ EXPORT_SYMBOL(__kmalloc);
 
 static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
 {
-        struct page *page = alloc_pages_node(node, flags | __GFP_COMP,
-                                                get_order(size));
+        struct page *page;
 
+        flags |= __GFP_COMP | __GFP_NOTRACK;
+        page = alloc_pages_node(node, flags, get_order(size));
         if (page)
                 return page_address(page);
         else
@@ -3017,7 +3131,7 @@ void __init kmem_cache_init(void)
          * kmem_cache_open for slab_state == DOWN.
          */
         create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node",
-                sizeof(struct kmem_cache_node), GFP_KERNEL);
+                sizeof(struct kmem_cache_node), GFP_NOWAIT);
         kmalloc_caches[0].refcount = -1;
         caches++;
 
@@ -3030,16 +3144,16 @@ void __init kmem_cache_init(void)
         /* Caches that are not of the two-to-the-power-of size */
         if (KMALLOC_MIN_SIZE <= 64) {
                 create_kmalloc_cache(&kmalloc_caches[1],
-                                "kmalloc-96", 96, GFP_KERNEL);
+                                "kmalloc-96", 96, GFP_NOWAIT);
                 caches++;
                 create_kmalloc_cache(&kmalloc_caches[2],
-                                "kmalloc-192", 192, GFP_KERNEL);
+                                "kmalloc-192", 192, GFP_NOWAIT);
                 caches++;
         }
 
         for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
                 create_kmalloc_cache(&kmalloc_caches[i],
-                        "kmalloc", 1 << i, GFP_KERNEL);
+                        "kmalloc", 1 << i, GFP_NOWAIT);
                 caches++;
         }
 
@@ -3076,7 +3190,7 @@ void __init kmem_cache_init(void)
         /* Provide the correct kmalloc names now that the caches are up */
         for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++)
                 kmalloc_caches[i]. name =
-                        kasprintf(GFP_KERNEL, "kmalloc-%d", 1 << i);
+                        kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);
 
 #ifdef CONFIG_SMP
         register_cpu_notifier(&slab_notifier);
@@ -3094,6 +3208,14 @@ void __init kmem_cache_init(void)
                 nr_cpu_ids, nr_node_ids);
 }
 
+void __init kmem_cache_init_late(void)
+{
+        /*
+         * Interrupts are enabled now so all GFP allocations are safe.
+         */
+        slab_gfp_mask = __GFP_BITS_MASK;
+}
+
 /*
  * Find a mergeable slab cache
  */
@@ -3314,20 +3436,6 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
 }
 
 #ifdef CONFIG_SLUB_DEBUG
-static unsigned long count_partial(struct kmem_cache_node *n,
-                                        int (*get_count)(struct page *))
-{
-        unsigned long flags;
-        unsigned long x = 0;
-        struct page *page;
-
-        spin_lock_irqsave(&n->list_lock, flags);
-        list_for_each_entry(page, &n->partial, lru)
-                x += get_count(page);
-        spin_unlock_irqrestore(&n->list_lock, flags);
-        return x;
-}
-
 static int count_inuse(struct page *page)
 {
         return page->inuse;
@@ -3338,11 +3446,6 @@ static int count_total(struct page *page)
         return page->objects;
 }
 
-static int count_free(struct page *page)
-{
-        return page->objects - page->inuse;
-}
-
 static int validate_slab(struct kmem_cache *s, struct page *page,
                                                 unsigned long *map)
 {
@@ -3711,7 +3814,7 @@ static int list_locations(struct kmem_cache *s, char *buf,
                                  to_cpumask(l->cpus));
                 }
 
-                if (num_online_nodes() > 1 && !nodes_empty(l->nodes) &&
+                if (nr_online_nodes > 1 && !nodes_empty(l->nodes) &&
                                 len < PAGE_SIZE - 60) {
                         len += sprintf(buf + len, " nodes=");
                         len += nodelist_scnprintf(buf + len, PAGE_SIZE - len - 50,
@@ -4386,6 +4489,8 @@ static char *create_unique_id(struct kmem_cache *s)
                 *p++ = 'a';
         if (s->flags & SLAB_DEBUG_FREE)
                 *p++ = 'F';
+        if (!(s->flags & SLAB_NOTRACK))
+                *p++ = 't';
         if (p != name + 1)
                 *p++ = '-';
         p += sprintf(p, "%07d", s->size);