author		Linus Torvalds <torvalds@linux-foundation.org>	2011-07-22 15:44:30 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-07-22 15:44:30 -0400
commit		f99b7880cb9863e11441bd8b2f31d4f556ef1a44 (patch)
tree		6f3dc6e33e847b431dd899bd968d799f0d4a8fff
parent		02f8c6aee8df3cdc935e9bdd4f2d020306035dbe (diff)
parent		7ea466f2256b02a7047dfd47d76a2f6c1e427e3e (diff)
Merge branch 'slab-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6
* 'slab-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6:
  slab: fix DEBUG_SLAB warning
  slab: shrink sizeof(struct kmem_cache)
  slab: fix DEBUG_SLAB build
  SLUB: Fix missing <linux/stacktrace.h> include
  slub: reduce overhead of slub_debug
  slub: Add method to verify memory is not freed
  slub: Enable backtrace for create/delete points
  slab allocators: Provide generic description of alignment defines
  slab, slub, slob: Unify alignment definition
  slob/lockdep: Fix gfp flags passed to lockdep
-rw-r--r--	include/linux/slab.h		 20
-rw-r--r--	include/linux/slab_def.h	 52
-rw-r--r--	include/linux/slob_def.h	 10
-rw-r--r--	include/linux/slub_def.h	 23
-rw-r--r--	mm/slab.c			 17
-rw-r--r--	mm/slob.c			  6
-rw-r--r--	mm/slub.c			105
7 files changed, 164 insertions(+), 69 deletions(-)
diff --git a/include/linux/slab.h b/include/linux/slab.h
index ad4dd1c8d30a..573c809c33d9 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -134,6 +134,26 @@ unsigned int kmem_cache_size(struct kmem_cache *);
 #define KMALLOC_MAX_ORDER	(KMALLOC_SHIFT_HIGH - PAGE_SHIFT)
 
 /*
+ * Some archs want to perform DMA into kmalloc caches and need a guaranteed
+ * alignment larger than the alignment of a 64-bit integer.
+ * Setting ARCH_KMALLOC_MINALIGN in arch headers allows that.
+ */
+#ifdef ARCH_DMA_MINALIGN
+#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
+#else
+#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
+#endif
+
+/*
+ * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment.
+ * Intended for arches that get misalignment faults even for 64 bit integer
+ * aligned buffers.
+ */
+#ifndef ARCH_SLAB_MINALIGN
+#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
+#endif
+
+/*
  * Common kmalloc functions provided by all allocators
  */
 void * __must_check __krealloc(const void *, size_t, gfp_t);
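
For illustration only (not part of this diff): with the two definitions above now living in <linux/slab.h>, an architecture that performs DMA straight into kmalloc() buffers only has to provide ARCH_DMA_MINALIGN in its own headers; ARM's cache header follows roughly this pattern (values vary by configuration):

	/* arch/<arch>/include/asm/cache.h -- illustrative sketch */
	#define L1_CACHE_SHIFT		6
	#define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)

	/*
	 * Memory returned by kmalloc() may be used for DMA, so we must make
	 * sure that all such allocations are cache aligned.
	 */
	#define ARCH_DMA_MINALIGN	L1_CACHE_BYTES

Every other architecture falls back to __alignof__(unsigned long long), as before.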
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 83203ae9390b..d00e0bacda93 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -18,53 +18,25 @@
 #include <trace/events/kmem.h>
 
 /*
- * Enforce a minimum alignment for the kmalloc caches.
- * Usually, the kmalloc caches are cache_line_size() aligned, except when
- * DEBUG and FORCED_DEBUG are enabled, then they are BYTES_PER_WORD aligned.
- * Some archs want to perform DMA into kmalloc caches and need a guaranteed
- * alignment larger than the alignment of a 64-bit integer.
- * ARCH_KMALLOC_MINALIGN allows that.
- * Note that increasing this value may disable some debug features.
- */
-#ifdef ARCH_DMA_MINALIGN
-#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
-#else
-#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
-#endif
-
-#ifndef ARCH_SLAB_MINALIGN
-/*
- * Enforce a minimum alignment for all caches.
- * Intended for archs that get misalignment faults even for BYTES_PER_WORD
- * aligned buffers. Includes ARCH_KMALLOC_MINALIGN.
- * If possible: Do not enable this flag for CONFIG_DEBUG_SLAB, it disables
- * some debug features.
- */
-#define ARCH_SLAB_MINALIGN 0
-#endif
-
-/*
  * struct kmem_cache
  *
  * manages a cache.
  */
 
 struct kmem_cache {
-/* 1) per-cpu data, touched during every alloc/free */
-	struct array_cache *array[NR_CPUS];
-/* 2) Cache tunables. Protected by cache_chain_mutex */
+/* 1) Cache tunables. Protected by cache_chain_mutex */
 	unsigned int batchcount;
 	unsigned int limit;
 	unsigned int shared;
 
 	unsigned int buffer_size;
 	u32 reciprocal_buffer_size;
-/* 3) touched by every alloc & free from the backend */
+/* 2) touched by every alloc & free from the backend */
 
 	unsigned int flags;		/* constant flags */
 	unsigned int num;		/* # of objs per slab */
 
-/* 4) cache_grow/shrink */
+/* 3) cache_grow/shrink */
 	/* order of pgs per slab (2^n) */
 	unsigned int gfporder;
 
@@ -80,11 +52,11 @@ struct kmem_cache {
 	/* constructor func */
 	void (*ctor)(void *obj);
 
-/* 5) cache creation/removal */
+/* 4) cache creation/removal */
 	const char *name;
 	struct list_head next;
 
-/* 6) statistics */
+/* 5) statistics */
 #ifdef CONFIG_DEBUG_SLAB
 	unsigned long num_active;
 	unsigned long num_allocations;
@@ -111,16 +83,18 @@ struct kmem_cache {
 	int obj_size;
 #endif /* CONFIG_DEBUG_SLAB */
 
+/* 6) per-cpu/per-node data, touched during every alloc/free */
 	/*
-	 * We put nodelists[] at the end of kmem_cache, because we want to size
-	 * this array to nr_node_ids slots instead of MAX_NUMNODES
+	 * We put array[] at the end of kmem_cache, because we want to size
+	 * this array to nr_cpu_ids slots instead of NR_CPUS
 	 * (see kmem_cache_init())
-	 * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache
-	 * is statically defined, so we reserve the max number of nodes.
+	 * We still use [NR_CPUS] and not [1] or [0] because cache_cache
+	 * is statically defined, so we reserve the max number of cpus.
 	 */
-	struct kmem_list3 *nodelists[MAX_NUMNODES];
+	struct kmem_list3 **nodelists;
+	struct array_cache *array[NR_CPUS];
 	/*
-	 * Do not add fields after nodelists[]
+	 * Do not add fields after array[]
 	 */
 };
 
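
A layout sketch (illustrative, not part of the diff) of what a single cache allocation looks like after this change; mm/slab.c sizes the object so both variable-length tails fit, and points nodelists just past the trailing array[]:

	/*
	 * struct kmem_cache allocation (sketch):
	 *
	 *   +------------------------------------------+
	 *   | fixed fields (tunables, stats, ...)      |
	 *   +------------------------------------------+
	 *   | struct array_cache *array[nr_cpu_ids]    |  <- sized to nr_cpu_ids, not NR_CPUS
	 *   +------------------------------------------+
	 *   | struct kmem_list3 *[nr_node_ids]         |  <- cachep->nodelists points here
	 *   +------------------------------------------+
	 */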
diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h
index 4382db09df4f..0ec00b39d006 100644
--- a/include/linux/slob_def.h
+++ b/include/linux/slob_def.h
@@ -1,16 +1,6 @@
 #ifndef __LINUX_SLOB_DEF_H
 #define __LINUX_SLOB_DEF_H
 
-#ifdef ARCH_DMA_MINALIGN
-#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
-#else
-#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long)
-#endif
-
-#ifndef ARCH_SLAB_MINALIGN
-#define ARCH_SLAB_MINALIGN __alignof__(unsigned long)
-#endif
-
 void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
 
 static __always_inline void *kmem_cache_alloc(struct kmem_cache *cachep,
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index c8668d161dd8..4b35c06dfbc5 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -113,16 +113,6 @@ struct kmem_cache {
 
 #define KMALLOC_SHIFT_LOW	ilog2(KMALLOC_MIN_SIZE)
 
-#ifdef ARCH_DMA_MINALIGN
-#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
-#else
-#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
-#endif
-
-#ifndef ARCH_SLAB_MINALIGN
-#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
-#endif
-
 /*
  * Maximum kmalloc object size handled by SLUB. Larger object allocations
  * are passed through to the page allocator. The page allocator "fastpath"
@@ -228,6 +218,19 @@ kmalloc_order(size_t size, gfp_t flags, unsigned int order)
 	return ret;
 }
 
+/**
+ * Calling this on allocated memory will check that the memory
+ * is expected to be in use, and print warnings if not.
+ */
+#ifdef CONFIG_SLUB_DEBUG
+extern bool verify_mem_not_deleted(const void *x);
+#else
+static inline bool verify_mem_not_deleted(const void *x)
+{
+	return true;
+}
+#endif
+
 #ifdef CONFIG_TRACING
 extern void *
 kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size);
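
For illustration only (not part of this diff), a hypothetical caller could use the new helper as a sanity check before touching an object it believes is still live; with CONFIG_SLUB_DEBUG disabled the inline stub above makes the call free:

	/* illustrative sketch -- struct foo_req, process_foo() and handle_foo() are invented names */
	static void process_foo(struct foo_req *req)
	{
		/* Prints a SLUB error report and returns false if req sits on a freelist. */
		if (!verify_mem_not_deleted(req))
			return;

		handle_foo(req);
	}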
diff --git a/mm/slab.c b/mm/slab.c
index d96e223de775..1e523ed47c61 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -574,7 +574,9 @@ static struct arraycache_init initarray_generic =
 	{ {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
 
 /* internal cache of cache description objs */
+static struct kmem_list3 *cache_cache_nodelists[MAX_NUMNODES];
 static struct kmem_cache cache_cache = {
+	.nodelists = cache_cache_nodelists,
 	.batchcount = 1,
 	.limit = BOOT_CPUCACHE_ENTRIES,
 	.shared = 1,
@@ -1492,11 +1494,10 @@ void __init kmem_cache_init(void)
 		cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
 
 	/*
-	 * struct kmem_cache size depends on nr_node_ids, which
-	 * can be less than MAX_NUMNODES.
+	 * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
 	 */
-	cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
+	cache_cache.buffer_size = offsetof(struct kmem_cache, array[nr_cpu_ids]) +
 				  nr_node_ids * sizeof(struct kmem_list3 *);
 #if DEBUG
 	cache_cache.obj_size = cache_cache.buffer_size;
 #endif
@@ -2308,6 +2309,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	if (!cachep)
 		goto oops;
 
+	cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids];
 #if DEBUG
 	cachep->obj_size = size;
 
@@ -3153,12 +3155,11 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
 	objp += obj_offset(cachep);
 	if (cachep->ctor && cachep->flags & SLAB_POISON)
 		cachep->ctor(objp);
-#if ARCH_SLAB_MINALIGN
-	if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) {
+	if (ARCH_SLAB_MINALIGN &&
+	    ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) {
 		printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
-			objp, ARCH_SLAB_MINALIGN);
+		       objp, (int)ARCH_SLAB_MINALIGN);
 	}
-#endif
 	return objp;
 }
 #else
diff --git a/mm/slob.c b/mm/slob.c
index 46e0aee33a23..0ae881831ae2 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -482,6 +482,8 @@ void *__kmalloc_node(size_t size, gfp_t gfp, int node)
 	int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
 	void *ret;
 
+	gfp &= gfp_allowed_mask;
+
 	lockdep_trace_alloc(gfp);
 
 	if (size < PAGE_SIZE - align) {
@@ -608,6 +610,10 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
 {
 	void *b;
 
+	flags &= gfp_allowed_mask;
+
+	lockdep_trace_alloc(flags);
+
 	if (c->size < PAGE_SIZE) {
 		b = slob_alloc(c->size, flags, c->align, node);
 		trace_kmem_cache_alloc_node(_RET_IP_, b, c->size,
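
For context, an illustrative sketch (not part of this diff) of why the masking must happen before the lockdep hook: early in boot gfp_allowed_mask still excludes the blocking/IO/FS bits, so lockdep should be told about the flags the allocation will actually use, not the ones the caller passed in:

	/* sketch of the intended ordering in an allocator entry point */
	gfp &= gfp_allowed_mask;	/* e.g. __GFP_WAIT is stripped until boot completes */
	lockdep_trace_alloc(gfp);	/* lockdep now reasons about the effective flags */

These two hunks bring SLOB in line with how the other allocators order these calls.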
diff --git a/mm/slub.c b/mm/slub.c
index 35f351f26193..ba83f3fd0757 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -27,6 +27,7 @@
 #include <linux/memory.h>
 #include <linux/math64.h>
 #include <linux/fault-inject.h>
+#include <linux/stacktrace.h>
 
 #include <trace/events/kmem.h>
 
@@ -191,8 +192,12 @@ static LIST_HEAD(slab_caches);
 /*
  * Tracking user of a slab.
  */
+#define TRACK_ADDRS_COUNT 16
 struct track {
 	unsigned long addr;	/* Called from address */
+#ifdef CONFIG_STACKTRACE
+	unsigned long addrs[TRACK_ADDRS_COUNT];	/* Called from address */
+#endif
 	int cpu;	/* Was running on cpu */
 	int pid;	/* Pid context */
 	unsigned long when;	/* When did the operation occur */
@@ -420,6 +425,24 @@ static void set_track(struct kmem_cache *s, void *object,
 	struct track *p = get_track(s, object, alloc);
 
 	if (addr) {
+#ifdef CONFIG_STACKTRACE
+		struct stack_trace trace;
+		int i;
+
+		trace.nr_entries = 0;
+		trace.max_entries = TRACK_ADDRS_COUNT;
+		trace.entries = p->addrs;
+		trace.skip = 3;
+		save_stack_trace(&trace);
+
+		/* See rant in lockdep.c */
+		if (trace.nr_entries != 0 &&
+		    trace.entries[trace.nr_entries - 1] == ULONG_MAX)
+			trace.nr_entries--;
+
+		for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++)
+			p->addrs[i] = 0;
+#endif
 		p->addr = addr;
 		p->cpu = smp_processor_id();
 		p->pid = current->pid;
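
(Worth noting, though not part of the diff: set_track() only runs for caches with SLAB_STORE_USER set, so the saved backtraces above are typically obtained by booting with slub_debug=U, or slub_debug=U,<cache-name> to confine the overhead to a single cache.)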
@@ -444,6 +467,16 @@ static void print_track(const char *s, struct track *t)
 
 	printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
 		s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
+#ifdef CONFIG_STACKTRACE
+	{
+		int i;
+		for (i = 0; i < TRACK_ADDRS_COUNT; i++)
+			if (t->addrs[i])
+				printk(KERN_ERR "\t%pS\n", (void *)t->addrs[i]);
+			else
+				break;
+	}
+#endif
 }
 
 static void print_tracking(struct kmem_cache *s, void *object)
@@ -557,10 +590,10 @@ static void init_object(struct kmem_cache *s, void *object, u8 val)
 		memset(p + s->objsize, val, s->inuse - s->objsize);
 }
 
-static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes)
+static u8 *check_bytes8(u8 *start, u8 value, unsigned int bytes)
 {
 	while (bytes) {
-		if (*start != (u8)value)
+		if (*start != value)
 			return start;
 		start++;
 		bytes--;
@@ -568,6 +601,38 @@ static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes)
 	return NULL;
 }
 
+static u8 *check_bytes(u8 *start, u8 value, unsigned int bytes)
+{
+	u64 value64;
+	unsigned int words, prefix;
+
+	if (bytes <= 16)
+		return check_bytes8(start, value, bytes);
+
+	value64 = value | value << 8 | value << 16 | value << 24;
+	value64 = value64 | value64 << 32;
+	prefix = 8 - ((unsigned long)start) % 8;
+
+	if (prefix) {
+		u8 *r = check_bytes8(start, value, prefix);
+		if (r)
+			return r;
+		start += prefix;
+		bytes -= prefix;
+	}
+
+	words = bytes / 8;
+
+	while (words) {
+		if (*(u64 *)start != value64)
+			return check_bytes8(start, value, 8);
+		start += 8;
+		words--;
+	}
+
+	return check_bytes8(start, value, bytes % 8);
+}
+
 static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
 				void *from, void *to)
 {
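
For illustration only (not part of this diff), the contract of check_bytes()/check_bytes8() is: return a pointer to the first byte that differs from value, or NULL when all bytes match. A hypothetical self-test of the word-at-a-time fast path could look like:

	/* illustrative sketch only -- check_bytes_selftest() is an invented name */
	static void check_bytes_selftest(void)
	{
		u8 buf[64];

		memset(buf, POISON_FREE, sizeof(buf));
		WARN_ON(check_bytes(buf, POISON_FREE, sizeof(buf)) != NULL);

		buf[40] = 0;	/* simulate a single corrupted byte */
		WARN_ON(check_bytes(buf, POISON_FREE, sizeof(buf)) != &buf[40]);
	}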
@@ -2928,6 +2993,42 @@ size_t ksize(const void *object)
 }
 EXPORT_SYMBOL(ksize);
 
+#ifdef CONFIG_SLUB_DEBUG
+bool verify_mem_not_deleted(const void *x)
+{
+	struct page *page;
+	void *object = (void *)x;
+	unsigned long flags;
+	bool rv;
+
+	if (unlikely(ZERO_OR_NULL_PTR(x)))
+		return false;
+
+	local_irq_save(flags);
+
+	page = virt_to_head_page(x);
+	if (unlikely(!PageSlab(page))) {
+		/* maybe it was from stack? */
+		rv = true;
+		goto out_unlock;
+	}
+
+	slab_lock(page);
+	if (on_freelist(page->slab, page, object)) {
+		object_err(page->slab, page, object, "Object is on free-list");
+		rv = false;
+	} else {
+		rv = true;
+	}
+	slab_unlock(page);
+
+out_unlock:
+	local_irq_restore(flags);
+	return rv;
+}
+EXPORT_SYMBOL(verify_mem_not_deleted);
+#endif
+
 void kfree(const void *x)
 {
 	struct page *page;