author		Linus Torvalds <torvalds@linux-foundation.org>	2011-07-22 15:44:30 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2011-07-22 15:44:30 -0400
commit		f99b7880cb9863e11441bd8b2f31d4f556ef1a44 (patch)
tree		6f3dc6e33e847b431dd899bd968d799f0d4a8fff
parent		02f8c6aee8df3cdc935e9bdd4f2d020306035dbe (diff)
parent		7ea466f2256b02a7047dfd47d76a2f6c1e427e3e (diff)
Merge branch 'slab-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6
* 'slab-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/slab-2.6:
  slab: fix DEBUG_SLAB warning
  slab: shrink sizeof(struct kmem_cache)
  slab: fix DEBUG_SLAB build
  SLUB: Fix missing <linux/stacktrace.h> include
  slub: reduce overhead of slub_debug
  slub: Add method to verify memory is not freed
  slub: Enable backtrace for create/delete points
  slab allocators: Provide generic description of alignment defines
  slab, slub, slob: Unify alignment definition
  slob/lockdep: Fix gfp flags passed to lockdep
-rw-r--r--	include/linux/slab.h		 20
-rw-r--r--	include/linux/slab_def.h	 52
-rw-r--r--	include/linux/slob_def.h	 10
-rw-r--r--	include/linux/slub_def.h	 23
-rw-r--r--	mm/slab.c			 17
-rw-r--r--	mm/slob.c			  6
-rw-r--r--	mm/slub.c			105
7 files changed, 164 insertions(+), 69 deletions(-)
diff --git a/include/linux/slab.h b/include/linux/slab.h
index ad4dd1c8d30a..573c809c33d9 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -134,6 +134,26 @@ unsigned int kmem_cache_size(struct kmem_cache *);
 #define KMALLOC_MAX_ORDER	(KMALLOC_SHIFT_HIGH - PAGE_SHIFT)
 
 /*
+ * Some archs want to perform DMA into kmalloc caches and need a guaranteed
+ * alignment larger than the alignment of a 64-bit integer.
+ * Setting ARCH_KMALLOC_MINALIGN in arch headers allows that.
+ */
+#ifdef ARCH_DMA_MINALIGN
+#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
+#else
+#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
+#endif
+
+/*
+ * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment.
+ * Intended for arches that get misalignment faults even for 64 bit integer
+ * aligned buffers.
+ */
+#ifndef ARCH_SLAB_MINALIGN
+#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
+#endif
+
+/*
  * Common kmalloc functions provided by all allocators
  */
 void * __must_check __krealloc(const void *, size_t, gfp_t);
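
For illustration only (not part of this diff): with the two definitions above now living in <linux/slab.h>, an architecture that performs DMA straight into kmalloc() buffers only has to provide ARCH_DMA_MINALIGN in its own headers; ARM's cache header follows roughly this pattern (values vary by configuration):

	/* arch/<arch>/include/asm/cache.h -- illustrative sketch */
	#define L1_CACHE_SHIFT		6
	#define L1_CACHE_BYTES		(1 << L1_CACHE_SHIFT)

	/*
	 * Memory returned by kmalloc() may be used for DMA, so we must make
	 * sure that all such allocations are cache aligned.
	 */
	#define ARCH_DMA_MINALIGN	L1_CACHE_BYTES

Every other architecture falls back to __alignof__(unsigned long long), as before.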
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 83203ae9390b..d00e0bacda93 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -18,53 +18,25 @@
 #include <trace/events/kmem.h>
 
 /*
- * Enforce a minimum alignment for the kmalloc caches.
- * Usually, the kmalloc caches are cache_line_size() aligned, except when
- * DEBUG and FORCED_DEBUG are enabled, then they are BYTES_PER_WORD aligned.
- * Some archs want to perform DMA into kmalloc caches and need a guaranteed
- * alignment larger than the alignment of a 64-bit integer.
- * ARCH_KMALLOC_MINALIGN allows that.
- * Note that increasing this value may disable some debug features.
- */
-#ifdef ARCH_DMA_MINALIGN
-#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
-#else
-#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
-#endif
-
-#ifndef ARCH_SLAB_MINALIGN
-/*
- * Enforce a minimum alignment for all caches.
- * Intended for archs that get misalignment faults even for BYTES_PER_WORD
- * aligned buffers. Includes ARCH_KMALLOC_MINALIGN.
- * If possible: Do not enable this flag for CONFIG_DEBUG_SLAB, it disables
- * some debug features.
- */
-#define ARCH_SLAB_MINALIGN 0
-#endif
-
-/*
  * struct kmem_cache
  *
  * manages a cache.
  */
 
 struct kmem_cache {
-/* 1) per-cpu data, touched during every alloc/free */
-	struct array_cache *array[NR_CPUS];
-/* 2) Cache tunables. Protected by cache_chain_mutex */
+/* 1) Cache tunables. Protected by cache_chain_mutex */
 	unsigned int batchcount;
 	unsigned int limit;
 	unsigned int shared;
 
 	unsigned int buffer_size;
 	u32 reciprocal_buffer_size;
-/* 3) touched by every alloc & free from the backend */
+/* 2) touched by every alloc & free from the backend */
 
 	unsigned int flags;		/* constant flags */
 	unsigned int num;		/* # of objs per slab */
 
-/* 4) cache_grow/shrink */
+/* 3) cache_grow/shrink */
 	/* order of pgs per slab (2^n) */
 	unsigned int gfporder;
 
@@ -80,11 +52,11 @@ struct kmem_cache {
 	/* constructor func */
 	void (*ctor)(void *obj);
 
-/* 5) cache creation/removal */
+/* 4) cache creation/removal */
 	const char *name;
 	struct list_head next;
 
-/* 6) statistics */
+/* 5) statistics */
 #ifdef CONFIG_DEBUG_SLAB
 	unsigned long num_active;
 	unsigned long num_allocations;
@@ -111,16 +83,18 @@ struct kmem_cache {
 	int obj_size;
 #endif /* CONFIG_DEBUG_SLAB */
 
+/* 6) per-cpu/per-node data, touched during every alloc/free */
 	/*
-	 * We put nodelists[] at the end of kmem_cache, because we want to size
-	 * this array to nr_node_ids slots instead of MAX_NUMNODES
+	 * We put array[] at the end of kmem_cache, because we want to size
+	 * this array to nr_cpu_ids slots instead of NR_CPUS
 	 * (see kmem_cache_init())
-	 * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache
-	 * is statically defined, so we reserve the max number of nodes.
+	 * We still use [NR_CPUS] and not [1] or [0] because cache_cache
+	 * is statically defined, so we reserve the max number of cpus.
 	 */
-	struct kmem_list3 *nodelists[MAX_NUMNODES];
+	struct kmem_list3 **nodelists;
+	struct array_cache *array[NR_CPUS];
 	/*
-	 * Do not add fields after nodelists[]
+	 * Do not add fields after array[]
 	 */
 };
 
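
A layout sketch (illustrative, not part of the diff) of what a single cache allocation looks like after this change; mm/slab.c sizes the object so both variable-length tails fit, and points nodelists just past the trailing array[]:

	/*
	 * struct kmem_cache allocation (sketch):
	 *
	 *   +------------------------------------------+
	 *   | fixed fields (tunables, stats, ...)      |
	 *   +------------------------------------------+
	 *   | struct array_cache *array[nr_cpu_ids]    |  <- sized to nr_cpu_ids, not NR_CPUS
	 *   +------------------------------------------+
	 *   | struct kmem_list3 *[nr_node_ids]         |  <- cachep->nodelists points here
	 *   +------------------------------------------+
	 */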
diff --git a/include/linux/slob_def.h b/include/linux/slob_def.h
index 4382db09df4f..0ec00b39d006 100644
--- a/include/linux/slob_def.h
+++ b/include/linux/slob_def.h
@@ -1,16 +1,6 @@
 #ifndef __LINUX_SLOB_DEF_H
 #define __LINUX_SLOB_DEF_H
 
-#ifdef ARCH_DMA_MINALIGN
-#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
-#else
-#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long)
-#endif
-
-#ifndef ARCH_SLAB_MINALIGN
-#define ARCH_SLAB_MINALIGN __alignof__(unsigned long)
-#endif
-
 void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node);
 
 static __always_inline void *kmem_cache_alloc(struct kmem_cache *cachep,
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index c8668d161dd8..4b35c06dfbc5 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -113,16 +113,6 @@ struct kmem_cache {
 
 #define KMALLOC_SHIFT_LOW	ilog2(KMALLOC_MIN_SIZE)
 
-#ifdef ARCH_DMA_MINALIGN
-#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
-#else
-#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
-#endif
-
-#ifndef ARCH_SLAB_MINALIGN
-#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long)
-#endif
-
 /*
  * Maximum kmalloc object size handled by SLUB. Larger object allocations
  * are passed through to the page allocator. The page allocator "fastpath"
@@ -228,6 +218,19 @@ kmalloc_order(size_t size, gfp_t flags, unsigned int order)
 	return ret;
 }
 
+/**
+ * Calling this on allocated memory will check that the memory
+ * is expected to be in use, and print warnings if not.
+ */
+#ifdef CONFIG_SLUB_DEBUG
+extern bool verify_mem_not_deleted(const void *x);
+#else
+static inline bool verify_mem_not_deleted(const void *x)
+{
+	return true;
+}
+#endif
+
 #ifdef CONFIG_TRACING
 extern void *
 kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size);
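
For illustration only (not part of this diff), a hypothetical caller could use the new helper as a sanity check before touching an object it believes is still live; with CONFIG_SLUB_DEBUG disabled the inline stub above makes the call free:

	/* illustrative sketch -- struct foo_req, process_foo() and handle_foo() are invented names */
	static void process_foo(struct foo_req *req)
	{
		/* Prints a SLUB error report and returns false if req sits on a freelist. */
		if (!verify_mem_not_deleted(req))
			return;

		handle_foo(req);
	}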
diff --git a/mm/slab.c b/mm/slab.c
index d96e223de775..1e523ed47c61 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -574,7 +574,9 @@ static struct arraycache_init initarray_generic =
 	{ {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
 
 /* internal cache of cache description objs */
+static struct kmem_list3 *cache_cache_nodelists[MAX_NUMNODES];
 static struct kmem_cache cache_cache = {
+	.nodelists = cache_cache_nodelists,
 	.batchcount = 1,
 	.limit = BOOT_CPUCACHE_ENTRIES,
 	.shared = 1,
@@ -1492,11 +1494,10 @@ void __init kmem_cache_init(void)
 		cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
 
 	/*
-	 * struct kmem_cache size depends on nr_node_ids, which
-	 * can be less than MAX_NUMNODES.
+	 * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
 	 */
-	cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) +
+	cache_cache.buffer_size = offsetof(struct kmem_cache, array[nr_cpu_ids]) +
 				  nr_node_ids * sizeof(struct kmem_list3 *);
 #if DEBUG
 	cache_cache.obj_size = cache_cache.buffer_size;
 #endif
@@ -2308,6 +2309,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	if (!cachep)
 		goto oops;
 
+	cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids];
 #if DEBUG
 	cachep->obj_size = size;
 
@@ -3153,12 +3155,11 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
 	objp += obj_offset(cachep);
 	if (cachep->ctor && cachep->flags & SLAB_POISON)
 		cachep->ctor(objp);
-#if ARCH_SLAB_MINALIGN
-	if ((u32)objp & (ARCH_SLAB_MINALIGN-1)) {
+	if (ARCH_SLAB_MINALIGN &&
+	    ((unsigned long)objp & (ARCH_SLAB_MINALIGN-1))) {
 		printk(KERN_ERR "0x%p: not aligned to ARCH_SLAB_MINALIGN=%d\n",
-			objp, ARCH_SLAB_MINALIGN);
+		       objp, (int)ARCH_SLAB_MINALIGN);
 	}
-#endif
 	return objp;
 }
 #else
diff --git a/mm/slob.c b/mm/slob.c
index 46e0aee33a23..0ae881831ae2 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -482,6 +482,8 @@ void *__kmalloc_node(size_t size, gfp_t gfp, int node)
 	int align = max(ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
 	void *ret;
 
+	gfp &= gfp_allowed_mask;
+
 	lockdep_trace_alloc(gfp);
 
 	if (size < PAGE_SIZE - align) {
@@ -608,6 +610,10 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
 {
 	void *b;
 
+	flags &= gfp_allowed_mask;
+
+	lockdep_trace_alloc(flags);
+
 	if (c->size < PAGE_SIZE) {
 		b = slob_alloc(c->size, flags, c->align, node);
 		trace_kmem_cache_alloc_node(_RET_IP_, b, c->size,
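
For context, an illustrative sketch (not part of this diff) of why the masking must happen before the lockdep hook: early in boot gfp_allowed_mask still excludes the blocking/IO/FS bits, so lockdep should be told about the flags the allocation will actually use, not the ones the caller passed in:

	/* sketch of the intended ordering in an allocator entry point */
	gfp &= gfp_allowed_mask;	/* e.g. __GFP_WAIT is stripped until boot completes */
	lockdep_trace_alloc(gfp);	/* lockdep now reasons about the effective flags */

These two hunks bring SLOB in line with how the other allocators order these calls.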
diff --git a/mm/slub.c b/mm/slub.c
index 35f351f26193..ba83f3fd0757 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -27,6 +27,7 @@
 #include <linux/memory.h>
 #include <linux/math64.h>
 #include <linux/fault-inject.h>
+#include <linux/stacktrace.h>
 
 #include <trace/events/kmem.h>
 
@@ -191,8 +192,12 @@ static LIST_HEAD(slab_caches);
 /*
  * Tracking user of a slab.
  */
+#define TRACK_ADDRS_COUNT 16
 struct track {
 	unsigned long addr;	/* Called from address */
+#ifdef CONFIG_STACKTRACE
+	unsigned long addrs[TRACK_ADDRS_COUNT];	/* Called from address */
+#endif
 	int cpu;	/* Was running on cpu */
 	int pid;	/* Pid context */
 	unsigned long when;	/* When did the operation occur */
@@ -420,6 +425,24 @@ static void set_track(struct kmem_cache *s, void *object,
 	struct track *p = get_track(s, object, alloc);
 
 	if (addr) {
+#ifdef CONFIG_STACKTRACE
+		struct stack_trace trace;
+		int i;
+
+		trace.nr_entries = 0;
+		trace.max_entries = TRACK_ADDRS_COUNT;
+		trace.entries = p->addrs;
+		trace.skip = 3;
+		save_stack_trace(&trace);
+
+		/* See rant in lockdep.c */
+		if (trace.nr_entries != 0 &&
+		    trace.entries[trace.nr_entries - 1] == ULONG_MAX)
+			trace.nr_entries--;
+
+		for (i = trace.nr_entries; i < TRACK_ADDRS_COUNT; i++)
+			p->addrs[i] = 0;
+#endif
 		p->addr = addr;
 		p->cpu = smp_processor_id();
 		p->pid = current->pid;
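
(Worth noting, though not part of the diff: set_track() only runs for caches with SLAB_STORE_USER set, so the saved backtraces above are typically obtained by booting with slub_debug=U, or slub_debug=U,<cache-name> to confine the overhead to a single cache.)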
@@ -444,6 +467,16 @@ static void print_track(const char *s, struct track *t)
 
 	printk(KERN_ERR "INFO: %s in %pS age=%lu cpu=%u pid=%d\n",
 		s, (void *)t->addr, jiffies - t->when, t->cpu, t->pid);
+#ifdef CONFIG_STACKTRACE
+	{
+		int i;
+		for (i = 0; i < TRACK_ADDRS_COUNT; i++)
+			if (t->addrs[i])
+				printk(KERN_ERR "\t%pS\n", (void *)t->addrs[i]);
+			else
+				break;
+	}
+#endif
 }
 
 static void print_tracking(struct kmem_cache *s, void *object)
@@ -557,10 +590,10 @@ static void init_object(struct kmem_cache *s, void *object, u8 val)
 		memset(p + s->objsize, val, s->inuse - s->objsize);
 }
 
-static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes)
+static u8 *check_bytes8(u8 *start, u8 value, unsigned int bytes)
 {
 	while (bytes) {
-		if (*start != (u8)value)
+		if (*start != value)
 			return start;
 		start++;
 		bytes--;
@@ -568,6 +601,38 @@ static u8 *check_bytes(u8 *start, unsigned int value, unsigned int bytes)
 	return NULL;
 }
 
+static u8 *check_bytes(u8 *start, u8 value, unsigned int bytes)
+{
+	u64 value64;
+	unsigned int words, prefix;
+
+	if (bytes <= 16)
+		return check_bytes8(start, value, bytes);
+
+	value64 = value | value << 8 | value << 16 | value << 24;
+	value64 = value64 | value64 << 32;
+	prefix = 8 - ((unsigned long)start) % 8;
+
+	if (prefix) {
+		u8 *r = check_bytes8(start, value, prefix);
+		if (r)
+			return r;
+		start += prefix;
+		bytes -= prefix;
+	}
+
+	words = bytes / 8;
+
+	while (words) {
+		if (*(u64 *)start != value64)
+			return check_bytes8(start, value, 8);
+		start += 8;
+		words--;
+	}
+
+	return check_bytes8(start, value, bytes % 8);
+}
+
 static void restore_bytes(struct kmem_cache *s, char *message, u8 data,
 				void *from, void *to)
 {
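
For illustration only (not part of this diff), the contract of check_bytes()/check_bytes8() is: return a pointer to the first byte that differs from value, or NULL when all bytes match. A hypothetical self-test of the word-at-a-time fast path could look like:

	/* illustrative sketch only -- check_bytes_selftest() is an invented name */
	static void check_bytes_selftest(void)
	{
		u8 buf[64];

		memset(buf, POISON_FREE, sizeof(buf));
		WARN_ON(check_bytes(buf, POISON_FREE, sizeof(buf)) != NULL);

		buf[40] = 0;	/* simulate a single corrupted byte */
		WARN_ON(check_bytes(buf, POISON_FREE, sizeof(buf)) != &buf[40]);
	}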
@@ -2928,6 +2993,42 @@ size_t ksize(const void *object)
 }
 EXPORT_SYMBOL(ksize);
 
+#ifdef CONFIG_SLUB_DEBUG
+bool verify_mem_not_deleted(const void *x)
+{
+	struct page *page;
+	void *object = (void *)x;
+	unsigned long flags;
+	bool rv;
+
+	if (unlikely(ZERO_OR_NULL_PTR(x)))
+		return false;
+
+	local_irq_save(flags);
+
+	page = virt_to_head_page(x);
+	if (unlikely(!PageSlab(page))) {
+		/* maybe it was from stack? */
+		rv = true;
+		goto out_unlock;
+	}
+
+	slab_lock(page);
+	if (on_freelist(page->slab, page, object)) {
+		object_err(page->slab, page, object, "Object is on free-list");
+		rv = false;
+	} else {
+		rv = true;
+	}
+	slab_unlock(page);
+
+out_unlock:
+	local_irq_restore(flags);
+	return rv;
+}
+EXPORT_SYMBOL(verify_mem_not_deleted);
+#endif
+
 void kfree(const void *x)
 {
 	struct page *page;