author	Pekka Enberg <penberg@kernel.org>	2012-12-18 05:46:20 -0500
committer	Pekka Enberg <penberg@kernel.org>	2012-12-18 05:46:20 -0500
commit	08afe22c68d8c07e8e31ee6491c37f36199ba14b (patch)
tree	875d203149b74fddb50522fd5df3d6b154f5fe1e /mm
parent	a304f836a2e6d257c1f918b3431f97ef6b33e02e (diff)
parent	4590685546a374fb0f60682ce0e3a6fd48911d46 (diff)
Merge branch 'slab/next' into slab/for-linus

Fix up a trivial merge conflict with commit baaf1dd ("mm/slob: use min_t()
to compare ARCH_SLAB_MINALIGN") that did not go through the slab tree.

Conflicts:
	mm/slob.c

Signed-off-by: Pekka Enberg <penberg@kernel.org>
Diffstat (limited to 'mm')
 -rw-r--r--  mm/slab.c        | 171
 -rw-r--r--  mm/slab.h        |  33
 -rw-r--r--  mm/slab_common.c |  73
 -rw-r--r--  mm/slob.c        |  46
 -rw-r--r--  mm/slub.c        | 224
 5 files changed, 221 insertions(+), 326 deletions(-)
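For context, the series merged here consolidates the object-alignment calculation that SLAB and SLUB used to duplicate into a single helper, calculate_alignment(), in mm/slab_common.c (see the slab_common.c and slub.c hunks below). What follows is a minimal, stand-alone userspace sketch of that helper's logic, not kernel code: the kernel macros and helpers (SLAB_HWCACHE_ALIGN, ARCH_SLAB_MINALIGN, ALIGN(), cache_line_size()) are stubbed with assumed values purely for illustration.

/*
 * Stand-alone sketch of the consolidated calculate_alignment() helper
 * shown in the mm/slab_common.c hunk below.  All macro values here are
 * stand-ins chosen for illustration, not the kernel's definitions.
 */
#include <stdio.h>

#define SLAB_HWCACHE_ALIGN	0x1UL	/* stand-in flag bit */
#define ARCH_SLAB_MINALIGN	8UL	/* assumed; arch-specific in the kernel */
#define ALIGN(x, a)		(((x) + (a) - 1) & ~((a) - 1))

static unsigned long cache_line_size(void)
{
	return 64;	/* assumed L1 cache line size */
}

static unsigned long calculate_alignment(unsigned long flags,
					 unsigned long align, unsigned long size)
{
	/*
	 * Honour SLAB_HWCACHE_ALIGN only while the object is large enough;
	 * small objects are squeezed several to a cache line.
	 */
	if (flags & SLAB_HWCACHE_ALIGN) {
		unsigned long ralign = cache_line_size();

		while (size <= ralign / 2)
			ralign /= 2;
		if (ralign > align)
			align = ralign;
	}

	if (align < ARCH_SLAB_MINALIGN)
		align = ARCH_SLAB_MINALIGN;

	/* The result is always at least pointer-aligned. */
	return ALIGN(align, sizeof(void *));
}

int main(void)
{
	printf("%lu\n", calculate_alignment(SLAB_HWCACHE_ALIGN, 0, 24));
	printf("%lu\n", calculate_alignment(0, 0, 192));
	return 0;
}

With the assumed 64-byte cache line on a 64-bit host, the example prints 32 and 8: a 24-byte hardware-cache-aligned object is packed two per cache line, while a request without SLAB_HWCACHE_ALIGN falls back to the architecture minimum.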
diff --git a/mm/slab.c b/mm/slab.c
index 6d5c83c6ddd5..2c3a2e0394db 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -162,23 +162,6 @@
  */
 static bool pfmemalloc_active __read_mostly;
 
-/* Legal flag mask for kmem_cache_create(). */
-#if DEBUG
-# define CREATE_MASK	(SLAB_RED_ZONE | \
-			 SLAB_POISON | SLAB_HWCACHE_ALIGN | \
-			 SLAB_CACHE_DMA | \
-			 SLAB_STORE_USER | \
-			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
-			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
-			 SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK)
-#else
-# define CREATE_MASK	(SLAB_HWCACHE_ALIGN | \
-			 SLAB_CACHE_DMA | \
-			 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
-			 SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD | \
-			 SLAB_DEBUG_OBJECTS | SLAB_NOLEAKTRACE | SLAB_NOTRACK)
-#endif
-
 /*
  * kmem_bufctl_t:
  *
@@ -564,15 +547,11 @@ static struct cache_names __initdata cache_names[] = {
 #undef CACHE
 };
 
-static struct arraycache_init initarray_cache __initdata =
-    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
 static struct arraycache_init initarray_generic =
     { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
 
 /* internal cache of cache description objs */
-static struct kmem_list3 *kmem_cache_nodelists[MAX_NUMNODES];
 static struct kmem_cache kmem_cache_boot = {
-	.nodelists = kmem_cache_nodelists,
 	.batchcount = 1,
 	.limit = BOOT_CPUCACHE_ENTRIES,
 	.shared = 1,
@@ -1577,28 +1556,33 @@ static void __init set_up_list3s(struct kmem_cache *cachep, int index)
 }
 
 /*
+ * The memory after the last cpu cache pointer is used for the
+ * the nodelists pointer.
+ */
+static void setup_nodelists_pointer(struct kmem_cache *cachep)
+{
+	cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids];
+}
+
+/*
  * Initialisation. Called after the page allocator have been initialised and
  * before smp_init().
  */
 void __init kmem_cache_init(void)
 {
-	size_t left_over;
 	struct cache_sizes *sizes;
 	struct cache_names *names;
 	int i;
-	int order;
-	int node;
 
 	kmem_cache = &kmem_cache_boot;
+	setup_nodelists_pointer(kmem_cache);
 
 	if (num_possible_nodes() == 1)
 		use_alien_caches = 0;
 
-	for (i = 0; i < NUM_INIT_LISTS; i++) {
+	for (i = 0; i < NUM_INIT_LISTS; i++)
 		kmem_list3_init(&initkmem_list3[i]);
-		if (i < MAX_NUMNODES)
-			kmem_cache->nodelists[i] = NULL;
-	}
+
 	set_up_list3s(kmem_cache, CACHE_CACHE);
 
 	/*
@@ -1629,37 +1613,16 @@ void __init kmem_cache_init(void)
 	 * 6) Resize the head arrays of the kmalloc caches to their final sizes.
 	 */
 
-	node = numa_mem_id();
-
 	/* 1) create the kmem_cache */
-	INIT_LIST_HEAD(&slab_caches);
-	list_add(&kmem_cache->list, &slab_caches);
-	kmem_cache->colour_off = cache_line_size();
-	kmem_cache->array[smp_processor_id()] = &initarray_cache.cache;
-	kmem_cache->nodelists[node] = &initkmem_list3[CACHE_CACHE + node];
 
 	/*
 	 * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
 	 */
-	kmem_cache->size = offsetof(struct kmem_cache, array[nr_cpu_ids]) +
-				  nr_node_ids * sizeof(struct kmem_list3 *);
-	kmem_cache->object_size = kmem_cache->size;
-	kmem_cache->size = ALIGN(kmem_cache->object_size,
-					cache_line_size());
-	kmem_cache->reciprocal_buffer_size =
-		reciprocal_value(kmem_cache->size);
-
-	for (order = 0; order < MAX_ORDER; order++) {
-		cache_estimate(order, kmem_cache->size,
-			cache_line_size(), 0, &left_over, &kmem_cache->num);
-		if (kmem_cache->num)
-			break;
-	}
-	BUG_ON(!kmem_cache->num);
-	kmem_cache->gfporder = order;
-	kmem_cache->colour = left_over / kmem_cache->colour_off;
-	kmem_cache->slab_size = ALIGN(kmem_cache->num * sizeof(kmem_bufctl_t) +
-				      sizeof(struct slab), cache_line_size());
+	create_boot_cache(kmem_cache, "kmem_cache",
+		offsetof(struct kmem_cache, array[nr_cpu_ids]) +
+				  nr_node_ids * sizeof(struct kmem_list3 *),
+				  SLAB_HWCACHE_ALIGN);
+	list_add(&kmem_cache->list, &slab_caches);
 
 	/* 2+3) create the kmalloc caches */
 	sizes = malloc_sizes;
@@ -1671,23 +1634,13 @@ void __init kmem_cache_init(void)
 	 * bug.
 	 */
 
-	sizes[INDEX_AC].cs_cachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
-	sizes[INDEX_AC].cs_cachep->name = names[INDEX_AC].name;
-	sizes[INDEX_AC].cs_cachep->size = sizes[INDEX_AC].cs_size;
-	sizes[INDEX_AC].cs_cachep->object_size = sizes[INDEX_AC].cs_size;
-	sizes[INDEX_AC].cs_cachep->align = ARCH_KMALLOC_MINALIGN;
-	__kmem_cache_create(sizes[INDEX_AC].cs_cachep, ARCH_KMALLOC_FLAGS|SLAB_PANIC);
-	list_add(&sizes[INDEX_AC].cs_cachep->list, &slab_caches);
-
-	if (INDEX_AC != INDEX_L3) {
-		sizes[INDEX_L3].cs_cachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
-		sizes[INDEX_L3].cs_cachep->name = names[INDEX_L3].name;
-		sizes[INDEX_L3].cs_cachep->size = sizes[INDEX_L3].cs_size;
-		sizes[INDEX_L3].cs_cachep->object_size = sizes[INDEX_L3].cs_size;
-		sizes[INDEX_L3].cs_cachep->align = ARCH_KMALLOC_MINALIGN;
-		__kmem_cache_create(sizes[INDEX_L3].cs_cachep, ARCH_KMALLOC_FLAGS|SLAB_PANIC);
-		list_add(&sizes[INDEX_L3].cs_cachep->list, &slab_caches);
-	}
+	sizes[INDEX_AC].cs_cachep = create_kmalloc_cache(names[INDEX_AC].name,
+					sizes[INDEX_AC].cs_size, ARCH_KMALLOC_FLAGS);
+
+	if (INDEX_AC != INDEX_L3)
+		sizes[INDEX_L3].cs_cachep =
+			create_kmalloc_cache(names[INDEX_L3].name,
+				sizes[INDEX_L3].cs_size, ARCH_KMALLOC_FLAGS);
 
 	slab_early_init = 0;
 
@@ -1699,24 +1652,14 @@ void __init kmem_cache_init(void)
 		 * Note for systems short on memory removing the alignment will
 		 * allow tighter packing of the smaller caches.
 		 */
-		if (!sizes->cs_cachep) {
-			sizes->cs_cachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
-			sizes->cs_cachep->name = names->name;
-			sizes->cs_cachep->size = sizes->cs_size;
-			sizes->cs_cachep->object_size = sizes->cs_size;
-			sizes->cs_cachep->align = ARCH_KMALLOC_MINALIGN;
-			__kmem_cache_create(sizes->cs_cachep, ARCH_KMALLOC_FLAGS|SLAB_PANIC);
-			list_add(&sizes->cs_cachep->list, &slab_caches);
-		}
+		if (!sizes->cs_cachep)
+			sizes->cs_cachep = create_kmalloc_cache(names->name,
+					sizes->cs_size, ARCH_KMALLOC_FLAGS);
+
 #ifdef CONFIG_ZONE_DMA
-		sizes->cs_dmacachep = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
-		sizes->cs_dmacachep->name = names->name_dma;
-		sizes->cs_dmacachep->size = sizes->cs_size;
-		sizes->cs_dmacachep->object_size = sizes->cs_size;
-		sizes->cs_dmacachep->align = ARCH_KMALLOC_MINALIGN;
-		__kmem_cache_create(sizes->cs_dmacachep,
-			       ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA| SLAB_PANIC);
-		list_add(&sizes->cs_dmacachep->list, &slab_caches);
+		sizes->cs_dmacachep = create_kmalloc_cache(
+			names->name_dma, sizes->cs_size,
+			SLAB_CACHE_DMA|ARCH_KMALLOC_FLAGS);
 #endif
 		sizes++;
 		names++;
@@ -1727,7 +1670,6 @@ void __init kmem_cache_init(void)
 
 		ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
 
-		BUG_ON(cpu_cache_get(kmem_cache) != &initarray_cache.cache);
 		memcpy(ptr, cpu_cache_get(kmem_cache),
 		       sizeof(struct arraycache_init));
 		/*
@@ -2282,7 +2224,15 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
 
 	if (slab_state == DOWN) {
 		/*
-		 * Note: the first kmem_cache_create must create the cache
+		 * Note: Creation of first cache (kmem_cache).
+		 * The setup_list3s is taken care
+		 * of by the caller of __kmem_cache_create
+		 */
+		cachep->array[smp_processor_id()] = &initarray_generic.cache;
+		slab_state = PARTIAL;
+	} else if (slab_state == PARTIAL) {
+		/*
+		 * Note: the second kmem_cache_create must create the cache
 		 * that's used by kmalloc(24), otherwise the creation of
 		 * further caches will BUG().
 		 */
@@ -2290,7 +2240,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
 
 		/*
 		 * If the cache that's used by kmalloc(sizeof(kmem_list3)) is
-		 * the first cache, then we need to set up all its list3s,
+		 * the second cache, then we need to set up all its list3s,
 		 * otherwise the creation of further caches will BUG().
 		 */
 		set_up_list3s(cachep, SIZE_AC);
@@ -2299,6 +2249,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
 		else
 			slab_state = PARTIAL_ARRAYCACHE;
 	} else {
+		/* Remaining boot caches */
 		cachep->array[smp_processor_id()] =
 			kmalloc(sizeof(struct arraycache_init), gfp);
 
@@ -2331,11 +2282,8 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
 
 /**
  * __kmem_cache_create - Create a cache.
- * @name: A string which is used in /proc/slabinfo to identify this cache.
- * @size: The size of objects to be created in this cache.
- * @align: The required alignment for the objects.
+ * @cachep: cache management descriptor
  * @flags: SLAB flags
- * @ctor: A constructor for the objects.
  *
  * Returns a ptr to the cache on success, NULL on failure.
  * Cannot be called within a int, but can be interrupted.
@@ -2378,11 +2326,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
 	if (flags & SLAB_DESTROY_BY_RCU)
 		BUG_ON(flags & SLAB_POISON);
 #endif
-	/*
-	 * Always checks flags, a caller might be expecting debug support which
-	 * isn't available.
-	 */
-	BUG_ON(flags & ~CREATE_MASK);
 
 	/*
 	 * Check that size is in terms of words. This is needed to avoid
@@ -2394,22 +2337,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
 		size &= ~(BYTES_PER_WORD - 1);
 	}
 
-	/* calculate the final buffer alignment: */
-
-	/* 1) arch recommendation: can be overridden for debug */
-	if (flags & SLAB_HWCACHE_ALIGN) {
-		/*
-		 * Default alignment: as specified by the arch code. Except if
-		 * an object is really small, then squeeze multiple objects into
-		 * one cacheline.
-		 */
-		ralign = cache_line_size();
-		while (size <= ralign / 2)
-			ralign /= 2;
-	} else {
-		ralign = BYTES_PER_WORD;
-	}
-
 	/*
 	 * Redzoning and user store require word alignment or possibly larger.
 	 * Note this will be overridden by architecture or caller mandated
@@ -2426,10 +2353,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
 		size &= ~(REDZONE_ALIGN - 1);
 	}
 
-	/* 2) arch mandated alignment */
-	if (ralign < ARCH_SLAB_MINALIGN) {
-		ralign = ARCH_SLAB_MINALIGN;
-	}
 	/* 3) caller mandated alignment */
 	if (ralign < cachep->align) {
 		ralign = cachep->align;
@@ -2447,7 +2370,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
 	else
 		gfp = GFP_NOWAIT;
 
-	cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids];
+	setup_nodelists_pointer(cachep);
 #if DEBUG
 
 	/*
@@ -3969,12 +3892,6 @@ void kfree(const void *objp)
 }
 EXPORT_SYMBOL(kfree);
 
-unsigned int kmem_cache_size(struct kmem_cache *cachep)
-{
-	return cachep->object_size;
-}
-EXPORT_SYMBOL(kmem_cache_size);
-
 /*
  * This initializes kmem_list3 or resizes various caches for all nodes.
 */
diff --git a/mm/slab.h b/mm/slab.h
index 5a43c2f13621..1cb9c9ee0e6f 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -32,9 +32,17 @@ extern struct list_head slab_caches;
 /* The slab cache that manages slab cache information */
 extern struct kmem_cache *kmem_cache;
 
+unsigned long calculate_alignment(unsigned long flags,
+		unsigned long align, unsigned long size);
+
 /* Functions provided by the slab allocators */
 extern int __kmem_cache_create(struct kmem_cache *, unsigned long flags);
 
+extern struct kmem_cache *create_kmalloc_cache(const char *name, size_t size,
+			unsigned long flags);
+extern void create_boot_cache(struct kmem_cache *, const char *name,
+			size_t size, unsigned long flags);
+
 #ifdef CONFIG_SLUB
 struct kmem_cache *__kmem_cache_alias(const char *name, size_t size,
 	size_t align, unsigned long flags, void (*ctor)(void *));
@@ -45,6 +53,31 @@ static inline struct kmem_cache *__kmem_cache_alias(const char *name, size_t siz
 #endif
 
 
+/* Legal flag mask for kmem_cache_create(), for various configurations */
+#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | SLAB_PANIC | \
+			 SLAB_DESTROY_BY_RCU | SLAB_DEBUG_OBJECTS )
+
+#if defined(CONFIG_DEBUG_SLAB)
+#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER)
+#elif defined(CONFIG_SLUB_DEBUG)
+#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
+			  SLAB_TRACE | SLAB_DEBUG_FREE)
+#else
+#define SLAB_DEBUG_FLAGS (0)
+#endif
+
+#if defined(CONFIG_SLAB)
+#define SLAB_CACHE_FLAGS (SLAB_MEM_SPREAD | SLAB_NOLEAKTRACE | \
+			  SLAB_RECLAIM_ACCOUNT | SLAB_TEMPORARY | SLAB_NOTRACK)
+#elif defined(CONFIG_SLUB)
+#define SLAB_CACHE_FLAGS (SLAB_NOLEAKTRACE | SLAB_RECLAIM_ACCOUNT | \
+			  SLAB_TEMPORARY | SLAB_NOTRACK)
+#else
+#define SLAB_CACHE_FLAGS (0)
+#endif
+
+#define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS)
+
 int __kmem_cache_shutdown(struct kmem_cache *);
 
 struct seq_file;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 5fb753da6cf0..a8e76d79ee65 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -73,6 +73,34 @@ static inline int kmem_cache_sanity_check(const char *name, size_t size)
 #endif
 
 /*
+ * Figure out what the alignment of the objects will be given a set of
+ * flags, a user specified alignment and the size of the objects.
+ */
+unsigned long calculate_alignment(unsigned long flags,
+		unsigned long align, unsigned long size)
+{
+	/*
+	 * If the user wants hardware cache aligned objects then follow that
+	 * suggestion if the object is sufficiently large.
+	 *
+	 * The hardware cache alignment cannot override the specified
+	 * alignment though. If that is greater then use it.
+	 */
+	if (flags & SLAB_HWCACHE_ALIGN) {
+		unsigned long ralign = cache_line_size();
+		while (size <= ralign / 2)
+			ralign /= 2;
+		align = max(align, ralign);
+	}
+
+	if (align < ARCH_SLAB_MINALIGN)
+		align = ARCH_SLAB_MINALIGN;
+
+	return ALIGN(align, sizeof(void *));
+}
+
+
+/*
  * kmem_cache_create - Create a cache.
  * @name: A string which is used in /proc/slabinfo to identify this cache.
  * @size: The size of objects to be created in this cache.
@@ -109,6 +137,13 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align
 	if (!kmem_cache_sanity_check(name, size) == 0)
 		goto out_locked;
 
+	/*
+	 * Some allocators will constraint the set of valid flags to a subset
+	 * of all flags. We expect them to define CACHE_CREATE_MASK in this
+	 * case, and we'll just provide them with a sanitized version of the
+	 * passed flags.
+	 */
+	flags &= CACHE_CREATE_MASK;
 
 	s = __kmem_cache_alias(name, size, align, flags, ctor);
 	if (s)
@@ -117,7 +152,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, size_t align
 	s = kmem_cache_zalloc(kmem_cache, GFP_KERNEL);
 	if (s) {
 		s->object_size = s->size = size;
-		s->align = align;
+		s->align = calculate_alignment(flags, align, size);
 		s->ctor = ctor;
 		s->name = kstrdup(name, GFP_KERNEL);
 		if (!s->name) {
@@ -195,6 +230,42 @@ int slab_is_available(void)
 	return slab_state >= UP;
 }
 
+#ifndef CONFIG_SLOB
+/* Create a cache during boot when no slab services are available yet */
+void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t size,
+		unsigned long flags)
+{
+	int err;
+
+	s->name = name;
+	s->size = s->object_size = size;
+	s->align = calculate_alignment(flags, ARCH_KMALLOC_MINALIGN, size);
+	err = __kmem_cache_create(s, flags);
+
+	if (err)
+		panic("Creation of kmalloc slab %s size=%zd failed. Reason %d\n",
+					name, size, err);
+
+	s->refcount = -1;	/* Exempt from merging for now */
+}
+
+struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
+				unsigned long flags)
+{
+	struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
+
+	if (!s)
+		panic("Out of memory when creating slab %s\n", name);
+
+	create_boot_cache(s, name, size, flags);
+	list_add(&s->list, &slab_caches);
+	s->refcount = 1;
+	return s;
+}
+
+#endif /* !CONFIG_SLOB */
+
+
 #ifdef CONFIG_SLABINFO
 static void print_slabinfo_header(struct seq_file *m)
 {
diff --git a/mm/slob.c b/mm/slob.c
index 1e921c5e9576..795bab7d391d 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -28,9 +28,8 @@
 * from kmalloc are prepended with a 4-byte header with the kmalloc size.
 * If kmalloc is asked for objects of PAGE_SIZE or larger, it calls
 * alloc_pages() directly, allocating compound pages so the page order
- * does not have to be separately tracked, and also stores the exact
- * allocation size in page->private so that it can be used to accurately
- * provide ksize(). These objects are detected in kfree() because slob_page()
+ * does not have to be separately tracked.
+ * These objects are detected in kfree() because PageSlab()
 * is false for them.
 *
 * SLAB is emulated on top of SLOB by simply calling constructors and
@@ -124,7 +123,6 @@ static inline void clear_slob_page_free(struct page *sp)
 
 #define SLOB_UNIT sizeof(slob_t)
 #define SLOB_UNITS(size) (((size) + SLOB_UNIT - 1)/SLOB_UNIT)
-#define SLOB_ALIGN L1_CACHE_BYTES
 
 /*
 * struct slob_rcu is inserted at the tail of allocated slob blocks, which
@@ -455,11 +453,6 @@ __do_kmalloc_node(size_t size, gfp_t gfp, int node, unsigned long caller)
 		if (likely(order))
 			gfp |= __GFP_COMP;
 		ret = slob_new_pages(gfp, order, node);
-		if (ret) {
-			struct page *page;
-			page = virt_to_page(ret);
-			page->private = size;
-		}
 
 		trace_kmalloc_node(caller, ret,
 				   size, PAGE_SIZE << order, gfp, node);
@@ -506,7 +499,7 @@ void kfree(const void *block)
 		unsigned int *m = (unsigned int *)(block - align);
 		slob_free(m, *m + align);
 	} else
-		put_page(sp);
+		__free_pages(sp, compound_order(sp));
 }
 EXPORT_SYMBOL(kfree);
 
@@ -514,37 +507,30 @@ EXPORT_SYMBOL(kfree);
 size_t ksize(const void *block)
 {
 	struct page *sp;
+	int align;
+	unsigned int *m;
 
 	BUG_ON(!block);
 	if (unlikely(block == ZERO_SIZE_PTR))
 		return 0;
 
 	sp = virt_to_page(block);
-	if (PageSlab(sp)) {
-		int align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
-		unsigned int *m = (unsigned int *)(block - align);
-		return SLOB_UNITS(*m) * SLOB_UNIT;
-	} else
-		return sp->private;
+	if (unlikely(!PageSlab(sp)))
+		return PAGE_SIZE << compound_order(sp);
+
+	align = max_t(size_t, ARCH_KMALLOC_MINALIGN, ARCH_SLAB_MINALIGN);
+	m = (unsigned int *)(block - align);
+	return SLOB_UNITS(*m) * SLOB_UNIT;
 }
 EXPORT_SYMBOL(ksize);
 
 int __kmem_cache_create(struct kmem_cache *c, unsigned long flags)
 {
-	size_t align = c->size;
-
 	if (flags & SLAB_DESTROY_BY_RCU) {
 		/* leave room for rcu footer at the end of object */
 		c->size += sizeof(struct slob_rcu);
 	}
 	c->flags = flags;
-	/* ignore alignment unless it's forced */
-	c->align = (flags & SLAB_HWCACHE_ALIGN) ? SLOB_ALIGN : 0;
-	if (c->align < ARCH_SLAB_MINALIGN)
-		c->align = ARCH_SLAB_MINALIGN;
-	if (c->align < align)
-		c->align = align;
-
 	return 0;
 }
 
@@ -558,12 +544,12 @@ void *kmem_cache_alloc_node(struct kmem_cache *c, gfp_t flags, int node)
 
 	if (c->size < PAGE_SIZE) {
 		b = slob_alloc(c->size, flags, c->align, node);
-		trace_kmem_cache_alloc_node(_RET_IP_, b, c->size,
+		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
 					    SLOB_UNITS(c->size) * SLOB_UNIT,
 					    flags, node);
 	} else {
 		b = slob_new_pages(flags, get_order(c->size), node);
-		trace_kmem_cache_alloc_node(_RET_IP_, b, c->size,
+		trace_kmem_cache_alloc_node(_RET_IP_, b, c->object_size,
 					    PAGE_SIZE << get_order(c->size),
 					    flags, node);
 	}
@@ -608,12 +594,6 @@ void kmem_cache_free(struct kmem_cache *c, void *b)
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
-unsigned int kmem_cache_size(struct kmem_cache *c)
-{
-	return c->size;
-}
-EXPORT_SYMBOL(kmem_cache_size);
-
 int __kmem_cache_shutdown(struct kmem_cache *c)
 {
 	/* No way to check for remaining objects */
diff --git a/mm/slub.c b/mm/slub.c
index 472e739278b4..9640edd2cc78 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -112,9 +112,6 @@
 * the fast path and disables lockless freelists.
 */
 
-#define SLAB_DEBUG_FLAGS (SLAB_RED_ZONE | SLAB_POISON | SLAB_STORE_USER | \
-				SLAB_TRACE | SLAB_DEBUG_FREE)
-
 static inline int kmem_cache_debug(struct kmem_cache *s)
 {
 #ifdef CONFIG_SLUB_DEBUG
@@ -179,8 +176,6 @@ static inline int kmem_cache_debug(struct kmem_cache *s)
 #define __OBJECT_POISON		0x80000000UL /* Poison object */
 #define __CMPXCHG_DOUBLE	0x40000000UL /* Use cmpxchg_double */
 
-static int kmem_size = sizeof(struct kmem_cache);
-
 #ifdef CONFIG_SMP
 static struct notifier_block slab_notifier;
 #endif
@@ -1092,11 +1087,11 @@ static noinline struct kmem_cache_node *free_debug_processing(
 	if (!check_object(s, page, object, SLUB_RED_ACTIVE))
 		goto out;
 
-	if (unlikely(s != page->slab)) {
+	if (unlikely(s != page->slab_cache)) {
 		if (!PageSlab(page)) {
 			slab_err(s, page, "Attempt to free object(0x%p) "
 				"outside of slab", object);
-		} else if (!page->slab) {
+		} else if (!page->slab_cache) {
 			printk(KERN_ERR
 				"SLUB <none>: no slab for object 0x%p.\n",
 				object);
@@ -1357,7 +1352,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 		goto out;
 
 	inc_slabs_node(s, page_to_nid(page), page->objects);
-	page->slab = s;
+	page->slab_cache = s;
 	__SetPageSlab(page);
 	if (page->pfmemalloc)
 		SetPageSlabPfmemalloc(page);
@@ -1424,7 +1419,7 @@ static void rcu_free_slab(struct rcu_head *h)
 	else
 		page = container_of((struct list_head *)h, struct page, lru);
 
-	__free_slab(page->slab, page);
+	__free_slab(page->slab_cache, page);
 }
 
 static void free_slab(struct kmem_cache *s, struct page *page)
@@ -1872,12 +1867,14 @@ redo:
 /*
 * Unfreeze all the cpu partial slabs.
 *
- * This function must be called with interrupt disabled.
+ * This function must be called with interrupts disabled
+ * for the cpu using c (or some other guarantee must be there
+ * to guarantee no concurrent accesses).
 */
-static void unfreeze_partials(struct kmem_cache *s)
+static void unfreeze_partials(struct kmem_cache *s,
+		struct kmem_cache_cpu *c)
 {
 	struct kmem_cache_node *n = NULL, *n2 = NULL;
-	struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
 	struct page *page, *discard_page = NULL;
 
 	while ((page = c->partial)) {
@@ -1963,7 +1960,7 @@ static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
 				 * set to the per node partial list.
 				 */
 				local_irq_save(flags);
-				unfreeze_partials(s);
+				unfreeze_partials(s, this_cpu_ptr(s->cpu_slab));
 				local_irq_restore(flags);
 				oldpage = NULL;
 				pobjects = 0;
@@ -2006,7 +2003,7 @@ static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
 		if (c->page)
 			flush_slab(s, c);
 
-		unfreeze_partials(s);
+		unfreeze_partials(s, c);
 	}
 }
 
@@ -2459,7 +2456,6 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 	void *prior;
 	void **object = (void *)x;
 	int was_frozen;
-	int inuse;
 	struct page new;
 	unsigned long counters;
 	struct kmem_cache_node *n = NULL;
@@ -2472,13 +2468,17 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 		return;
 
 	do {
+		if (unlikely(n)) {
+			spin_unlock_irqrestore(&n->list_lock, flags);
+			n = NULL;
+		}
 		prior = page->freelist;
 		counters = page->counters;
 		set_freepointer(s, object, prior);
 		new.counters = counters;
 		was_frozen = new.frozen;
 		new.inuse--;
-		if ((!new.inuse || !prior) && !was_frozen && !n) {
+		if ((!new.inuse || !prior) && !was_frozen) {
 
 			if (!kmem_cache_debug(s) && !prior)
 
@@ -2503,7 +2503,6 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 
 			}
 		}
-		inuse = new.inuse;
 
 	} while (!cmpxchg_double_slab(s, page,
 		prior, counters,
@@ -2529,25 +2528,17 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 		return;
 	}
 
+	if (unlikely(!new.inuse && n->nr_partial > s->min_partial))
+		goto slab_empty;
+
 	/*
-	 * was_frozen may have been set after we acquired the list_lock in
-	 * an earlier loop. So we need to check it here again.
+	 * Objects left in the slab. If it was not on the partial list before
+	 * then add it.
 	 */
-	if (was_frozen)
-		stat(s, FREE_FROZEN);
-	else {
-		if (unlikely(!inuse && n->nr_partial > s->min_partial))
-			goto slab_empty;
-
-		/*
-		 * Objects left in the slab. If it was not on the partial list before
-		 * then add it.
-		 */
-		if (unlikely(!prior)) {
-			remove_full(s, page);
-			add_partial(n, page, DEACTIVATE_TO_TAIL);
-			stat(s, FREE_ADD_PARTIAL);
-		}
+	if (kmem_cache_debug(s) && unlikely(!prior)) {
+		remove_full(s, page);
+		add_partial(n, page, DEACTIVATE_TO_TAIL);
+		stat(s, FREE_ADD_PARTIAL);
 	}
 	spin_unlock_irqrestore(&n->list_lock, flags);
 	return;
@@ -2623,9 +2614,9 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
 
 	page = virt_to_head_page(x);
 
-	if (kmem_cache_debug(s) && page->slab != s) {
+	if (kmem_cache_debug(s) && page->slab_cache != s) {
 		pr_err("kmem_cache_free: Wrong slab cache. %s but object"
-			" is from %s\n", page->slab->name, s->name);
+			" is from %s\n", page->slab_cache->name, s->name);
 		WARN_ON_ONCE(1);
 		return;
 	}
@@ -2769,32 +2760,6 @@ static inline int calculate_order(int size, int reserved)
 	return -ENOSYS;
 }
 
-/*
- * Figure out what the alignment of the objects will be.
- */
-static unsigned long calculate_alignment(unsigned long flags,
-		unsigned long align, unsigned long size)
-{
-	/*
-	 * If the user wants hardware cache aligned objects then follow that
-	 * suggestion if the object is sufficiently large.
-	 *
-	 * The hardware cache alignment cannot override the specified
-	 * alignment though. If that is greater then use it.
-	 */
-	if (flags & SLAB_HWCACHE_ALIGN) {
-		unsigned long ralign = cache_line_size();
-		while (size <= ralign / 2)
-			ralign /= 2;
-		align = max(align, ralign);
-	}
-
-	if (align < ARCH_SLAB_MINALIGN)
-		align = ARCH_SLAB_MINALIGN;
-
-	return ALIGN(align, sizeof(void *));
-}
-
 static void
 init_kmem_cache_node(struct kmem_cache_node *n)
 {
@@ -2928,7 +2893,6 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 {
 	unsigned long flags = s->flags;
 	unsigned long size = s->object_size;
-	unsigned long align = s->align;
 	int order;
 
 	/*
@@ -3000,19 +2964,11 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 #endif
 
 	/*
-	 * Determine the alignment based on various parameters that the
-	 * user specified and the dynamic determination of cache line size
-	 * on bootup.
-	 */
-	align = calculate_alignment(flags, align, s->object_size);
-	s->align = align;
-
-	/*
 	 * SLUB stores one object immediately after another beginning from
 	 * offset 0. In order to align the objects we have to simply size
 	 * each object to conform to the alignment.
 	 */
-	size = ALIGN(size, align);
+	size = ALIGN(size, s->align);
 	s->size = size;
 	if (forced_order >= 0)
 		order = forced_order;
@@ -3041,7 +2997,6 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 	s->max = s->oo;
 
 	return !!oo_objects(s->oo);
-
 }
 
 static int kmem_cache_open(struct kmem_cache *s, unsigned long flags)
@@ -3127,15 +3082,6 @@ error:
 	return -EINVAL;
 }
 
-/*
- * Determine the size of a slab object
- */
-unsigned int kmem_cache_size(struct kmem_cache *s)
-{
-	return s->object_size;
-}
-EXPORT_SYMBOL(kmem_cache_size);
-
 static void list_slab_objects(struct kmem_cache *s, struct page *page,
 							const char *text)
 {
@@ -3261,32 +3207,6 @@ static int __init setup_slub_nomerge(char *str)
 
 __setup("slub_nomerge", setup_slub_nomerge);
 
-static struct kmem_cache *__init create_kmalloc_cache(const char *name,
-						int size, unsigned int flags)
-{
-	struct kmem_cache *s;
-
-	s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
-
-	s->name = name;
-	s->size = s->object_size = size;
-	s->align = ARCH_KMALLOC_MINALIGN;
-
-	/*
-	 * This function is called with IRQs disabled during early-boot on
-	 * single CPU so there's no need to take slab_mutex here.
-	 */
-	if (kmem_cache_open(s, flags))
-		goto panic;
-
-	list_add(&s->list, &slab_caches);
-	return s;
-
-panic:
-	panic("Creation of kmalloc slab %s size=%d failed.\n", name, size);
-	return NULL;
-}
-
 /*
 * Conversion table for small slabs sizes / 8 to the index in the
 * kmalloc array. This is necessary for slabs < 192 since we have non power
@@ -3424,7 +3344,7 @@ size_t ksize(const void *object)
 		return PAGE_SIZE << compound_order(page);
 	}
 
-	return slab_ksize(page->slab);
+	return slab_ksize(page->slab_cache);
 }
 EXPORT_SYMBOL(ksize);
 
@@ -3449,8 +3369,8 @@ bool verify_mem_not_deleted(const void *x)
 	}
 
 	slab_lock(page);
-	if (on_freelist(page->slab, page, object)) {
-		object_err(page->slab, page, object, "Object is on free-list");
+	if (on_freelist(page->slab_cache, page, object)) {
+		object_err(page->slab_cache, page, object, "Object is on free-list");
 		rv = false;
 	} else {
 		rv = true;
@@ -3481,7 +3401,7 @@ void kfree(const void *x)
 		__free_pages(page, compound_order(page));
 		return;
 	}
-	slab_free(page->slab, page, object, _RET_IP_);
+	slab_free(page->slab_cache, page, object, _RET_IP_);
 }
 EXPORT_SYMBOL(kfree);
 
@@ -3676,15 +3596,16 @@ static int slab_memory_callback(struct notifier_block *self,
 
 /*
 * Used for early kmem_cache structures that were allocated using
- * the page allocator
+ * the page allocator. Allocate them properly then fix up the pointers
+ * that may be pointing to the wrong kmem_cache structure.
 */
 
-static void __init kmem_cache_bootstrap_fixup(struct kmem_cache *s)
+static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
 {
 	int node;
+	struct kmem_cache *s = kmem_cache_zalloc(kmem_cache, GFP_NOWAIT);
 
-	list_add(&s->list, &slab_caches);
-	s->refcount = -1;
+	memcpy(s, static_cache, kmem_cache->object_size);
 
 	for_each_node_state(node, N_NORMAL_MEMORY) {
 		struct kmem_cache_node *n = get_node(s, node);
@@ -3692,78 +3613,52 @@ static void __init kmem_cache_bootstrap_fixup(struct kmem_cache *s)
 
 		if (n) {
 			list_for_each_entry(p, &n->partial, lru)
-				p->slab = s;
+				p->slab_cache = s;
 
 #ifdef CONFIG_SLUB_DEBUG
 			list_for_each_entry(p, &n->full, lru)
-				p->slab = s;
+				p->slab_cache = s;
 #endif
 		}
 	}
+	list_add(&s->list, &slab_caches);
+	return s;
 }
 
 void __init kmem_cache_init(void)
 {
+	static __initdata struct kmem_cache boot_kmem_cache,
+		boot_kmem_cache_node;
 	int i;
-	int caches = 0;
-	struct kmem_cache *temp_kmem_cache;
-	int order;
-	struct kmem_cache *temp_kmem_cache_node;
-	unsigned long kmalloc_size;
+	int caches = 2;
 
 	if (debug_guardpage_minorder())
 		slub_max_order = 0;
 
-	kmem_size = offsetof(struct kmem_cache, node) +
-			nr_node_ids * sizeof(struct kmem_cache_node *);
-
-	/* Allocate two kmem_caches from the page allocator */
-	kmalloc_size = ALIGN(kmem_size, cache_line_size());
-	order = get_order(2 * kmalloc_size);
-	kmem_cache = (void *)__get_free_pages(GFP_NOWAIT | __GFP_ZERO, order);
-
-	/*
-	 * Must first have the slab cache available for the allocations of the
-	 * struct kmem_cache_node's. There is special bootstrap code in
-	 * kmem_cache_open for slab_state == DOWN.
-	 */
-	kmem_cache_node = (void *)kmem_cache + kmalloc_size;
+	kmem_cache_node = &boot_kmem_cache_node;
+	kmem_cache = &boot_kmem_cache;
 
-	kmem_cache_node->name = "kmem_cache_node";
-	kmem_cache_node->size = kmem_cache_node->object_size =
-		sizeof(struct kmem_cache_node);
-	kmem_cache_open(kmem_cache_node, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
+	create_boot_cache(kmem_cache_node, "kmem_cache_node",
		sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN);
 
 	hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
 
 	/* Able to allocate the per node structures */
 	slab_state = PARTIAL;
 
-	temp_kmem_cache = kmem_cache;
-	kmem_cache->name = "kmem_cache";
-	kmem_cache->size = kmem_cache->object_size = kmem_size;
-	kmem_cache_open(kmem_cache, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
+	create_boot_cache(kmem_cache, "kmem_cache",
+		offsetof(struct kmem_cache, node) +
+				nr_node_ids * sizeof(struct kmem_cache_node *),
+		SLAB_HWCACHE_ALIGN);
 
-	kmem_cache = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
-	memcpy(kmem_cache, temp_kmem_cache, kmem_size);
+	kmem_cache = bootstrap(&boot_kmem_cache);
 
 	/*
 	 * Allocate kmem_cache_node properly from the kmem_cache slab.
 	 * kmem_cache_node is separately allocated so no need to
 	 * update any list pointers.
 	 */
-	temp_kmem_cache_node = kmem_cache_node;
-
-	kmem_cache_node = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
-	memcpy(kmem_cache_node, temp_kmem_cache_node, kmem_size);
-
-	kmem_cache_bootstrap_fixup(kmem_cache_node);
-
-	caches++;
-	kmem_cache_bootstrap_fixup(kmem_cache);
-	caches++;
-	/* Free temporary boot structure */
-	free_pages((unsigned long)temp_kmem_cache, order);
+	kmem_cache_node = bootstrap(&boot_kmem_cache_node);
 
 	/* Now we can use the kmem_cache to allocate kmalloc slabs */
 
@@ -3964,6 +3859,10 @@ int __kmem_cache_create(struct kmem_cache *s, unsigned long flags)
 	if (err)
 		return err;
 
+	/* Mutex is not taken during early boot */
+	if (slab_state <= UP)
+		return 0;
+
 	mutex_unlock(&slab_mutex);
 	err = sysfs_slab_add(s);
 	mutex_lock(&slab_mutex);
@@ -5265,13 +5164,8 @@ static int sysfs_slab_add(struct kmem_cache *s)
 {
 	int err;
 	const char *name;
-	int unmergeable;
-
-	if (slab_state < FULL)
-		/* Defer until later */
-		return 0;
+	int unmergeable = slab_unmergeable(s);
 
-	unmergeable = slab_unmergeable(s);
 	if (unmergeable) {
 		/*
 		 * Slabcache can never be merged so we can use the name proper.