author		Joonsoo Kim <iamjoonsoo.kim@lge.com>	2014-10-09 18:26:27 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-10-09 22:25:51 -0400
commit		bf0dea23a9c094ae869a88bb694fbe966671bf6d (patch)
tree		420ca01f321664323b3ad0eeead8f2b4e04cd51e /mm/slab.c
parent		12220dea07f1ac6ac717707104773d771c3f3077 (diff)
mm/slab: use percpu allocator for cpu cache
Because of a chicken-and-egg problem, initialization of SLAB is really complicated. We need to allocate the cpu cache through SLAB to make kmem_cache work, but before kmem_cache is initialized, allocation through SLAB is impossible.

SLUB, on the other hand, initializes in a simpler way: it uses the percpu allocator to allocate the cpu cache, so there is no chicken-and-egg problem.

So, this patch tries to use the percpu allocator in SLAB as well. This simplifies the initialization step in SLAB so that the SLAB code is easier to maintain.

In my testing there is no performance difference.

This implementation relies on the percpu allocator. Because the percpu allocator uses vmalloc address space, vmalloc address space could be exhausted by this change on a many-cpu system with a *32 bit* kernel. This implementation can cover 1024 cpus in the worst case, by the following calculation.

Worst:  1024 cpus * 4 bytes for pointer * 300 kmem_caches *
        120 objects per cpu_cache = 140 MB
Normal: 1024 cpus * 4 bytes for pointer * 150 kmem_caches (slab merge) *
        80 objects per cpu_cache = 46 MB

Signed-off-by: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Jeremiah Mahler <jmmahler@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
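The estimate above can be reproduced with a small standalone program. This is only a userspace sketch of the arithmetic, not part of the patch; the cpu, pointer, cache and object counts are the assumptions quoted in the message rather than values measured on a real system, and the per-cpu struct array_cache header that the patch also allocates is ignored here just as it is in the back-of-envelope numbers above.

/* Userspace check of the vmalloc-footprint estimate quoted in the message. */
#include <stdio.h>

/* One pointer slot per cached object, per kmem_cache, per cpu
 * (4-byte pointers on a 32-bit kernel). */
static long long footprint(long long cpus, long long ptr_bytes,
			   long long caches, long long objs_per_cpu_cache)
{
	return cpus * ptr_bytes * caches * objs_per_cpu_cache;
}

int main(void)
{
	long long worst  = footprint(1024, 4, 300, 120);
	long long normal = footprint(1024, 4, 150, 80);

	printf("worst : %lld bytes = %.1f MB\n", worst,  worst  / (1024.0 * 1024.0));
	printf("normal: %lld bytes = %.1f MB\n", normal, normal / (1024.0 * 1024.0));
	return 0;
}

This prints roughly 140.6 MB and 46.9 MB, matching the 140 MB / 46 MB figures above.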
Diffstat (limited to 'mm/slab.c')
-rw-r--r--	mm/slab.c	239
1 file changed, 75 insertions(+), 164 deletions(-)
diff --git a/mm/slab.c b/mm/slab.c
index 328233a724af..655d65c3f010 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -237,11 +237,10 @@ struct arraycache_init {
 /*
  * Need this for bootstrapping a per node allocator.
  */
-#define NUM_INIT_LISTS (3 * MAX_NUMNODES)
+#define NUM_INIT_LISTS (2 * MAX_NUMNODES)
 static struct kmem_cache_node __initdata init_kmem_cache_node[NUM_INIT_LISTS];
 #define CACHE_CACHE 0
-#define SIZE_AC MAX_NUMNODES
-#define SIZE_NODE (2 * MAX_NUMNODES)
+#define SIZE_NODE (MAX_NUMNODES)
 
 static int drain_freelist(struct kmem_cache *cache,
 			struct kmem_cache_node *n, int tofree);
@@ -253,7 +252,6 @@ static void cache_reap(struct work_struct *unused);
 
 static int slab_early_init = 1;
 
-#define INDEX_AC kmalloc_index(sizeof(struct arraycache_init))
 #define INDEX_NODE kmalloc_index(sizeof(struct kmem_cache_node))
 
 static void kmem_cache_node_init(struct kmem_cache_node *parent)
@@ -458,9 +456,6 @@ static inline unsigned int obj_to_index(const struct kmem_cache *cache,
 	return reciprocal_divide(offset, cache->reciprocal_buffer_size);
 }
 
-static struct arraycache_init initarray_generic =
-    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
-
 /* internal cache of cache description objs */
 static struct kmem_cache kmem_cache_boot = {
 	.batchcount = 1,
@@ -476,7 +471,7 @@ static DEFINE_PER_CPU(struct delayed_work, slab_reap_work);
 
 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
 {
-	return cachep->array[smp_processor_id()];
+	return this_cpu_ptr(cachep->cpu_cache);
 }
 
 static size_t calculate_freelist_size(int nr_objs, size_t align)
@@ -1096,24 +1091,25 @@ static void cpuup_canceled(long cpu)
 		struct alien_cache **alien;
 		LIST_HEAD(list);
 
-		/* cpu is dead; no one can alloc from it. */
-		nc = cachep->array[cpu];
-		cachep->array[cpu] = NULL;
 		n = get_node(cachep, node);
-
 		if (!n)
-			goto free_array_cache;
+			continue;
 
 		spin_lock_irq(&n->list_lock);
 
 		/* Free limit for this kmem_cache_node */
 		n->free_limit -= cachep->batchcount;
-		if (nc)
+
+		/* cpu is dead; no one can alloc from it. */
+		nc = per_cpu_ptr(cachep->cpu_cache, cpu);
+		if (nc) {
 			free_block(cachep, nc->entry, nc->avail, node, &list);
+			nc->avail = 0;
+		}
 
 		if (!cpumask_empty(mask)) {
 			spin_unlock_irq(&n->list_lock);
-			goto free_array_cache;
+			goto free_slab;
 		}
 
 		shared = n->shared;
@@ -1133,9 +1129,9 @@ static void cpuup_canceled(long cpu)
 			drain_alien_cache(cachep, alien);
 			free_alien_cache(alien);
 		}
-free_array_cache:
+
+free_slab:
 		slabs_destroy(cachep, &list);
-		kfree(nc);
 	}
 	/*
 	 * In the previous loop, all the objects were freed to
@@ -1172,32 +1168,23 @@ static int cpuup_prepare(long cpu)
 	 * array caches
 	 */
 	list_for_each_entry(cachep, &slab_caches, list) {
-		struct array_cache *nc;
 		struct array_cache *shared = NULL;
 		struct alien_cache **alien = NULL;
 
-		nc = alloc_arraycache(node, cachep->limit,
-					cachep->batchcount, GFP_KERNEL);
-		if (!nc)
-			goto bad;
 		if (cachep->shared) {
 			shared = alloc_arraycache(node,
 				cachep->shared * cachep->batchcount,
 				0xbaadf00d, GFP_KERNEL);
-			if (!shared) {
-				kfree(nc);
+			if (!shared)
 				goto bad;
-			}
 		}
 		if (use_alien_caches) {
 			alien = alloc_alien_cache(node, cachep->limit, GFP_KERNEL);
 			if (!alien) {
 				kfree(shared);
-				kfree(nc);
 				goto bad;
 			}
 		}
-		cachep->array[cpu] = nc;
 		n = get_node(cachep, node);
 		BUG_ON(!n);
 
@@ -1389,15 +1376,6 @@ static void __init set_up_node(struct kmem_cache *cachep, int index)
 }
 
 /*
- * The memory after the last cpu cache pointer is used for the
- * the node pointer.
- */
-static void setup_node_pointer(struct kmem_cache *cachep)
-{
-	cachep->node = (struct kmem_cache_node **)&cachep->array[nr_cpu_ids];
-}
-
-/*
  * Initialisation.  Called after the page allocator have been initialised and
  * before smp_init().
  */
@@ -1408,7 +1386,6 @@ void __init kmem_cache_init(void)
 	BUILD_BUG_ON(sizeof(((struct page *)NULL)->lru) <
 				 sizeof(struct rcu_head));
 	kmem_cache = &kmem_cache_boot;
-	setup_node_pointer(kmem_cache);
 
 	if (num_possible_nodes() == 1)
 		use_alien_caches = 0;
@@ -1416,8 +1393,6 @@ void __init kmem_cache_init(void)
 	for (i = 0; i < NUM_INIT_LISTS; i++)
 		kmem_cache_node_init(&init_kmem_cache_node[i]);
 
-	set_up_node(kmem_cache, CACHE_CACHE);
-
 	/*
 	 * Fragmentation resistance on low memory - only use bigger
 	 * page orders on machines with more than 32MB of memory if
@@ -1452,49 +1427,22 @@ void __init kmem_cache_init(void)
 	 * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
 	 */
 	create_boot_cache(kmem_cache, "kmem_cache",
-		offsetof(struct kmem_cache, array[nr_cpu_ids]) +
+		offsetof(struct kmem_cache, node) +
 				  nr_node_ids * sizeof(struct kmem_cache_node *),
 				  SLAB_HWCACHE_ALIGN);
 	list_add(&kmem_cache->list, &slab_caches);
-
-	/* 2+3) create the kmalloc caches */
+	slab_state = PARTIAL;
 
 	/*
-	 * Initialize the caches that provide memory for the array cache and the
-	 * kmem_cache_node structures first.  Without this, further allocations will
-	 * bug.
+	 * Initialize the caches that provide memory for the kmem_cache_node
+	 * structures first.  Without this, further allocations will bug.
 	 */
-
-	kmalloc_caches[INDEX_AC] = create_kmalloc_cache("kmalloc-ac",
-					kmalloc_size(INDEX_AC), ARCH_KMALLOC_FLAGS);
-
-	if (INDEX_AC != INDEX_NODE)
-		kmalloc_caches[INDEX_NODE] =
-			create_kmalloc_cache("kmalloc-node",
+	kmalloc_caches[INDEX_NODE] = create_kmalloc_cache("kmalloc-node",
 				kmalloc_size(INDEX_NODE), ARCH_KMALLOC_FLAGS);
+	slab_state = PARTIAL_NODE;
 
 	slab_early_init = 0;
 
-	/* 4) Replace the bootstrap head arrays */
-	{
-		struct array_cache *ptr;
-
-		ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
-
-		memcpy(ptr, cpu_cache_get(kmem_cache),
-		       sizeof(struct arraycache_init));
-
-		kmem_cache->array[smp_processor_id()] = ptr;
-
-		ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
-
-		BUG_ON(cpu_cache_get(kmalloc_caches[INDEX_AC])
-		       != &initarray_generic.cache);
-		memcpy(ptr, cpu_cache_get(kmalloc_caches[INDEX_AC]),
-		       sizeof(struct arraycache_init));
-
-		kmalloc_caches[INDEX_AC]->array[smp_processor_id()] = ptr;
-	}
 	/* 5) Replace the bootstrap kmem_cache_node */
 	{
 		int nid;
@@ -1502,13 +1450,8 @@ void __init kmem_cache_init(void)
 		for_each_online_node(nid) {
 			init_list(kmem_cache, &init_kmem_cache_node[CACHE_CACHE + nid], nid);
 
-			init_list(kmalloc_caches[INDEX_AC],
-				  &init_kmem_cache_node[SIZE_AC + nid], nid);
-
-			if (INDEX_AC != INDEX_NODE) {
-				init_list(kmalloc_caches[INDEX_NODE],
+			init_list(kmalloc_caches[INDEX_NODE],
 				  &init_kmem_cache_node[SIZE_NODE + nid], nid);
-			}
 		}
 	}
 
@@ -2041,56 +1984,53 @@ static size_t calculate_slab_order(struct kmem_cache *cachep,
 	return left_over;
 }
 
+static struct array_cache __percpu *alloc_kmem_cache_cpus(
+		struct kmem_cache *cachep, int entries, int batchcount)
+{
+	int cpu;
+	size_t size;
+	struct array_cache __percpu *cpu_cache;
+
+	size = sizeof(void *) * entries + sizeof(struct array_cache);
+	cpu_cache = __alloc_percpu(size, 0);
+
+	if (!cpu_cache)
+		return NULL;
+
+	for_each_possible_cpu(cpu) {
+		init_arraycache(per_cpu_ptr(cpu_cache, cpu),
+				entries, batchcount);
+	}
+
+	return cpu_cache;
+}
+
 static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
 {
 	if (slab_state >= FULL)
 		return enable_cpucache(cachep, gfp);
 
+	cachep->cpu_cache = alloc_kmem_cache_cpus(cachep, 1, 1);
+	if (!cachep->cpu_cache)
+		return 1;
+
 	if (slab_state == DOWN) {
-		/*
-		 * Note: Creation of first cache (kmem_cache).
-		 * The setup_node is taken care
-		 * of by the caller of __kmem_cache_create
-		 */
-		cachep->array[smp_processor_id()] = &initarray_generic.cache;
-		slab_state = PARTIAL;
+		/* Creation of first cache (kmem_cache). */
+		set_up_node(kmem_cache, CACHE_CACHE);
 	} else if (slab_state == PARTIAL) {
-		/*
-		 * Note: the second kmem_cache_create must create the cache
-		 * that's used by kmalloc(24), otherwise the creation of
-		 * further caches will BUG().
-		 */
-		cachep->array[smp_processor_id()] = &initarray_generic.cache;
-
-		/*
-		 * If the cache that's used by kmalloc(sizeof(kmem_cache_node)) is
-		 * the second cache, then we need to set up all its node/,
-		 * otherwise the creation of further caches will BUG().
-		 */
-		set_up_node(cachep, SIZE_AC);
-		if (INDEX_AC == INDEX_NODE)
-			slab_state = PARTIAL_NODE;
-		else
-			slab_state = PARTIAL_ARRAYCACHE;
+		/* For kmem_cache_node */
+		set_up_node(cachep, SIZE_NODE);
 	} else {
-		/* Remaining boot caches */
-		cachep->array[smp_processor_id()] =
-			kmalloc(sizeof(struct arraycache_init), gfp);
+		int node;
 
-		if (slab_state == PARTIAL_ARRAYCACHE) {
-			set_up_node(cachep, SIZE_NODE);
-			slab_state = PARTIAL_NODE;
-		} else {
-			int node;
-			for_each_online_node(node) {
-				cachep->node[node] =
-					kmalloc_node(sizeof(struct kmem_cache_node),
-						gfp, node);
-				BUG_ON(!cachep->node[node]);
-				kmem_cache_node_init(cachep->node[node]);
-			}
+		for_each_online_node(node) {
+			cachep->node[node] = kmalloc_node(
+				sizeof(struct kmem_cache_node), gfp, node);
+			BUG_ON(!cachep->node[node]);
+			kmem_cache_node_init(cachep->node[node]);
 		}
 	}
+
 	cachep->node[numa_mem_id()]->next_reap =
 			jiffies + REAPTIMEOUT_NODE +
 			((unsigned long)cachep) % REAPTIMEOUT_NODE;
@@ -2213,7 +2153,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
 	else
 		gfp = GFP_NOWAIT;
 
-	setup_node_pointer(cachep);
 #if DEBUG
 
 	/*
@@ -2470,8 +2409,7 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep)
 	if (rc)
 		return rc;
 
-	for_each_online_cpu(i)
-	    kfree(cachep->array[i]);
+	free_percpu(cachep->cpu_cache);
 
 	/* NUMA: free the node structures */
 	for_each_kmem_cache_node(cachep, i, n) {
@@ -3719,72 +3657,45 @@ fail:
 	return -ENOMEM;
 }
 
-struct ccupdate_struct {
-	struct kmem_cache *cachep;
-	struct array_cache *new[0];
-};
-
-static void do_ccupdate_local(void *info)
-{
-	struct ccupdate_struct *new = info;
-	struct array_cache *old;
-
-	check_irq_off();
-	old = cpu_cache_get(new->cachep);
-
-	new->cachep->array[smp_processor_id()] = new->new[smp_processor_id()];
-	new->new[smp_processor_id()] = old;
-}
-
 /* Always called with the slab_mutex held */
 static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
 				int batchcount, int shared, gfp_t gfp)
 {
-	struct ccupdate_struct *new;
-	int i;
+	struct array_cache __percpu *cpu_cache, *prev;
+	int cpu;
 
-	new = kzalloc(sizeof(*new) + nr_cpu_ids * sizeof(struct array_cache *),
-		      gfp);
-	if (!new)
+	cpu_cache = alloc_kmem_cache_cpus(cachep, limit, batchcount);
+	if (!cpu_cache)
 		return -ENOMEM;
 
-	for_each_online_cpu(i) {
-		new->new[i] = alloc_arraycache(cpu_to_mem(i), limit,
-						batchcount, gfp);
-		if (!new->new[i]) {
-			for (i--; i >= 0; i--)
-				kfree(new->new[i]);
-			kfree(new);
-			return -ENOMEM;
-		}
-	}
-	new->cachep = cachep;
-
-	on_each_cpu(do_ccupdate_local, (void *)new, 1);
+	prev = cachep->cpu_cache;
+	cachep->cpu_cache = cpu_cache;
+	kick_all_cpus_sync();
 
 	check_irq_on();
 	cachep->batchcount = batchcount;
 	cachep->limit = limit;
 	cachep->shared = shared;
 
-	for_each_online_cpu(i) {
+	if (!prev)
+		goto alloc_node;
+
+	for_each_online_cpu(cpu) {
 		LIST_HEAD(list);
-		struct array_cache *ccold = new->new[i];
 		int node;
 		struct kmem_cache_node *n;
+		struct array_cache *ac = per_cpu_ptr(prev, cpu);
 
-		if (!ccold)
-			continue;
-
-		node = cpu_to_mem(i);
+		node = cpu_to_mem(cpu);
 		n = get_node(cachep, node);
 		spin_lock_irq(&n->list_lock);
-		free_block(cachep, ccold->entry, ccold->avail, node, &list);
+		free_block(cachep, ac->entry, ac->avail, node, &list);
 		spin_unlock_irq(&n->list_lock);
 		slabs_destroy(cachep, &list);
-		kfree(ccold);
 	}
-	kfree(new);
+	free_percpu(prev);
+
+alloc_node:
 	return alloc_kmem_cache_node(cachep, gfp);
 }
 