 include/linux/slub_def.h |   6 +-
 mm/slub.c                | 202 ++++++------------------------------------
 2 files changed, 49 insertions(+), 159 deletions(-)
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 1e14beb23f9b..17ebe0f89bf3 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -69,6 +69,7 @@ struct kmem_cache_order_objects {
  * Slab cache management.
  */
 struct kmem_cache {
+	struct kmem_cache_cpu *cpu_slab;
 	/* Used for retriving partial slabs etc */
 	unsigned long flags;
 	int size;		/* The size of an object including meta data */
@@ -104,11 +105,6 @@ struct kmem_cache {
 	int remote_node_defrag_ratio;
 	struct kmem_cache_node *node[MAX_NUMNODES];
 #endif
-#ifdef CONFIG_SMP
-	struct kmem_cache_cpu *cpu_slab[NR_CPUS];
-#else
-	struct kmem_cache_cpu cpu_slab;
-#endif
 };
 
 /*
diff --git a/mm/slub.c b/mm/slub.c
index 8d71aaf888d7..d6c9ecf629d5 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -242,15 +242,6 @@ static inline struct kmem_cache_node *get_node(struct kmem_cache *s, int node)
 #endif
 }
 
-static inline struct kmem_cache_cpu *get_cpu_slab(struct kmem_cache *s, int cpu)
-{
-#ifdef CONFIG_SMP
-	return s->cpu_slab[cpu];
-#else
-	return &s->cpu_slab;
-#endif
-}
-
 /* Verify that a pointer has an address that is valid within a slab page */
 static inline int check_valid_pointer(struct kmem_cache *s,
 				struct page *page, const void *object)
@@ -1124,7 +1115,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 		if (!page)
 			return NULL;
 
-		stat(get_cpu_slab(s, raw_smp_processor_id()), ORDER_FALLBACK);
+		stat(this_cpu_ptr(s->cpu_slab), ORDER_FALLBACK);
 	}
 
 	if (kmemcheck_enabled
@@ -1422,7 +1413,7 @@ static struct page *get_partial(struct kmem_cache *s, gfp_t flags, int node)
 static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 {
 	struct kmem_cache_node *n = get_node(s, page_to_nid(page));
-	struct kmem_cache_cpu *c = get_cpu_slab(s, smp_processor_id());
+	struct kmem_cache_cpu *c = this_cpu_ptr(s->cpu_slab);
 
 	__ClearPageSlubFrozen(page);
 	if (page->inuse) {
@@ -1454,7 +1445,7 @@ static void unfreeze_slab(struct kmem_cache *s, struct page *page, int tail)
 			slab_unlock(page);
 		} else {
 			slab_unlock(page);
-			stat(get_cpu_slab(s, raw_smp_processor_id()), FREE_SLAB);
+			stat(__this_cpu_ptr(s->cpu_slab), FREE_SLAB);
 			discard_slab(s, page);
 		}
 	}
@@ -1507,7 +1498,7 @@ static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
  */
 static inline void __flush_cpu_slab(struct kmem_cache *s, int cpu)
 {
-	struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
+	struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
 
 	if (likely(c && c->page))
 		flush_slab(s, c);
@@ -1673,7 +1664,7 @@ new_slab:
 		local_irq_disable();
 
 	if (new) {
-		c = get_cpu_slab(s, smp_processor_id());
+		c = __this_cpu_ptr(s->cpu_slab);
 		stat(c, ALLOC_SLAB);
 		if (c->page)
 			flush_slab(s, c);
@@ -1711,7 +1702,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 	void **object;
 	struct kmem_cache_cpu *c;
 	unsigned long flags;
-	unsigned int objsize;
+	unsigned long objsize;
 
 	gfpflags &= gfp_allowed_mask;
 
@@ -1722,14 +1713,14 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
 		return NULL;
 
 	local_irq_save(flags);
-	c = get_cpu_slab(s, smp_processor_id());
+	c = __this_cpu_ptr(s->cpu_slab);
+	object = c->freelist;
 	objsize = c->objsize;
-	if (unlikely(!c->freelist || !node_match(c, node)))
+	if (unlikely(!object || !node_match(c, node)))
 
 		object = __slab_alloc(s, gfpflags, node, addr, c);
 
 	else {
-		object = c->freelist;
 		c->freelist = object[c->offset];
 		stat(c, ALLOC_FASTPATH);
 	}
@@ -1800,7 +1791,7 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 	void **object = (void *)x;
 	struct kmem_cache_cpu *c;
 
-	c = get_cpu_slab(s, raw_smp_processor_id());
+	c = __this_cpu_ptr(s->cpu_slab);
 	stat(c, FREE_SLOWPATH);
 	slab_lock(page);
 
@@ -1872,7 +1863,7 @@ static __always_inline void slab_free(struct kmem_cache *s,
 
 	kmemleak_free_recursive(x, s->flags);
 	local_irq_save(flags);
-	c = get_cpu_slab(s, smp_processor_id());
+	c = __this_cpu_ptr(s->cpu_slab);
 	kmemcheck_slab_free(s, object, c->objsize);
 	debug_check_no_locks_freed(object, c->objsize);
 	if (!(s->flags & SLAB_DEBUG_OBJECTS))
@@ -2095,130 +2086,28 @@ init_kmem_cache_node(struct kmem_cache_node *n, struct kmem_cache *s)
 #endif
 }
 
-#ifdef CONFIG_SMP
-/*
- * Per cpu array for per cpu structures.
- *
- * The per cpu array places all kmem_cache_cpu structures from one processor
- * close together meaning that it becomes possible that multiple per cpu
- * structures are contained in one cacheline. This may be particularly
- * beneficial for the kmalloc caches.
- *
- * A desktop system typically has around 60-80 slabs. With 100 here we are
- * likely able to get per cpu structures for all caches from the array defined
- * here. We must be able to cover all kmalloc caches during bootstrap.
- *
- * If the per cpu array is exhausted then fall back to kmalloc
- * of individual cachelines. No sharing is possible then.
- */
-#define NR_KMEM_CACHE_CPU 100
-
-static DEFINE_PER_CPU(struct kmem_cache_cpu [NR_KMEM_CACHE_CPU],
-		      kmem_cache_cpu);
-
-static DEFINE_PER_CPU(struct kmem_cache_cpu *, kmem_cache_cpu_free);
-static DECLARE_BITMAP(kmem_cach_cpu_free_init_once, CONFIG_NR_CPUS);
-
-static struct kmem_cache_cpu *alloc_kmem_cache_cpu(struct kmem_cache *s,
-							int cpu, gfp_t flags)
-{
-	struct kmem_cache_cpu *c = per_cpu(kmem_cache_cpu_free, cpu);
-
-	if (c)
-		per_cpu(kmem_cache_cpu_free, cpu) =
-				(void *)c->freelist;
-	else {
-		/* Table overflow: So allocate ourselves */
-		c = kmalloc_node(
-			ALIGN(sizeof(struct kmem_cache_cpu), cache_line_size()),
-			flags, cpu_to_node(cpu));
-		if (!c)
-			return NULL;
-	}
-
-	init_kmem_cache_cpu(s, c);
-	return c;
-}
-
-static void free_kmem_cache_cpu(struct kmem_cache_cpu *c, int cpu)
-{
-	if (c < per_cpu(kmem_cache_cpu, cpu) ||
-			c >= per_cpu(kmem_cache_cpu, cpu) + NR_KMEM_CACHE_CPU) {
-		kfree(c);
-		return;
-	}
-	c->freelist = (void *)per_cpu(kmem_cache_cpu_free, cpu);
-	per_cpu(kmem_cache_cpu_free, cpu) = c;
-}
-
-static void free_kmem_cache_cpus(struct kmem_cache *s)
-{
-	int cpu;
-
-	for_each_online_cpu(cpu) {
-		struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
-
-		if (c) {
-			s->cpu_slab[cpu] = NULL;
-			free_kmem_cache_cpu(c, cpu);
-		}
-	}
-}
-
-static int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
-{
-	int cpu;
-
-	for_each_online_cpu(cpu) {
-		struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
-
-		if (c)
-			continue;
-
-		c = alloc_kmem_cache_cpu(s, cpu, flags);
-		if (!c) {
-			free_kmem_cache_cpus(s);
-			return 0;
-		}
-		s->cpu_slab[cpu] = c;
-	}
-	return 1;
-}
-
-/*
- * Initialize the per cpu array.
- */
-static void init_alloc_cpu_cpu(int cpu)
-{
-	int i;
-
-	if (cpumask_test_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once)))
-		return;
-
-	for (i = NR_KMEM_CACHE_CPU - 1; i >= 0; i--)
-		free_kmem_cache_cpu(&per_cpu(kmem_cache_cpu, cpu)[i], cpu);
-
-	cpumask_set_cpu(cpu, to_cpumask(kmem_cach_cpu_free_init_once));
-}
-
-static void __init init_alloc_cpu(void)
+static DEFINE_PER_CPU(struct kmem_cache_cpu, kmalloc_percpu[SLUB_PAGE_SHIFT]);
+
+static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
 {
 	int cpu;
 
-	for_each_online_cpu(cpu)
-		init_alloc_cpu_cpu(cpu);
-}
+	if (s < kmalloc_caches + SLUB_PAGE_SHIFT && s >= kmalloc_caches)
+		/*
+		 * Boot time creation of the kmalloc array. Use static per cpu data
+		 * since the per cpu allocator is not available yet.
+		 */
+		s->cpu_slab = per_cpu_var(kmalloc_percpu) + (s - kmalloc_caches);
+	else
+		s->cpu_slab = alloc_percpu(struct kmem_cache_cpu);
 
-#else
-static inline void free_kmem_cache_cpus(struct kmem_cache *s) {}
-static inline void init_alloc_cpu(void) {}
+	if (!s->cpu_slab)
+		return 0;
 
-static inline int alloc_kmem_cache_cpus(struct kmem_cache *s, gfp_t flags)
-{
-	init_kmem_cache_cpu(s, &s->cpu_slab);
+	for_each_possible_cpu(cpu)
+		init_kmem_cache_cpu(s, per_cpu_ptr(s->cpu_slab, cpu));
 	return 1;
 }
-#endif
 
 #ifdef CONFIG_NUMA
 /*
@@ -2609,9 +2498,8 @@ static inline int kmem_cache_close(struct kmem_cache *s)
 	int node;
 
 	flush_all(s);
-
+	free_percpu(s->cpu_slab);
 	/* Attempt to free all objects */
-	free_kmem_cache_cpus(s);
 	for_each_node_state(node, N_NORMAL_MEMORY) {
 		struct kmem_cache_node *n = get_node(s, node);
 
@@ -2760,7 +2648,19 @@ static noinline struct kmem_cache *dma_kmalloc_cache(int index, gfp_t flags)
 	realsize = kmalloc_caches[index].objsize;
 	text = kasprintf(flags & ~SLUB_DMA, "kmalloc_dma-%d",
 			 (unsigned int)realsize);
-	s = kmalloc(kmem_size, flags & ~SLUB_DMA);
+
+	if (flags & __GFP_WAIT)
+		s = kmalloc(kmem_size, flags & ~SLUB_DMA);
+	else {
+		int i;
+
+		s = NULL;
+		for (i = 0; i < SLUB_PAGE_SHIFT; i++)
+			if (kmalloc_caches[i].size) {
+				s = kmalloc_caches + i;
+				break;
+			}
+	}
 
 	/*
 	 * Must defer sysfs creation to a workqueue because we don't know
@@ -3176,8 +3076,6 @@ void __init kmem_cache_init(void)
 	int i;
 	int caches = 0;
 
-	init_alloc_cpu();
-
 #ifdef CONFIG_NUMA
 	/*
 	 * Must first have the slab cache available for the allocations of the
@@ -3261,8 +3159,10 @@ void __init kmem_cache_init(void)
 
 #ifdef CONFIG_SMP
 	register_cpu_notifier(&slab_notifier);
-	kmem_size = offsetof(struct kmem_cache, cpu_slab) +
-				nr_cpu_ids * sizeof(struct kmem_cache_cpu *);
+#endif
+#ifdef CONFIG_NUMA
+	kmem_size = offsetof(struct kmem_cache, node) +
+				nr_node_ids * sizeof(struct kmem_cache_node *);
 #else
 	kmem_size = sizeof(struct kmem_cache);
 #endif
@@ -3365,7 +3265,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size,
 	 * per cpu structures
 	 */
 	for_each_online_cpu(cpu)
-		get_cpu_slab(s, cpu)->objsize = s->objsize;
+		per_cpu_ptr(s->cpu_slab, cpu)->objsize = s->objsize;
 
 	s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
 	up_write(&slub_lock);
@@ -3422,11 +3322,9 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
 	switch (action) {
 	case CPU_UP_PREPARE:
 	case CPU_UP_PREPARE_FROZEN:
-		init_alloc_cpu_cpu(cpu);
 		down_read(&slub_lock);
 		list_for_each_entry(s, &slab_caches, list)
-			s->cpu_slab[cpu] = alloc_kmem_cache_cpu(s, cpu,
-							GFP_KERNEL);
+			init_kmem_cache_cpu(s, per_cpu_ptr(s->cpu_slab, cpu));
 		up_read(&slub_lock);
 		break;
 
@@ -3436,13 +3334,9 @@ static int __cpuinit slab_cpuup_callback(struct notifier_block *nfb,
 	case CPU_DEAD_FROZEN:
 		down_read(&slub_lock);
 		list_for_each_entry(s, &slab_caches, list) {
-			struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
-
 			local_irq_save(flags);
 			__flush_cpu_slab(s, cpu);
 			local_irq_restore(flags);
-			free_kmem_cache_cpu(c, cpu);
-			s->cpu_slab[cpu] = NULL;
 		}
 		up_read(&slub_lock);
 		break;
@@ -3928,7 +3822,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
 		int cpu;
 
 		for_each_possible_cpu(cpu) {
-			struct kmem_cache_cpu *c = get_cpu_slab(s, cpu);
+			struct kmem_cache_cpu *c = per_cpu_ptr(s->cpu_slab, cpu);
 
 			if (!c || c->node < 0)
 				continue;
@@ -4353,7 +4247,7 @@ static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
 		return -ENOMEM;
 
 	for_each_online_cpu(cpu) {
-		unsigned x = get_cpu_slab(s, cpu)->stat[si];
+		unsigned x = per_cpu_ptr(s->cpu_slab, cpu)->stat[si];
 
 		data[cpu] = x;
 		sum += x;
@@ -4376,7 +4270,7 @@ static void clear_stat(struct kmem_cache *s, enum stat_item si)
 	int cpu;
 
 	for_each_online_cpu(cpu)
-		get_cpu_slab(s, cpu)->stat[si] = 0;
+		per_cpu_ptr(s->cpu_slab, cpu)->stat[si] = 0;
 }
 
 #define STAT_ATTR(si, text) \
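
For reference, below is a minimal, self-contained sketch of the per-cpu pattern this patch converts SLUB to: a hand-maintained NR_CPUS pointer array is replaced by a single pointer obtained from alloc_percpu(), with per_cpu_ptr() for a named CPU and this_cpu_ptr() for the executing CPU. The structure and function names (example_cache, example_cpu, pcpu_demo_*) are made up for illustration and are not taken from slub.c; only the percpu interfaces correspond to what the diff uses.

/* Illustrative kernel-module sketch of alloc_percpu()/this_cpu_ptr() usage. */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/percpu.h>

struct example_cpu {
	unsigned long stat;		/* stands in for kmem_cache_cpu state */
};

struct example_cache {
	struct example_cpu *cpu_data;	/* replaces a [NR_CPUS] pointer array */
};

static struct example_cache cache;

static int __init pcpu_demo_init(void)
{
	int cpu;

	/* One dynamically allocated instance per possible CPU. */
	cache.cpu_data = alloc_percpu(struct example_cpu);
	if (!cache.cpu_data)
		return -ENOMEM;

	/* per_cpu_ptr(): reach a specific CPU's copy, e.g. during setup. */
	for_each_possible_cpu(cpu)
		per_cpu_ptr(cache.cpu_data, cpu)->stat = 0;

	/*
	 * this_cpu_ptr(): reach the local copy. Preemption is disabled
	 * explicitly here; the SLUB fast paths run under local_irq_save().
	 */
	preempt_disable();
	this_cpu_ptr(cache.cpu_data)->stat++;
	preempt_enable();

	return 0;
}

static void __exit pcpu_demo_exit(void)
{
	/* Mirrors the free_percpu(s->cpu_slab) call in kmem_cache_close(). */
	free_percpu(cache.cpu_data);
}

module_init(pcpu_demo_init);
module_exit(pcpu_demo_exit);
MODULE_LICENSE("GPL");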