author     Linus Torvalds <torvalds@linux-foundation.org>  2013-05-07 11:42:20 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2013-05-07 11:42:20 -0400
commit     0f47c9423c0fe468d0b5b153f9b9d6e8e20707eb (patch)
tree       9eaec7fb4dc5fbfae07d168d0493a0a0a67c7d47
parent     b9e306e07ed58fc354bbd58124b281dd7dc697b7 (diff)
parent     69df2ac1288b456a95aceadafbf88cd891a577c8 (diff)
Merge branch 'slab/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux
Pull slab changes from Pekka Enberg:
 "The bulk of the changes are more slab unification from Christoph.
  There's also a few fixes from Aaron, Glauber, and Joonsoo thrown into
  the mix."

* 'slab/for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux: (24 commits)
  mm, slab_common: Fix bootstrap creation of kmalloc caches
  slab: Return NULL for oversized allocations
  mm: slab: Verify the nodeid passed to ____cache_alloc_node
  slub: tid must be retrieved from the percpu area of the current processor
  slub: Do not dereference NULL pointer in node_match
  slub: add 'likely' macro to inc_slabs_node()
  slub: correct to calculate num of acquired objects in get_partial_node()
  slub: correctly bootstrap boot caches
  mm/sl[au]b: correct allocation type check in kmalloc_slab()
  slab: Fixup CONFIG_PAGE_ALLOC/DEBUG_SLAB_LEAK sections
  slab: Handle ARCH_DMA_MINALIGN correctly
  slab: Common definition for kmem_cache_node
  slab: Rename list3/l3 to node
  slab: Common Kmalloc cache determination
  stat: Use size_t for sizes instead of unsigned
  slab: Common function to create the kmalloc array
  slab: Common definition for the array of kmalloc caches
  slab: Common constants for kmalloc boundaries
  slab: Rename nodelists to node
  slab: Common name for the per node structures
  ...
-rw-r--r--  fs/proc/stat.c                    2
-rw-r--r--  include/linux/kmalloc_sizes.h    45
-rw-r--r--  include/linux/slab.h            231
-rw-r--r--  include/linux/slab_def.h         54
-rw-r--r--  include/linux/slub_def.h        136
-rw-r--r--  mm/slab.c                       790
-rw-r--r--  mm/slab.h                        43
-rw-r--r--  mm/slab_common.c                174
-rw-r--r--  mm/slub.c                       221
9 files changed, 781 insertions, 915 deletions
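
The core of the unification is that SLAB and SLUB now share a single kmalloc_index() in <linux/slab.h> (added further down in this diff), replacing SLAB's kmalloc_sizes.h x-macro and SLUB's private copy. A minimal user-space sketch of the size-to-index mapping it performs, assuming KMALLOC_MIN_SIZE == 8 (i.e. KMALLOC_SHIFT_LOW == 3, the default when ARCH_DMA_MINALIGN does not raise it); kmalloc_index_demo() is an illustration name, not a kernel symbol:

/*
 * Illustration only: user-space copy of the size-to-index mapping the
 * merged kmalloc_index() performs (assuming KMALLOC_MIN_SIZE == 8).
 */
#include <assert.h>
#include <stddef.h>
#include <stdio.h>

static int kmalloc_index_demo(size_t size)
{
	if (!size)
		return 0;		/* zero-size allocation */
	if (size <= 8)
		return 3;		/* KMALLOC_SHIFT_LOW */
	if (size > 64 && size <= 96)
		return 1;		/* odd-sized 96-byte cache */
	if (size > 128 && size <= 192)
		return 2;		/* odd-sized 192-byte cache */

	/* otherwise: index of the smallest power of two >= size */
	int i = 3;
	size_t s = 8;
	while (s < size) {
		s <<= 1;
		i++;
	}
	return i;
}

int main(void)
{
	assert(kmalloc_index_demo(8) == 3);
	assert(kmalloc_index_demo(96) == 1);	/* 65..96 -> index 1 */
	assert(kmalloc_index_demo(100) == 7);	/* rounds up to 128 */
	assert(kmalloc_index_demo(4096) == 12);
	printf("192 bytes -> index %d\n", kmalloc_index_demo(192));	/* 2 */
	return 0;
}
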
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index e296572c73ed..1cf86c0e8689 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -184,7 +184,7 @@ static int show_stat(struct seq_file *p, void *v)
184 184
185static int stat_open(struct inode *inode, struct file *file) 185static int stat_open(struct inode *inode, struct file *file)
186{ 186{
187 unsigned size = 1024 + 128 * num_possible_cpus(); 187 size_t size = 1024 + 128 * num_possible_cpus();
188 char *buf; 188 char *buf;
189 struct seq_file *m; 189 struct seq_file *m;
190 int res; 190 int res;
diff --git a/include/linux/kmalloc_sizes.h b/include/linux/kmalloc_sizes.h
deleted file mode 100644
index e576b848ce10..000000000000
--- a/include/linux/kmalloc_sizes.h
+++ /dev/null
@@ -1,45 +0,0 @@
1#if (PAGE_SIZE == 4096)
2 CACHE(32)
3#endif
4 CACHE(64)
5#if L1_CACHE_BYTES < 64
6 CACHE(96)
7#endif
8 CACHE(128)
9#if L1_CACHE_BYTES < 128
10 CACHE(192)
11#endif
12 CACHE(256)
13 CACHE(512)
14 CACHE(1024)
15 CACHE(2048)
16 CACHE(4096)
17 CACHE(8192)
18 CACHE(16384)
19 CACHE(32768)
20 CACHE(65536)
21 CACHE(131072)
22#if KMALLOC_MAX_SIZE >= 262144
23 CACHE(262144)
24#endif
25#if KMALLOC_MAX_SIZE >= 524288
26 CACHE(524288)
27#endif
28#if KMALLOC_MAX_SIZE >= 1048576
29 CACHE(1048576)
30#endif
31#if KMALLOC_MAX_SIZE >= 2097152
32 CACHE(2097152)
33#endif
34#if KMALLOC_MAX_SIZE >= 4194304
35 CACHE(4194304)
36#endif
37#if KMALLOC_MAX_SIZE >= 8388608
38 CACHE(8388608)
39#endif
40#if KMALLOC_MAX_SIZE >= 16777216
41 CACHE(16777216)
42#endif
43#if KMALLOC_MAX_SIZE >= 33554432
44 CACHE(33554432)
45#endif
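
The deleted header was an x-macro list: each consumer defined CACHE(x) before including it, and the preprocessor stamped out one expansion per size, either a malloc_sizes[] table entry (mm/slab.c, see the removal below) or an if/else chain (the old inline kmalloc() in slab_def.h). A minimal sketch of that pattern with a shortened stand-in size list; KMALLOC_SIZE_LIST, demo_sizes and demo_index are illustration names only:

/*
 * Sketch of the x-macro pattern the deleted header relied on: consumers
 * define CACHE(x), include the list, and get one expansion per size.
 * (Simplified stand-in list; the real header varied with PAGE_SIZE,
 * L1_CACHE_BYTES and KMALLOC_MAX_SIZE.)
 */
#include <stdio.h>

#define KMALLOC_SIZE_LIST \
	CACHE(32) CACHE(64) CACHE(128) CACHE(256) CACHE(512)

struct cache_size { unsigned long cs_size; };

/* Expansion 1: build a table, the way mm/slab.c built malloc_sizes[]. */
static const struct cache_size demo_sizes[] = {
#define CACHE(x) { .cs_size = (x) },
	KMALLOC_SIZE_LIST
#undef CACHE
};

/* Expansion 2: build an if/else chain, the way the old inline kmalloc() did. */
static int demo_index(unsigned long size)
{
	int i = 0;
#define CACHE(x) if (size <= (x)) return i; else i++;
	KMALLOC_SIZE_LIST
#undef CACHE
	return -1;	/* too large for any listed cache */
}

int main(void)
{
	printf("%lu bytes -> slot %d (%lu-byte cache)\n",
	       100UL, demo_index(100), demo_sizes[demo_index(100)].cs_size);
	return 0;
}
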
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 5d168d7e0a28..0c621752caa6 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -94,29 +94,6 @@
94#define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) <= \ 94#define ZERO_OR_NULL_PTR(x) ((unsigned long)(x) <= \
95 (unsigned long)ZERO_SIZE_PTR) 95 (unsigned long)ZERO_SIZE_PTR)
96 96
97/*
98 * Common fields provided in kmem_cache by all slab allocators
99 * This struct is either used directly by the allocator (SLOB)
100 * or the allocator must include definitions for all fields
101 * provided in kmem_cache_common in their definition of kmem_cache.
102 *
103 * Once we can do anonymous structs (C11 standard) we could put a
104 * anonymous struct definition in these allocators so that the
105 * separate allocations in the kmem_cache structure of SLAB and
106 * SLUB is no longer needed.
107 */
108#ifdef CONFIG_SLOB
109struct kmem_cache {
110 unsigned int object_size;/* The original size of the object */
111 unsigned int size; /* The aligned/padded/added on size */
112 unsigned int align; /* Alignment as calculated */
113 unsigned long flags; /* Active flags on the slab */
114 const char *name; /* Slab name for sysfs */
115 int refcount; /* Use counter */
116 void (*ctor)(void *); /* Called on object slot creation */
117 struct list_head list; /* List of all slab caches on the system */
118};
119#endif
120 97
121struct mem_cgroup; 98struct mem_cgroup;
122/* 99/*
@@ -148,7 +125,63 @@ void kmem_cache_free(struct kmem_cache *, void *);
148 (__flags), NULL) 125 (__flags), NULL)
149 126
150/* 127/*
151 * The largest kmalloc size supported by the slab allocators is 128 * Common kmalloc functions provided by all allocators
129 */
130void * __must_check __krealloc(const void *, size_t, gfp_t);
131void * __must_check krealloc(const void *, size_t, gfp_t);
132void kfree(const void *);
133void kzfree(const void *);
134size_t ksize(const void *);
135
136/*
137 * Some archs want to perform DMA into kmalloc caches and need a guaranteed
138 * alignment larger than the alignment of a 64-bit integer.
139 * Setting ARCH_KMALLOC_MINALIGN in arch headers allows that.
140 */
141#if defined(ARCH_DMA_MINALIGN) && ARCH_DMA_MINALIGN > 8
142#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN
143#define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN
144#define KMALLOC_SHIFT_LOW ilog2(ARCH_DMA_MINALIGN)
145#else
146#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long)
147#endif
148
149#ifdef CONFIG_SLOB
150/*
151 * Common fields provided in kmem_cache by all slab allocators
152 * This struct is either used directly by the allocator (SLOB)
153 * or the allocator must include definitions for all fields
154 * provided in kmem_cache_common in their definition of kmem_cache.
155 *
156 * Once we can do anonymous structs (C11 standard) we could put a
157 * anonymous struct definition in these allocators so that the
158 * separate allocations in the kmem_cache structure of SLAB and
159 * SLUB is no longer needed.
160 */
161struct kmem_cache {
162 unsigned int object_size;/* The original size of the object */
163 unsigned int size; /* The aligned/padded/added on size */
164 unsigned int align; /* Alignment as calculated */
165 unsigned long flags; /* Active flags on the slab */
166 const char *name; /* Slab name for sysfs */
167 int refcount; /* Use counter */
168 void (*ctor)(void *); /* Called on object slot creation */
169 struct list_head list; /* List of all slab caches on the system */
170};
171
172#define KMALLOC_MAX_SIZE (1UL << 30)
173
174#include <linux/slob_def.h>
175
176#else /* CONFIG_SLOB */
177
178/*
179 * Kmalloc array related definitions
180 */
181
182#ifdef CONFIG_SLAB
183/*
184 * The largest kmalloc size supported by the SLAB allocators is
152 * 32 megabyte (2^25) or the maximum allocatable page order if that is 185 * 32 megabyte (2^25) or the maximum allocatable page order if that is
153 * less than 32 MB. 186 * less than 32 MB.
154 * 187 *
@@ -158,22 +191,120 @@ void kmem_cache_free(struct kmem_cache *, void *);
158 */ 191 */
159#define KMALLOC_SHIFT_HIGH ((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \ 192#define KMALLOC_SHIFT_HIGH ((MAX_ORDER + PAGE_SHIFT - 1) <= 25 ? \
160 (MAX_ORDER + PAGE_SHIFT - 1) : 25) 193 (MAX_ORDER + PAGE_SHIFT - 1) : 25)
194#define KMALLOC_SHIFT_MAX KMALLOC_SHIFT_HIGH
195#ifndef KMALLOC_SHIFT_LOW
196#define KMALLOC_SHIFT_LOW 5
197#endif
198#else
199/*
200 * SLUB allocates up to order 2 pages directly and otherwise
201 * passes the request to the page allocator.
202 */
203#define KMALLOC_SHIFT_HIGH (PAGE_SHIFT + 1)
204#define KMALLOC_SHIFT_MAX (MAX_ORDER + PAGE_SHIFT)
205#ifndef KMALLOC_SHIFT_LOW
206#define KMALLOC_SHIFT_LOW 3
207#endif
208#endif
161 209
162#define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_HIGH) 210/* Maximum allocatable size */
163#define KMALLOC_MAX_ORDER (KMALLOC_SHIFT_HIGH - PAGE_SHIFT) 211#define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_MAX)
212/* Maximum size for which we actually use a slab cache */
213#define KMALLOC_MAX_CACHE_SIZE (1UL << KMALLOC_SHIFT_HIGH)
214/* Maximum order allocatable via the slab allocagtor */
215#define KMALLOC_MAX_ORDER (KMALLOC_SHIFT_MAX - PAGE_SHIFT)
164 216
165/* 217/*
166 * Some archs want to perform DMA into kmalloc caches and need a guaranteed 218 * Kmalloc subsystem.
167 * alignment larger than the alignment of a 64-bit integer.
168 * Setting ARCH_KMALLOC_MINALIGN in arch headers allows that.
169 */ 219 */
170#ifdef ARCH_DMA_MINALIGN 220#ifndef KMALLOC_MIN_SIZE
171#define ARCH_KMALLOC_MINALIGN ARCH_DMA_MINALIGN 221#define KMALLOC_MIN_SIZE (1 << KMALLOC_SHIFT_LOW)
222#endif
223
224extern struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
225#ifdef CONFIG_ZONE_DMA
226extern struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
227#endif
228
229/*
230 * Figure out which kmalloc slab an allocation of a certain size
231 * belongs to.
232 * 0 = zero alloc
233 * 1 = 65 .. 96 bytes
234 * 2 = 120 .. 192 bytes
235 * n = 2^(n-1) .. 2^n -1
236 */
237static __always_inline int kmalloc_index(size_t size)
238{
239 if (!size)
240 return 0;
241
242 if (size <= KMALLOC_MIN_SIZE)
243 return KMALLOC_SHIFT_LOW;
244
245 if (KMALLOC_MIN_SIZE <= 32 && size > 64 && size <= 96)
246 return 1;
247 if (KMALLOC_MIN_SIZE <= 64 && size > 128 && size <= 192)
248 return 2;
249 if (size <= 8) return 3;
250 if (size <= 16) return 4;
251 if (size <= 32) return 5;
252 if (size <= 64) return 6;
253 if (size <= 128) return 7;
254 if (size <= 256) return 8;
255 if (size <= 512) return 9;
256 if (size <= 1024) return 10;
257 if (size <= 2 * 1024) return 11;
258 if (size <= 4 * 1024) return 12;
259 if (size <= 8 * 1024) return 13;
260 if (size <= 16 * 1024) return 14;
261 if (size <= 32 * 1024) return 15;
262 if (size <= 64 * 1024) return 16;
263 if (size <= 128 * 1024) return 17;
264 if (size <= 256 * 1024) return 18;
265 if (size <= 512 * 1024) return 19;
266 if (size <= 1024 * 1024) return 20;
267 if (size <= 2 * 1024 * 1024) return 21;
268 if (size <= 4 * 1024 * 1024) return 22;
269 if (size <= 8 * 1024 * 1024) return 23;
270 if (size <= 16 * 1024 * 1024) return 24;
271 if (size <= 32 * 1024 * 1024) return 25;
272 if (size <= 64 * 1024 * 1024) return 26;
273 BUG();
274
275 /* Will never be reached. Needed because the compiler may complain */
276 return -1;
277}
278
279#ifdef CONFIG_SLAB
280#include <linux/slab_def.h>
281#elif defined(CONFIG_SLUB)
282#include <linux/slub_def.h>
172#else 283#else
173#define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) 284#error "Unknown slab allocator"
174#endif 285#endif
175 286
176/* 287/*
288 * Determine size used for the nth kmalloc cache.
289 * return size or 0 if a kmalloc cache for that
290 * size does not exist
291 */
292static __always_inline int kmalloc_size(int n)
293{
294 if (n > 2)
295 return 1 << n;
296
297 if (n == 1 && KMALLOC_MIN_SIZE <= 32)
298 return 96;
299
300 if (n == 2 && KMALLOC_MIN_SIZE <= 64)
301 return 192;
302
303 return 0;
304}
305#endif /* !CONFIG_SLOB */
306
307/*
177 * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment. 308 * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment.
178 * Intended for arches that get misalignment faults even for 64 bit integer 309 * Intended for arches that get misalignment faults even for 64 bit integer
179 * aligned buffers. 310 * aligned buffers.
@@ -224,42 +355,6 @@ struct seq_file;
224int cache_show(struct kmem_cache *s, struct seq_file *m); 355int cache_show(struct kmem_cache *s, struct seq_file *m);
225void print_slabinfo_header(struct seq_file *m); 356void print_slabinfo_header(struct seq_file *m);
226 357
227/*
228 * Common kmalloc functions provided by all allocators
229 */
230void * __must_check __krealloc(const void *, size_t, gfp_t);
231void * __must_check krealloc(const void *, size_t, gfp_t);
232void kfree(const void *);
233void kzfree(const void *);
234size_t ksize(const void *);
235
236/*
237 * Allocator specific definitions. These are mainly used to establish optimized
238 * ways to convert kmalloc() calls to kmem_cache_alloc() invocations by
239 * selecting the appropriate general cache at compile time.
240 *
241 * Allocators must define at least:
242 *
243 * kmem_cache_alloc()
244 * __kmalloc()
245 * kmalloc()
246 *
247 * Those wishing to support NUMA must also define:
248 *
249 * kmem_cache_alloc_node()
250 * kmalloc_node()
251 *
252 * See each allocator definition file for additional comments and
253 * implementation notes.
254 */
255#ifdef CONFIG_SLUB
256#include <linux/slub_def.h>
257#elif defined(CONFIG_SLOB)
258#include <linux/slob_def.h>
259#else
260#include <linux/slab_def.h>
261#endif
262
263/** 358/**
264 * kmalloc_array - allocate memory for an array. 359 * kmalloc_array - allocate memory for an array.
265 * @n: number of elements. 360 * @n: number of elements.
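
The new kmalloc_size() helper above is the inverse of kmalloc_index(): it reports the object size backing the nth kmalloc cache (96 and 192 bytes for the two odd-sized caches, 2^n otherwise, 0 for the zero-size case). A small sketch that mirrors it, assuming KMALLOC_MIN_SIZE <= 32 so both odd-sized caches exist; kmalloc_size_demo() is an illustration name:

/* Illustration only: mirror of kmalloc_size() from the hunk above. */
#include <stdio.h>

static int kmalloc_size_demo(int n)
{
	if (n > 2)
		return 1 << n;		/* power-of-two caches */
	if (n == 1)
		return 96;		/* odd-sized cache between 64 and 128 */
	if (n == 2)
		return 192;		/* odd-sized cache between 128 and 256 */
	return 0;			/* n == 0: zero-size allocation, no cache */
}

int main(void)
{
	/* e.g. index 1 -> 96, index 2 -> 192, index 9 -> 512, index 12 -> 4096 */
	for (int i = 0; i <= 12; i++)
		printf("kmalloc index %2d -> %d bytes\n", i, kmalloc_size_demo(i));
	return 0;
}
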
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 8bb6e0eaf3c6..cd401580bdd3 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -11,8 +11,6 @@
11 */ 11 */
12 12
13#include <linux/init.h> 13#include <linux/init.h>
14#include <asm/page.h> /* kmalloc_sizes.h needs PAGE_SIZE */
15#include <asm/cache.h> /* kmalloc_sizes.h needs L1_CACHE_BYTES */
16#include <linux/compiler.h> 14#include <linux/compiler.h>
17 15
18/* 16/*
@@ -97,23 +95,13 @@ struct kmem_cache {
97 * pointer for each node since "nodelists" uses the remainder of 95 * pointer for each node since "nodelists" uses the remainder of
98 * available pointers. 96 * available pointers.
99 */ 97 */
100 struct kmem_list3 **nodelists; 98 struct kmem_cache_node **node;
101 struct array_cache *array[NR_CPUS + MAX_NUMNODES]; 99 struct array_cache *array[NR_CPUS + MAX_NUMNODES];
102 /* 100 /*
103 * Do not add fields after array[] 101 * Do not add fields after array[]
104 */ 102 */
105}; 103};
106 104
107/* Size description struct for general caches. */
108struct cache_sizes {
109 size_t cs_size;
110 struct kmem_cache *cs_cachep;
111#ifdef CONFIG_ZONE_DMA
112 struct kmem_cache *cs_dmacachep;
113#endif
114};
115extern struct cache_sizes malloc_sizes[];
116
117void *kmem_cache_alloc(struct kmem_cache *, gfp_t); 105void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
118void *__kmalloc(size_t size, gfp_t flags); 106void *__kmalloc(size_t size, gfp_t flags);
119 107
@@ -133,26 +121,22 @@ static __always_inline void *kmalloc(size_t size, gfp_t flags)
133 void *ret; 121 void *ret;
134 122
135 if (__builtin_constant_p(size)) { 123 if (__builtin_constant_p(size)) {
136 int i = 0; 124 int i;
137 125
138 if (!size) 126 if (!size)
139 return ZERO_SIZE_PTR; 127 return ZERO_SIZE_PTR;
140 128
141#define CACHE(x) \ 129 if (WARN_ON_ONCE(size > KMALLOC_MAX_SIZE))
142 if (size <= x) \ 130 return NULL;
143 goto found; \ 131
144 else \ 132 i = kmalloc_index(size);
145 i++; 133
146#include <linux/kmalloc_sizes.h>
147#undef CACHE
148 return NULL;
149found:
150#ifdef CONFIG_ZONE_DMA 134#ifdef CONFIG_ZONE_DMA
151 if (flags & GFP_DMA) 135 if (flags & GFP_DMA)
152 cachep = malloc_sizes[i].cs_dmacachep; 136 cachep = kmalloc_dma_caches[i];
153 else 137 else
154#endif 138#endif
155 cachep = malloc_sizes[i].cs_cachep; 139 cachep = kmalloc_caches[i];
156 140
157 ret = kmem_cache_alloc_trace(cachep, flags, size); 141 ret = kmem_cache_alloc_trace(cachep, flags, size);
158 142
@@ -186,26 +170,22 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
186 struct kmem_cache *cachep; 170 struct kmem_cache *cachep;
187 171
188 if (__builtin_constant_p(size)) { 172 if (__builtin_constant_p(size)) {
189 int i = 0; 173 int i;
190 174
191 if (!size) 175 if (!size)
192 return ZERO_SIZE_PTR; 176 return ZERO_SIZE_PTR;
193 177
194#define CACHE(x) \ 178 if (WARN_ON_ONCE(size > KMALLOC_MAX_SIZE))
195 if (size <= x) \ 179 return NULL;
196 goto found; \ 180
197 else \ 181 i = kmalloc_index(size);
198 i++; 182
199#include <linux/kmalloc_sizes.h>
200#undef CACHE
201 return NULL;
202found:
203#ifdef CONFIG_ZONE_DMA 183#ifdef CONFIG_ZONE_DMA
204 if (flags & GFP_DMA) 184 if (flags & GFP_DMA)
205 cachep = malloc_sizes[i].cs_dmacachep; 185 cachep = kmalloc_dma_caches[i];
206 else 186 else
207#endif 187#endif
208 cachep = malloc_sizes[i].cs_cachep; 188 cachep = kmalloc_caches[i];
209 189
210 return kmem_cache_alloc_node_trace(cachep, flags, node, size); 190 return kmem_cache_alloc_node_trace(cachep, flags, node, size);
211 } 191 }
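
The rewritten inline kmalloc() above hinges on __builtin_constant_p(size): for compile-time-constant sizes the if-chain in kmalloc_index() folds down to a single kmalloc_caches[] lookup (or a NULL return after the WARN_ON_ONCE for oversized requests), while non-constant sizes fall through to __kmalloc(). A hedged user-space sketch of that compile-time dispatch idiom, assuming GCC or Clang with optimization enabled; fast_or_slow() and slow_path() are illustration names:

/* Illustration of __builtin_constant_p()-based dispatch (GCC/Clang). */
#include <stdio.h>
#include <stdlib.h>

static void *slow_path(size_t size)
{
	printf("runtime-sized alloc of %zu bytes\n", size);
	return malloc(size);
}

static inline void *fast_or_slow(size_t size)
{
	if (__builtin_constant_p(size)) {
		/*
		 * With optimization enabled, the predicate is evaluated at
		 * compile time, so only one of the two branches is emitted
		 * for any given call site.
		 */
		printf("constant-sized alloc of %zu bytes\n", size);
		return malloc(size);
	}
	return slow_path(size);
}

int main(void)
{
	size_t n = (size_t)rand() % 100 + 1;

	free(fast_or_slow(64));	/* constant size: inlined fast path */
	free(fast_or_slow(n));	/* runtime size: falls back to slow_path() */
	return 0;
}
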
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 9db4825cd393..027276fa8713 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -53,17 +53,6 @@ struct kmem_cache_cpu {
53#endif 53#endif
54}; 54};
55 55
56struct kmem_cache_node {
57 spinlock_t list_lock; /* Protect partial list and nr_partial */
58 unsigned long nr_partial;
59 struct list_head partial;
60#ifdef CONFIG_SLUB_DEBUG
61 atomic_long_t nr_slabs;
62 atomic_long_t total_objects;
63 struct list_head full;
64#endif
65};
66
67/* 56/*
68 * Word size structure that can be atomically updated or read and that 57 * Word size structure that can be atomically updated or read and that
69 * contains both the order and the number of objects that a slab of the 58 * contains both the order and the number of objects that a slab of the
@@ -115,111 +104,6 @@ struct kmem_cache {
115 struct kmem_cache_node *node[MAX_NUMNODES]; 104 struct kmem_cache_node *node[MAX_NUMNODES];
116}; 105};
117 106
118/*
119 * Kmalloc subsystem.
120 */
121#if defined(ARCH_DMA_MINALIGN) && ARCH_DMA_MINALIGN > 8
122#define KMALLOC_MIN_SIZE ARCH_DMA_MINALIGN
123#else
124#define KMALLOC_MIN_SIZE 8
125#endif
126
127#define KMALLOC_SHIFT_LOW ilog2(KMALLOC_MIN_SIZE)
128
129/*
130 * Maximum kmalloc object size handled by SLUB. Larger object allocations
131 * are passed through to the page allocator. The page allocator "fastpath"
132 * is relatively slow so we need this value sufficiently high so that
133 * performance critical objects are allocated through the SLUB fastpath.
134 *
135 * This should be dropped to PAGE_SIZE / 2 once the page allocator
136 * "fastpath" becomes competitive with the slab allocator fastpaths.
137 */
138#define SLUB_MAX_SIZE (2 * PAGE_SIZE)
139
140#define SLUB_PAGE_SHIFT (PAGE_SHIFT + 2)
141
142#ifdef CONFIG_ZONE_DMA
143#define SLUB_DMA __GFP_DMA
144#else
145/* Disable DMA functionality */
146#define SLUB_DMA (__force gfp_t)0
147#endif
148
149/*
150 * We keep the general caches in an array of slab caches that are used for
151 * 2^x bytes of allocations.
152 */
153extern struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT];
154
155/*
156 * Sorry that the following has to be that ugly but some versions of GCC
157 * have trouble with constant propagation and loops.
158 */
159static __always_inline int kmalloc_index(size_t size)
160{
161 if (!size)
162 return 0;
163
164 if (size <= KMALLOC_MIN_SIZE)
165 return KMALLOC_SHIFT_LOW;
166
167 if (KMALLOC_MIN_SIZE <= 32 && size > 64 && size <= 96)
168 return 1;
169 if (KMALLOC_MIN_SIZE <= 64 && size > 128 && size <= 192)
170 return 2;
171 if (size <= 8) return 3;
172 if (size <= 16) return 4;
173 if (size <= 32) return 5;
174 if (size <= 64) return 6;
175 if (size <= 128) return 7;
176 if (size <= 256) return 8;
177 if (size <= 512) return 9;
178 if (size <= 1024) return 10;
179 if (size <= 2 * 1024) return 11;
180 if (size <= 4 * 1024) return 12;
181/*
182 * The following is only needed to support architectures with a larger page
183 * size than 4k. We need to support 2 * PAGE_SIZE here. So for a 64k page
184 * size we would have to go up to 128k.
185 */
186 if (size <= 8 * 1024) return 13;
187 if (size <= 16 * 1024) return 14;
188 if (size <= 32 * 1024) return 15;
189 if (size <= 64 * 1024) return 16;
190 if (size <= 128 * 1024) return 17;
191 if (size <= 256 * 1024) return 18;
192 if (size <= 512 * 1024) return 19;
193 if (size <= 1024 * 1024) return 20;
194 if (size <= 2 * 1024 * 1024) return 21;
195 BUG();
196 return -1; /* Will never be reached */
197
198/*
199 * What we really wanted to do and cannot do because of compiler issues is:
200 * int i;
201 * for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++)
202 * if (size <= (1 << i))
203 * return i;
204 */
205}
206
207/*
208 * Find the slab cache for a given combination of allocation flags and size.
209 *
210 * This ought to end up with a global pointer to the right cache
211 * in kmalloc_caches.
212 */
213static __always_inline struct kmem_cache *kmalloc_slab(size_t size)
214{
215 int index = kmalloc_index(size);
216
217 if (index == 0)
218 return NULL;
219
220 return kmalloc_caches[index];
221}
222
223void *kmem_cache_alloc(struct kmem_cache *, gfp_t); 107void *kmem_cache_alloc(struct kmem_cache *, gfp_t);
224void *__kmalloc(size_t size, gfp_t flags); 108void *__kmalloc(size_t size, gfp_t flags);
225 109
@@ -274,16 +158,17 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags)
274static __always_inline void *kmalloc(size_t size, gfp_t flags) 158static __always_inline void *kmalloc(size_t size, gfp_t flags)
275{ 159{
276 if (__builtin_constant_p(size)) { 160 if (__builtin_constant_p(size)) {
277 if (size > SLUB_MAX_SIZE) 161 if (size > KMALLOC_MAX_CACHE_SIZE)
278 return kmalloc_large(size, flags); 162 return kmalloc_large(size, flags);
279 163
280 if (!(flags & SLUB_DMA)) { 164 if (!(flags & GFP_DMA)) {
281 struct kmem_cache *s = kmalloc_slab(size); 165 int index = kmalloc_index(size);
282 166
283 if (!s) 167 if (!index)
284 return ZERO_SIZE_PTR; 168 return ZERO_SIZE_PTR;
285 169
286 return kmem_cache_alloc_trace(s, flags, size); 170 return kmem_cache_alloc_trace(kmalloc_caches[index],
171 flags, size);
287 } 172 }
288 } 173 }
289 return __kmalloc(size, flags); 174 return __kmalloc(size, flags);
@@ -310,13 +195,14 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s,
310static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) 195static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
311{ 196{
312 if (__builtin_constant_p(size) && 197 if (__builtin_constant_p(size) &&
313 size <= SLUB_MAX_SIZE && !(flags & SLUB_DMA)) { 198 size <= KMALLOC_MAX_CACHE_SIZE && !(flags & GFP_DMA)) {
314 struct kmem_cache *s = kmalloc_slab(size); 199 int index = kmalloc_index(size);
315 200
316 if (!s) 201 if (!index)
317 return ZERO_SIZE_PTR; 202 return ZERO_SIZE_PTR;
318 203
319 return kmem_cache_alloc_node_trace(s, flags, node, size); 204 return kmem_cache_alloc_node_trace(kmalloc_caches[index],
205 flags, node, size);
320 } 206 }
321 return __kmalloc_node(size, flags, node); 207 return __kmalloc_node(size, flags, node);
322} 208}
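
The SLUB-specific limits removed above are replaced by the common constants from <linux/slab.h>: KMALLOC_MAX_CACHE_SIZE, i.e. 1 << (PAGE_SHIFT + 1) or two pages, takes over the role of the old SLUB_MAX_SIZE (the same 8 KiB value on 4 KiB pages), and larger constant-size requests go straight to kmalloc_large() and the page allocator. A quick arithmetic sketch of the resulting boundaries, assuming a 4 KiB page (PAGE_SHIFT == 12) and MAX_ORDER == 11 as on an x86 defconfig:

/* Illustration of the new SLUB kmalloc boundaries under the stated assumptions. */
#include <stdio.h>

int main(void)
{
	const int PAGE_SHIFT = 12, MAX_ORDER = 11;	/* assumed config values */

	int kmalloc_shift_high = PAGE_SHIFT + 1;	/* largest slab-backed kmalloc */
	int kmalloc_shift_max  = MAX_ORDER + PAGE_SHIFT;

	printf("KMALLOC_MAX_CACHE_SIZE = %lu bytes (2 pages, old SLUB_MAX_SIZE)\n",
	       1UL << kmalloc_shift_high);		/* 8192 */
	printf("KMALLOC_MAX_SIZE       = %lu bytes (handed to the page allocator)\n",
	       1UL << kmalloc_shift_max);		/* 8 MiB */
	return 0;
}
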
diff --git a/mm/slab.c b/mm/slab.c
index 96079244c860..8ccd296c6d9c 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -286,68 +286,27 @@ struct arraycache_init {
286}; 286};
287 287
288/* 288/*
289 * The slab lists for all objects.
290 */
291struct kmem_list3 {
292 struct list_head slabs_partial; /* partial list first, better asm code */
293 struct list_head slabs_full;
294 struct list_head slabs_free;
295 unsigned long free_objects;
296 unsigned int free_limit;
297 unsigned int colour_next; /* Per-node cache coloring */
298 spinlock_t list_lock;
299 struct array_cache *shared; /* shared per node */
300 struct array_cache **alien; /* on other nodes */
301 unsigned long next_reap; /* updated without locking */
302 int free_touched; /* updated without locking */
303};
304
305/*
306 * Need this for bootstrapping a per node allocator. 289 * Need this for bootstrapping a per node allocator.
307 */ 290 */
308#define NUM_INIT_LISTS (3 * MAX_NUMNODES) 291#define NUM_INIT_LISTS (3 * MAX_NUMNODES)
309static struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS]; 292static struct kmem_cache_node __initdata init_kmem_cache_node[NUM_INIT_LISTS];
310#define CACHE_CACHE 0 293#define CACHE_CACHE 0
311#define SIZE_AC MAX_NUMNODES 294#define SIZE_AC MAX_NUMNODES
312#define SIZE_L3 (2 * MAX_NUMNODES) 295#define SIZE_NODE (2 * MAX_NUMNODES)
313 296
314static int drain_freelist(struct kmem_cache *cache, 297static int drain_freelist(struct kmem_cache *cache,
315 struct kmem_list3 *l3, int tofree); 298 struct kmem_cache_node *n, int tofree);
316static void free_block(struct kmem_cache *cachep, void **objpp, int len, 299static void free_block(struct kmem_cache *cachep, void **objpp, int len,
317 int node); 300 int node);
318static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp); 301static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp);
319static void cache_reap(struct work_struct *unused); 302static void cache_reap(struct work_struct *unused);
320 303
321/*
322 * This function must be completely optimized away if a constant is passed to
323 * it. Mostly the same as what is in linux/slab.h except it returns an index.
324 */
325static __always_inline int index_of(const size_t size)
326{
327 extern void __bad_size(void);
328
329 if (__builtin_constant_p(size)) {
330 int i = 0;
331
332#define CACHE(x) \
333 if (size <=x) \
334 return i; \
335 else \
336 i++;
337#include <linux/kmalloc_sizes.h>
338#undef CACHE
339 __bad_size();
340 } else
341 __bad_size();
342 return 0;
343}
344
345static int slab_early_init = 1; 304static int slab_early_init = 1;
346 305
347#define INDEX_AC index_of(sizeof(struct arraycache_init)) 306#define INDEX_AC kmalloc_index(sizeof(struct arraycache_init))
348#define INDEX_L3 index_of(sizeof(struct kmem_list3)) 307#define INDEX_NODE kmalloc_index(sizeof(struct kmem_cache_node))
349 308
350static void kmem_list3_init(struct kmem_list3 *parent) 309static void kmem_cache_node_init(struct kmem_cache_node *parent)
351{ 310{
352 INIT_LIST_HEAD(&parent->slabs_full); 311 INIT_LIST_HEAD(&parent->slabs_full);
353 INIT_LIST_HEAD(&parent->slabs_partial); 312 INIT_LIST_HEAD(&parent->slabs_partial);
@@ -363,7 +322,7 @@ static void kmem_list3_init(struct kmem_list3 *parent)
363#define MAKE_LIST(cachep, listp, slab, nodeid) \ 322#define MAKE_LIST(cachep, listp, slab, nodeid) \
364 do { \ 323 do { \
365 INIT_LIST_HEAD(listp); \ 324 INIT_LIST_HEAD(listp); \
366 list_splice(&(cachep->nodelists[nodeid]->slab), listp); \ 325 list_splice(&(cachep->node[nodeid]->slab), listp); \
367 } while (0) 326 } while (0)
368 327
369#define MAKE_ALL_LISTS(cachep, ptr, nodeid) \ 328#define MAKE_ALL_LISTS(cachep, ptr, nodeid) \
@@ -524,30 +483,6 @@ static inline unsigned int obj_to_index(const struct kmem_cache *cache,
524 return reciprocal_divide(offset, cache->reciprocal_buffer_size); 483 return reciprocal_divide(offset, cache->reciprocal_buffer_size);
525} 484}
526 485
527/*
528 * These are the default caches for kmalloc. Custom caches can have other sizes.
529 */
530struct cache_sizes malloc_sizes[] = {
531#define CACHE(x) { .cs_size = (x) },
532#include <linux/kmalloc_sizes.h>
533 CACHE(ULONG_MAX)
534#undef CACHE
535};
536EXPORT_SYMBOL(malloc_sizes);
537
538/* Must match cache_sizes above. Out of line to keep cache footprint low. */
539struct cache_names {
540 char *name;
541 char *name_dma;
542};
543
544static struct cache_names __initdata cache_names[] = {
545#define CACHE(x) { .name = "size-" #x, .name_dma = "size-" #x "(DMA)" },
546#include <linux/kmalloc_sizes.h>
547 {NULL,}
548#undef CACHE
549};
550
551static struct arraycache_init initarray_generic = 486static struct arraycache_init initarray_generic =
552 { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; 487 { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
553 488
@@ -586,15 +521,15 @@ static void slab_set_lock_classes(struct kmem_cache *cachep,
586 int q) 521 int q)
587{ 522{
588 struct array_cache **alc; 523 struct array_cache **alc;
589 struct kmem_list3 *l3; 524 struct kmem_cache_node *n;
590 int r; 525 int r;
591 526
592 l3 = cachep->nodelists[q]; 527 n = cachep->node[q];
593 if (!l3) 528 if (!n)
594 return; 529 return;
595 530
596 lockdep_set_class(&l3->list_lock, l3_key); 531 lockdep_set_class(&n->list_lock, l3_key);
597 alc = l3->alien; 532 alc = n->alien;
598 /* 533 /*
599 * FIXME: This check for BAD_ALIEN_MAGIC 534 * FIXME: This check for BAD_ALIEN_MAGIC
600 * should go away when common slab code is taught to 535 * should go away when common slab code is taught to
@@ -625,28 +560,30 @@ static void slab_set_debugobj_lock_classes(struct kmem_cache *cachep)
625 560
626static void init_node_lock_keys(int q) 561static void init_node_lock_keys(int q)
627{ 562{
628 struct cache_sizes *s = malloc_sizes; 563 int i;
629 564
630 if (slab_state < UP) 565 if (slab_state < UP)
631 return; 566 return;
632 567
633 for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) { 568 for (i = 1; i < PAGE_SHIFT + MAX_ORDER; i++) {
634 struct kmem_list3 *l3; 569 struct kmem_cache_node *n;
570 struct kmem_cache *cache = kmalloc_caches[i];
571
572 if (!cache)
573 continue;
635 574
636 l3 = s->cs_cachep->nodelists[q]; 575 n = cache->node[q];
637 if (!l3 || OFF_SLAB(s->cs_cachep)) 576 if (!n || OFF_SLAB(cache))
638 continue; 577 continue;
639 578
640 slab_set_lock_classes(s->cs_cachep, &on_slab_l3_key, 579 slab_set_lock_classes(cache, &on_slab_l3_key,
641 &on_slab_alc_key, q); 580 &on_slab_alc_key, q);
642 } 581 }
643} 582}
644 583
645static void on_slab_lock_classes_node(struct kmem_cache *cachep, int q) 584static void on_slab_lock_classes_node(struct kmem_cache *cachep, int q)
646{ 585{
647 struct kmem_list3 *l3; 586 if (!cachep->node[q])
648 l3 = cachep->nodelists[q];
649 if (!l3)
650 return; 587 return;
651 588
652 slab_set_lock_classes(cachep, &on_slab_l3_key, 589 slab_set_lock_classes(cachep, &on_slab_l3_key,
@@ -702,41 +639,6 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
702 return cachep->array[smp_processor_id()]; 639 return cachep->array[smp_processor_id()];
703} 640}
704 641
705static inline struct kmem_cache *__find_general_cachep(size_t size,
706 gfp_t gfpflags)
707{
708 struct cache_sizes *csizep = malloc_sizes;
709
710#if DEBUG
711 /* This happens if someone tries to call
712 * kmem_cache_create(), or __kmalloc(), before
713 * the generic caches are initialized.
714 */
715 BUG_ON(malloc_sizes[INDEX_AC].cs_cachep == NULL);
716#endif
717 if (!size)
718 return ZERO_SIZE_PTR;
719
720 while (size > csizep->cs_size)
721 csizep++;
722
723 /*
724 * Really subtle: The last entry with cs->cs_size==ULONG_MAX
725 * has cs_{dma,}cachep==NULL. Thus no special case
726 * for large kmalloc calls required.
727 */
728#ifdef CONFIG_ZONE_DMA
729 if (unlikely(gfpflags & GFP_DMA))
730 return csizep->cs_dmacachep;
731#endif
732 return csizep->cs_cachep;
733}
734
735static struct kmem_cache *kmem_find_general_cachep(size_t size, gfp_t gfpflags)
736{
737 return __find_general_cachep(size, gfpflags);
738}
739
740static size_t slab_mgmt_size(size_t nr_objs, size_t align) 642static size_t slab_mgmt_size(size_t nr_objs, size_t align)
741{ 643{
742 return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align); 644 return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
@@ -938,29 +840,29 @@ static inline bool is_slab_pfmemalloc(struct slab *slabp)
938static void recheck_pfmemalloc_active(struct kmem_cache *cachep, 840static void recheck_pfmemalloc_active(struct kmem_cache *cachep,
939 struct array_cache *ac) 841 struct array_cache *ac)
940{ 842{
941 struct kmem_list3 *l3 = cachep->nodelists[numa_mem_id()]; 843 struct kmem_cache_node *n = cachep->node[numa_mem_id()];
942 struct slab *slabp; 844 struct slab *slabp;
943 unsigned long flags; 845 unsigned long flags;
944 846
945 if (!pfmemalloc_active) 847 if (!pfmemalloc_active)
946 return; 848 return;
947 849
948 spin_lock_irqsave(&l3->list_lock, flags); 850 spin_lock_irqsave(&n->list_lock, flags);
949 list_for_each_entry(slabp, &l3->slabs_full, list) 851 list_for_each_entry(slabp, &n->slabs_full, list)
950 if (is_slab_pfmemalloc(slabp)) 852 if (is_slab_pfmemalloc(slabp))
951 goto out; 853 goto out;
952 854
953 list_for_each_entry(slabp, &l3->slabs_partial, list) 855 list_for_each_entry(slabp, &n->slabs_partial, list)
954 if (is_slab_pfmemalloc(slabp)) 856 if (is_slab_pfmemalloc(slabp))
955 goto out; 857 goto out;
956 858
957 list_for_each_entry(slabp, &l3->slabs_free, list) 859 list_for_each_entry(slabp, &n->slabs_free, list)
958 if (is_slab_pfmemalloc(slabp)) 860 if (is_slab_pfmemalloc(slabp))
959 goto out; 861 goto out;
960 862
961 pfmemalloc_active = false; 863 pfmemalloc_active = false;
962out: 864out:
963 spin_unlock_irqrestore(&l3->list_lock, flags); 865 spin_unlock_irqrestore(&n->list_lock, flags);
964} 866}
965 867
966static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac, 868static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
@@ -971,7 +873,7 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
971 873
972 /* Ensure the caller is allowed to use objects from PFMEMALLOC slab */ 874 /* Ensure the caller is allowed to use objects from PFMEMALLOC slab */
973 if (unlikely(is_obj_pfmemalloc(objp))) { 875 if (unlikely(is_obj_pfmemalloc(objp))) {
974 struct kmem_list3 *l3; 876 struct kmem_cache_node *n;
975 877
976 if (gfp_pfmemalloc_allowed(flags)) { 878 if (gfp_pfmemalloc_allowed(flags)) {
977 clear_obj_pfmemalloc(&objp); 879 clear_obj_pfmemalloc(&objp);
@@ -993,8 +895,8 @@ static void *__ac_get_obj(struct kmem_cache *cachep, struct array_cache *ac,
993 * If there are empty slabs on the slabs_free list and we are 895 * If there are empty slabs on the slabs_free list and we are
994 * being forced to refill the cache, mark this one !pfmemalloc. 896 * being forced to refill the cache, mark this one !pfmemalloc.
995 */ 897 */
996 l3 = cachep->nodelists[numa_mem_id()]; 898 n = cachep->node[numa_mem_id()];
997 if (!list_empty(&l3->slabs_free) && force_refill) { 899 if (!list_empty(&n->slabs_free) && force_refill) {
998 struct slab *slabp = virt_to_slab(objp); 900 struct slab *slabp = virt_to_slab(objp);
999 ClearPageSlabPfmemalloc(virt_to_head_page(slabp->s_mem)); 901 ClearPageSlabPfmemalloc(virt_to_head_page(slabp->s_mem));
1000 clear_obj_pfmemalloc(&objp); 902 clear_obj_pfmemalloc(&objp);
@@ -1071,7 +973,7 @@ static int transfer_objects(struct array_cache *to,
1071#ifndef CONFIG_NUMA 973#ifndef CONFIG_NUMA
1072 974
1073#define drain_alien_cache(cachep, alien) do { } while (0) 975#define drain_alien_cache(cachep, alien) do { } while (0)
1074#define reap_alien(cachep, l3) do { } while (0) 976#define reap_alien(cachep, n) do { } while (0)
1075 977
1076static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp) 978static inline struct array_cache **alloc_alien_cache(int node, int limit, gfp_t gfp)
1077{ 979{
@@ -1143,33 +1045,33 @@ static void free_alien_cache(struct array_cache **ac_ptr)
1143static void __drain_alien_cache(struct kmem_cache *cachep, 1045static void __drain_alien_cache(struct kmem_cache *cachep,
1144 struct array_cache *ac, int node) 1046 struct array_cache *ac, int node)
1145{ 1047{
1146 struct kmem_list3 *rl3 = cachep->nodelists[node]; 1048 struct kmem_cache_node *n = cachep->node[node];
1147 1049
1148 if (ac->avail) { 1050 if (ac->avail) {
1149 spin_lock(&rl3->list_lock); 1051 spin_lock(&n->list_lock);
1150 /* 1052 /*
1151 * Stuff objects into the remote nodes shared array first. 1053 * Stuff objects into the remote nodes shared array first.
1152 * That way we could avoid the overhead of putting the objects 1054 * That way we could avoid the overhead of putting the objects
1153 * into the free lists and getting them back later. 1055 * into the free lists and getting them back later.
1154 */ 1056 */
1155 if (rl3->shared) 1057 if (n->shared)
1156 transfer_objects(rl3->shared, ac, ac->limit); 1058 transfer_objects(n->shared, ac, ac->limit);
1157 1059
1158 free_block(cachep, ac->entry, ac->avail, node); 1060 free_block(cachep, ac->entry, ac->avail, node);
1159 ac->avail = 0; 1061 ac->avail = 0;
1160 spin_unlock(&rl3->list_lock); 1062 spin_unlock(&n->list_lock);
1161 } 1063 }
1162} 1064}
1163 1065
1164/* 1066/*
1165 * Called from cache_reap() to regularly drain alien caches round robin. 1067 * Called from cache_reap() to regularly drain alien caches round robin.
1166 */ 1068 */
1167static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3) 1069static void reap_alien(struct kmem_cache *cachep, struct kmem_cache_node *n)
1168{ 1070{
1169 int node = __this_cpu_read(slab_reap_node); 1071 int node = __this_cpu_read(slab_reap_node);
1170 1072
1171 if (l3->alien) { 1073 if (n->alien) {
1172 struct array_cache *ac = l3->alien[node]; 1074 struct array_cache *ac = n->alien[node];
1173 1075
1174 if (ac && ac->avail && spin_trylock_irq(&ac->lock)) { 1076 if (ac && ac->avail && spin_trylock_irq(&ac->lock)) {
1175 __drain_alien_cache(cachep, ac, node); 1077 __drain_alien_cache(cachep, ac, node);
@@ -1199,7 +1101,7 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1199{ 1101{
1200 struct slab *slabp = virt_to_slab(objp); 1102 struct slab *slabp = virt_to_slab(objp);
1201 int nodeid = slabp->nodeid; 1103 int nodeid = slabp->nodeid;
1202 struct kmem_list3 *l3; 1104 struct kmem_cache_node *n;
1203 struct array_cache *alien = NULL; 1105 struct array_cache *alien = NULL;
1204 int node; 1106 int node;
1205 1107
@@ -1212,10 +1114,10 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1212 if (likely(slabp->nodeid == node)) 1114 if (likely(slabp->nodeid == node))
1213 return 0; 1115 return 0;
1214 1116
1215 l3 = cachep->nodelists[node]; 1117 n = cachep->node[node];
1216 STATS_INC_NODEFREES(cachep); 1118 STATS_INC_NODEFREES(cachep);
1217 if (l3->alien && l3->alien[nodeid]) { 1119 if (n->alien && n->alien[nodeid]) {
1218 alien = l3->alien[nodeid]; 1120 alien = n->alien[nodeid];
1219 spin_lock(&alien->lock); 1121 spin_lock(&alien->lock);
1220 if (unlikely(alien->avail == alien->limit)) { 1122 if (unlikely(alien->avail == alien->limit)) {
1221 STATS_INC_ACOVERFLOW(cachep); 1123 STATS_INC_ACOVERFLOW(cachep);
@@ -1224,28 +1126,28 @@ static inline int cache_free_alien(struct kmem_cache *cachep, void *objp)
1224 ac_put_obj(cachep, alien, objp); 1126 ac_put_obj(cachep, alien, objp);
1225 spin_unlock(&alien->lock); 1127 spin_unlock(&alien->lock);
1226 } else { 1128 } else {
1227 spin_lock(&(cachep->nodelists[nodeid])->list_lock); 1129 spin_lock(&(cachep->node[nodeid])->list_lock);
1228 free_block(cachep, &objp, 1, nodeid); 1130 free_block(cachep, &objp, 1, nodeid);
1229 spin_unlock(&(cachep->nodelists[nodeid])->list_lock); 1131 spin_unlock(&(cachep->node[nodeid])->list_lock);
1230 } 1132 }
1231 return 1; 1133 return 1;
1232} 1134}
1233#endif 1135#endif
1234 1136
1235/* 1137/*
1236 * Allocates and initializes nodelists for a node on each slab cache, used for 1138 * Allocates and initializes node for a node on each slab cache, used for
1237 * either memory or cpu hotplug. If memory is being hot-added, the kmem_list3 1139 * either memory or cpu hotplug. If memory is being hot-added, the kmem_cache_node
1238 * will be allocated off-node since memory is not yet online for the new node. 1140 * will be allocated off-node since memory is not yet online for the new node.
1239 * When hotplugging memory or a cpu, existing nodelists are not replaced if 1141 * When hotplugging memory or a cpu, existing node are not replaced if
1240 * already in use. 1142 * already in use.
1241 * 1143 *
1242 * Must hold slab_mutex. 1144 * Must hold slab_mutex.
1243 */ 1145 */
1244static int init_cache_nodelists_node(int node) 1146static int init_cache_node_node(int node)
1245{ 1147{
1246 struct kmem_cache *cachep; 1148 struct kmem_cache *cachep;
1247 struct kmem_list3 *l3; 1149 struct kmem_cache_node *n;
1248 const int memsize = sizeof(struct kmem_list3); 1150 const int memsize = sizeof(struct kmem_cache_node);
1249 1151
1250 list_for_each_entry(cachep, &slab_caches, list) { 1152 list_for_each_entry(cachep, &slab_caches, list) {
1251 /* 1153 /*
@@ -1253,12 +1155,12 @@ static int init_cache_nodelists_node(int node)
1253 * begin anything. Make sure some other cpu on this 1155 * begin anything. Make sure some other cpu on this
1254 * node has not already allocated this 1156 * node has not already allocated this
1255 */ 1157 */
1256 if (!cachep->nodelists[node]) { 1158 if (!cachep->node[node]) {
1257 l3 = kmalloc_node(memsize, GFP_KERNEL, node); 1159 n = kmalloc_node(memsize, GFP_KERNEL, node);
1258 if (!l3) 1160 if (!n)
1259 return -ENOMEM; 1161 return -ENOMEM;
1260 kmem_list3_init(l3); 1162 kmem_cache_node_init(n);
1261 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + 1163 n->next_reap = jiffies + REAPTIMEOUT_LIST3 +
1262 ((unsigned long)cachep) % REAPTIMEOUT_LIST3; 1164 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
1263 1165
1264 /* 1166 /*
@@ -1266,14 +1168,14 @@ static int init_cache_nodelists_node(int node)
1266 * go. slab_mutex is sufficient 1168 * go. slab_mutex is sufficient
1267 * protection here. 1169 * protection here.
1268 */ 1170 */
1269 cachep->nodelists[node] = l3; 1171 cachep->node[node] = n;
1270 } 1172 }
1271 1173
1272 spin_lock_irq(&cachep->nodelists[node]->list_lock); 1174 spin_lock_irq(&cachep->node[node]->list_lock);
1273 cachep->nodelists[node]->free_limit = 1175 cachep->node[node]->free_limit =
1274 (1 + nr_cpus_node(node)) * 1176 (1 + nr_cpus_node(node)) *
1275 cachep->batchcount + cachep->num; 1177 cachep->batchcount + cachep->num;
1276 spin_unlock_irq(&cachep->nodelists[node]->list_lock); 1178 spin_unlock_irq(&cachep->node[node]->list_lock);
1277 } 1179 }
1278 return 0; 1180 return 0;
1279} 1181}
@@ -1281,7 +1183,7 @@ static int init_cache_nodelists_node(int node)
1281static void __cpuinit cpuup_canceled(long cpu) 1183static void __cpuinit cpuup_canceled(long cpu)
1282{ 1184{
1283 struct kmem_cache *cachep; 1185 struct kmem_cache *cachep;
1284 struct kmem_list3 *l3 = NULL; 1186 struct kmem_cache_node *n = NULL;
1285 int node = cpu_to_mem(cpu); 1187 int node = cpu_to_mem(cpu);
1286 const struct cpumask *mask = cpumask_of_node(node); 1188 const struct cpumask *mask = cpumask_of_node(node);
1287 1189
@@ -1293,34 +1195,34 @@ static void __cpuinit cpuup_canceled(long cpu)
1293 /* cpu is dead; no one can alloc from it. */ 1195 /* cpu is dead; no one can alloc from it. */
1294 nc = cachep->array[cpu]; 1196 nc = cachep->array[cpu];
1295 cachep->array[cpu] = NULL; 1197 cachep->array[cpu] = NULL;
1296 l3 = cachep->nodelists[node]; 1198 n = cachep->node[node];
1297 1199
1298 if (!l3) 1200 if (!n)
1299 goto free_array_cache; 1201 goto free_array_cache;
1300 1202
1301 spin_lock_irq(&l3->list_lock); 1203 spin_lock_irq(&n->list_lock);
1302 1204
1303 /* Free limit for this kmem_list3 */ 1205 /* Free limit for this kmem_cache_node */
1304 l3->free_limit -= cachep->batchcount; 1206 n->free_limit -= cachep->batchcount;
1305 if (nc) 1207 if (nc)
1306 free_block(cachep, nc->entry, nc->avail, node); 1208 free_block(cachep, nc->entry, nc->avail, node);
1307 1209
1308 if (!cpumask_empty(mask)) { 1210 if (!cpumask_empty(mask)) {
1309 spin_unlock_irq(&l3->list_lock); 1211 spin_unlock_irq(&n->list_lock);
1310 goto free_array_cache; 1212 goto free_array_cache;
1311 } 1213 }
1312 1214
1313 shared = l3->shared; 1215 shared = n->shared;
1314 if (shared) { 1216 if (shared) {
1315 free_block(cachep, shared->entry, 1217 free_block(cachep, shared->entry,
1316 shared->avail, node); 1218 shared->avail, node);
1317 l3->shared = NULL; 1219 n->shared = NULL;
1318 } 1220 }
1319 1221
1320 alien = l3->alien; 1222 alien = n->alien;
1321 l3->alien = NULL; 1223 n->alien = NULL;
1322 1224
1323 spin_unlock_irq(&l3->list_lock); 1225 spin_unlock_irq(&n->list_lock);
1324 1226
1325 kfree(shared); 1227 kfree(shared);
1326 if (alien) { 1228 if (alien) {
@@ -1336,17 +1238,17 @@ free_array_cache:
1336 * shrink each nodelist to its limit. 1238 * shrink each nodelist to its limit.
1337 */ 1239 */
1338 list_for_each_entry(cachep, &slab_caches, list) { 1240 list_for_each_entry(cachep, &slab_caches, list) {
1339 l3 = cachep->nodelists[node]; 1241 n = cachep->node[node];
1340 if (!l3) 1242 if (!n)
1341 continue; 1243 continue;
1342 drain_freelist(cachep, l3, l3->free_objects); 1244 drain_freelist(cachep, n, n->free_objects);
1343 } 1245 }
1344} 1246}
1345 1247
1346static int __cpuinit cpuup_prepare(long cpu) 1248static int __cpuinit cpuup_prepare(long cpu)
1347{ 1249{
1348 struct kmem_cache *cachep; 1250 struct kmem_cache *cachep;
1349 struct kmem_list3 *l3 = NULL; 1251 struct kmem_cache_node *n = NULL;
1350 int node = cpu_to_mem(cpu); 1252 int node = cpu_to_mem(cpu);
1351 int err; 1253 int err;
1352 1254
@@ -1354,9 +1256,9 @@ static int __cpuinit cpuup_prepare(long cpu)
1354 * We need to do this right in the beginning since 1256 * We need to do this right in the beginning since
1355 * alloc_arraycache's are going to use this list. 1257 * alloc_arraycache's are going to use this list.
1356 * kmalloc_node allows us to add the slab to the right 1258 * kmalloc_node allows us to add the slab to the right
1357 * kmem_list3 and not this cpu's kmem_list3 1259 * kmem_cache_node and not this cpu's kmem_cache_node
1358 */ 1260 */
1359 err = init_cache_nodelists_node(node); 1261 err = init_cache_node_node(node);
1360 if (err < 0) 1262 if (err < 0)
1361 goto bad; 1263 goto bad;
1362 1264
@@ -1391,25 +1293,25 @@ static int __cpuinit cpuup_prepare(long cpu)
1391 } 1293 }
1392 } 1294 }
1393 cachep->array[cpu] = nc; 1295 cachep->array[cpu] = nc;
1394 l3 = cachep->nodelists[node]; 1296 n = cachep->node[node];
1395 BUG_ON(!l3); 1297 BUG_ON(!n);
1396 1298
1397 spin_lock_irq(&l3->list_lock); 1299 spin_lock_irq(&n->list_lock);
1398 if (!l3->shared) { 1300 if (!n->shared) {
1399 /* 1301 /*
1400 * We are serialised from CPU_DEAD or 1302 * We are serialised from CPU_DEAD or
1401 * CPU_UP_CANCELLED by the cpucontrol lock 1303 * CPU_UP_CANCELLED by the cpucontrol lock
1402 */ 1304 */
1403 l3->shared = shared; 1305 n->shared = shared;
1404 shared = NULL; 1306 shared = NULL;
1405 } 1307 }
1406#ifdef CONFIG_NUMA 1308#ifdef CONFIG_NUMA
1407 if (!l3->alien) { 1309 if (!n->alien) {
1408 l3->alien = alien; 1310 n->alien = alien;
1409 alien = NULL; 1311 alien = NULL;
1410 } 1312 }
1411#endif 1313#endif
1412 spin_unlock_irq(&l3->list_lock); 1314 spin_unlock_irq(&n->list_lock);
1413 kfree(shared); 1315 kfree(shared);
1414 free_alien_cache(alien); 1316 free_alien_cache(alien);
1415 if (cachep->flags & SLAB_DEBUG_OBJECTS) 1317 if (cachep->flags & SLAB_DEBUG_OBJECTS)
@@ -1464,9 +1366,9 @@ static int __cpuinit cpuup_callback(struct notifier_block *nfb,
1464 case CPU_DEAD_FROZEN: 1366 case CPU_DEAD_FROZEN:
1465 /* 1367 /*
1466 * Even if all the cpus of a node are down, we don't free the 1368 * Even if all the cpus of a node are down, we don't free the
1467 * kmem_list3 of any cache. This to avoid a race between 1369 * kmem_cache_node of any cache. This to avoid a race between
1468 * cpu_down, and a kmalloc allocation from another cpu for 1370 * cpu_down, and a kmalloc allocation from another cpu for
1469 * memory from the node of the cpu going down. The list3 1371 * memory from the node of the cpu going down. The node
1470 * structure is usually allocated from kmem_cache_create() and 1372 * structure is usually allocated from kmem_cache_create() and
1471 * gets destroyed at kmem_cache_destroy(). 1373 * gets destroyed at kmem_cache_destroy().
1472 */ 1374 */
@@ -1494,22 +1396,22 @@ static struct notifier_block __cpuinitdata cpucache_notifier = {
1494 * 1396 *
1495 * Must hold slab_mutex. 1397 * Must hold slab_mutex.
1496 */ 1398 */
1497static int __meminit drain_cache_nodelists_node(int node) 1399static int __meminit drain_cache_node_node(int node)
1498{ 1400{
1499 struct kmem_cache *cachep; 1401 struct kmem_cache *cachep;
1500 int ret = 0; 1402 int ret = 0;
1501 1403
1502 list_for_each_entry(cachep, &slab_caches, list) { 1404 list_for_each_entry(cachep, &slab_caches, list) {
1503 struct kmem_list3 *l3; 1405 struct kmem_cache_node *n;
1504 1406
1505 l3 = cachep->nodelists[node]; 1407 n = cachep->node[node];
1506 if (!l3) 1408 if (!n)
1507 continue; 1409 continue;
1508 1410
1509 drain_freelist(cachep, l3, l3->free_objects); 1411 drain_freelist(cachep, n, n->free_objects);
1510 1412
1511 if (!list_empty(&l3->slabs_full) || 1413 if (!list_empty(&n->slabs_full) ||
1512 !list_empty(&l3->slabs_partial)) { 1414 !list_empty(&n->slabs_partial)) {
1513 ret = -EBUSY; 1415 ret = -EBUSY;
1514 break; 1416 break;
1515 } 1417 }
@@ -1531,12 +1433,12 @@ static int __meminit slab_memory_callback(struct notifier_block *self,
1531 switch (action) { 1433 switch (action) {
1532 case MEM_GOING_ONLINE: 1434 case MEM_GOING_ONLINE:
1533 mutex_lock(&slab_mutex); 1435 mutex_lock(&slab_mutex);
1534 ret = init_cache_nodelists_node(nid); 1436 ret = init_cache_node_node(nid);
1535 mutex_unlock(&slab_mutex); 1437 mutex_unlock(&slab_mutex);
1536 break; 1438 break;
1537 case MEM_GOING_OFFLINE: 1439 case MEM_GOING_OFFLINE:
1538 mutex_lock(&slab_mutex); 1440 mutex_lock(&slab_mutex);
1539 ret = drain_cache_nodelists_node(nid); 1441 ret = drain_cache_node_node(nid);
1540 mutex_unlock(&slab_mutex); 1442 mutex_unlock(&slab_mutex);
1541 break; 1443 break;
1542 case MEM_ONLINE: 1444 case MEM_ONLINE:
@@ -1551,37 +1453,37 @@ out:
1551#endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */ 1453#endif /* CONFIG_NUMA && CONFIG_MEMORY_HOTPLUG */
1552 1454
1553/* 1455/*
1554 * swap the static kmem_list3 with kmalloced memory 1456 * swap the static kmem_cache_node with kmalloced memory
1555 */ 1457 */
1556static void __init init_list(struct kmem_cache *cachep, struct kmem_list3 *list, 1458static void __init init_list(struct kmem_cache *cachep, struct kmem_cache_node *list,
1557 int nodeid) 1459 int nodeid)
1558{ 1460{
1559 struct kmem_list3 *ptr; 1461 struct kmem_cache_node *ptr;
1560 1462
1561 ptr = kmalloc_node(sizeof(struct kmem_list3), GFP_NOWAIT, nodeid); 1463 ptr = kmalloc_node(sizeof(struct kmem_cache_node), GFP_NOWAIT, nodeid);
1562 BUG_ON(!ptr); 1464 BUG_ON(!ptr);
1563 1465
1564 memcpy(ptr, list, sizeof(struct kmem_list3)); 1466 memcpy(ptr, list, sizeof(struct kmem_cache_node));
1565 /* 1467 /*
1566 * Do not assume that spinlocks can be initialized via memcpy: 1468 * Do not assume that spinlocks can be initialized via memcpy:
1567 */ 1469 */
1568 spin_lock_init(&ptr->list_lock); 1470 spin_lock_init(&ptr->list_lock);
1569 1471
1570 MAKE_ALL_LISTS(cachep, ptr, nodeid); 1472 MAKE_ALL_LISTS(cachep, ptr, nodeid);
1571 cachep->nodelists[nodeid] = ptr; 1473 cachep->node[nodeid] = ptr;
1572} 1474}
1573 1475
1574/* 1476/*
1575 * For setting up all the kmem_list3s for cache whose buffer_size is same as 1477 * For setting up all the kmem_cache_node for cache whose buffer_size is same as
1576 * size of kmem_list3. 1478 * size of kmem_cache_node.
1577 */ 1479 */
1578static void __init set_up_list3s(struct kmem_cache *cachep, int index) 1480static void __init set_up_node(struct kmem_cache *cachep, int index)
1579{ 1481{
1580 int node; 1482 int node;
1581 1483
1582 for_each_online_node(node) { 1484 for_each_online_node(node) {
1583 cachep->nodelists[node] = &initkmem_list3[index + node]; 1485 cachep->node[node] = &init_kmem_cache_node[index + node];
1584 cachep->nodelists[node]->next_reap = jiffies + 1486 cachep->node[node]->next_reap = jiffies +
1585 REAPTIMEOUT_LIST3 + 1487 REAPTIMEOUT_LIST3 +
1586 ((unsigned long)cachep) % REAPTIMEOUT_LIST3; 1488 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
1587 } 1489 }
@@ -1589,11 +1491,11 @@ static void __init set_up_list3s(struct kmem_cache *cachep, int index)
1589 1491
1590/* 1492/*
1591 * The memory after the last cpu cache pointer is used for the 1493 * The memory after the last cpu cache pointer is used for the
1592 * the nodelists pointer. 1494 * the node pointer.
1593 */ 1495 */
1594static void setup_nodelists_pointer(struct kmem_cache *cachep) 1496static void setup_node_pointer(struct kmem_cache *cachep)
1595{ 1497{
1596 cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids]; 1498 cachep->node = (struct kmem_cache_node **)&cachep->array[nr_cpu_ids];
1597} 1499}
1598 1500
1599/* 1501/*
@@ -1602,20 +1504,18 @@ static void setup_nodelists_pointer(struct kmem_cache *cachep)
1602 */ 1504 */
1603void __init kmem_cache_init(void) 1505void __init kmem_cache_init(void)
1604{ 1506{
1605 struct cache_sizes *sizes;
1606 struct cache_names *names;
1607 int i; 1507 int i;
1608 1508
1609 kmem_cache = &kmem_cache_boot; 1509 kmem_cache = &kmem_cache_boot;
1610 setup_nodelists_pointer(kmem_cache); 1510 setup_node_pointer(kmem_cache);
1611 1511
1612 if (num_possible_nodes() == 1) 1512 if (num_possible_nodes() == 1)
1613 use_alien_caches = 0; 1513 use_alien_caches = 0;
1614 1514
1615 for (i = 0; i < NUM_INIT_LISTS; i++) 1515 for (i = 0; i < NUM_INIT_LISTS; i++)
1616 kmem_list3_init(&initkmem_list3[i]); 1516 kmem_cache_node_init(&init_kmem_cache_node[i]);
1617 1517
1618 set_up_list3s(kmem_cache, CACHE_CACHE); 1518 set_up_node(kmem_cache, CACHE_CACHE);
1619 1519
1620 /* 1520 /*
1621 * Fragmentation resistance on low memory - only use bigger 1521 * Fragmentation resistance on low memory - only use bigger
@@ -1631,7 +1531,7 @@ void __init kmem_cache_init(void)
1631 * kmem_cache structures of all caches, except kmem_cache itself: 1531 * kmem_cache structures of all caches, except kmem_cache itself:
1632 * kmem_cache is statically allocated. 1532 * kmem_cache is statically allocated.
1633 * Initially an __init data area is used for the head array and the 1533 * Initially an __init data area is used for the head array and the
1634 * kmem_list3 structures, it's replaced with a kmalloc allocated 1534 * kmem_cache_node structures, it's replaced with a kmalloc allocated
1635 * array at the end of the bootstrap. 1535 * array at the end of the bootstrap.
1636 * 2) Create the first kmalloc cache. 1536 * 2) Create the first kmalloc cache.
1637 * The struct kmem_cache for the new cache is allocated normally. 1537 * The struct kmem_cache for the new cache is allocated normally.
@@ -1640,7 +1540,7 @@ void __init kmem_cache_init(void)
1640 * head arrays. 1540 * head arrays.
1641 * 4) Replace the __init data head arrays for kmem_cache and the first 1541 * 4) Replace the __init data head arrays for kmem_cache and the first
1642 * kmalloc cache with kmalloc allocated arrays. 1542 * kmalloc cache with kmalloc allocated arrays.
1643 * 5) Replace the __init data for kmem_list3 for kmem_cache and 1543 * 5) Replace the __init data for kmem_cache_node for kmem_cache and
1644 * the other cache's with kmalloc allocated memory. 1544 * the other cache's with kmalloc allocated memory.
1645 * 6) Resize the head arrays of the kmalloc caches to their final sizes. 1545 * 6) Resize the head arrays of the kmalloc caches to their final sizes.
1646 */ 1546 */
@@ -1652,50 +1552,28 @@ void __init kmem_cache_init(void)
1652 */ 1552 */
1653 create_boot_cache(kmem_cache, "kmem_cache", 1553 create_boot_cache(kmem_cache, "kmem_cache",
1654 offsetof(struct kmem_cache, array[nr_cpu_ids]) + 1554 offsetof(struct kmem_cache, array[nr_cpu_ids]) +
1655 nr_node_ids * sizeof(struct kmem_list3 *), 1555 nr_node_ids * sizeof(struct kmem_cache_node *),
1656 SLAB_HWCACHE_ALIGN); 1556 SLAB_HWCACHE_ALIGN);
1657 list_add(&kmem_cache->list, &slab_caches); 1557 list_add(&kmem_cache->list, &slab_caches);
1658 1558
1659 /* 2+3) create the kmalloc caches */ 1559 /* 2+3) create the kmalloc caches */
1660 sizes = malloc_sizes;
1661 names = cache_names;
1662 1560
1663 /* 1561 /*
1664 * Initialize the caches that provide memory for the array cache and the 1562 * Initialize the caches that provide memory for the array cache and the
1665 * kmem_list3 structures first. Without this, further allocations will 1563 * kmem_cache_node structures first. Without this, further allocations will
1666 * bug. 1564 * bug.
1667 */ 1565 */
1668 1566
1669 sizes[INDEX_AC].cs_cachep = create_kmalloc_cache(names[INDEX_AC].name, 1567 kmalloc_caches[INDEX_AC] = create_kmalloc_cache("kmalloc-ac",
1670 sizes[INDEX_AC].cs_size, ARCH_KMALLOC_FLAGS); 1568 kmalloc_size(INDEX_AC), ARCH_KMALLOC_FLAGS);
1671 1569
1672 if (INDEX_AC != INDEX_L3) 1570 if (INDEX_AC != INDEX_NODE)
1673 sizes[INDEX_L3].cs_cachep = 1571 kmalloc_caches[INDEX_NODE] =
1674 create_kmalloc_cache(names[INDEX_L3].name, 1572 create_kmalloc_cache("kmalloc-node",
1675 sizes[INDEX_L3].cs_size, ARCH_KMALLOC_FLAGS); 1573 kmalloc_size(INDEX_NODE), ARCH_KMALLOC_FLAGS);
1676 1574
1677 slab_early_init = 0; 1575 slab_early_init = 0;
1678 1576
1679 while (sizes->cs_size != ULONG_MAX) {
1680 /*
1681 * For performance, all the general caches are L1 aligned.
1682 * This should be particularly beneficial on SMP boxes, as it
1683 * eliminates "false sharing".
1684 * Note for systems short on memory removing the alignment will
1685 * allow tighter packing of the smaller caches.
1686 */
1687 if (!sizes->cs_cachep)
1688 sizes->cs_cachep = create_kmalloc_cache(names->name,
1689 sizes->cs_size, ARCH_KMALLOC_FLAGS);
1690
1691#ifdef CONFIG_ZONE_DMA
1692 sizes->cs_dmacachep = create_kmalloc_cache(
1693 names->name_dma, sizes->cs_size,
1694 SLAB_CACHE_DMA|ARCH_KMALLOC_FLAGS);
1695#endif
1696 sizes++;
1697 names++;
1698 }
1699 /* 4) Replace the bootstrap head arrays */ 1577 /* 4) Replace the bootstrap head arrays */
1700 { 1578 {
1701 struct array_cache *ptr; 1579 struct array_cache *ptr;
@@ -1713,36 +1591,35 @@ void __init kmem_cache_init(void)
1713 1591
1714 ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT); 1592 ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
1715 1593
1716 BUG_ON(cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep) 1594 BUG_ON(cpu_cache_get(kmalloc_caches[INDEX_AC])
1717 != &initarray_generic.cache); 1595 != &initarray_generic.cache);
1718 memcpy(ptr, cpu_cache_get(malloc_sizes[INDEX_AC].cs_cachep), 1596 memcpy(ptr, cpu_cache_get(kmalloc_caches[INDEX_AC]),
1719 sizeof(struct arraycache_init)); 1597 sizeof(struct arraycache_init));
1720 /* 1598 /*
1721 * Do not assume that spinlocks can be initialized via memcpy: 1599 * Do not assume that spinlocks can be initialized via memcpy:
1722 */ 1600 */
1723 spin_lock_init(&ptr->lock); 1601 spin_lock_init(&ptr->lock);
1724 1602
1725 malloc_sizes[INDEX_AC].cs_cachep->array[smp_processor_id()] = 1603 kmalloc_caches[INDEX_AC]->array[smp_processor_id()] = ptr;
1726 ptr;
1727 } 1604 }
1728 /* 5) Replace the bootstrap kmem_list3's */ 1605 /* 5) Replace the bootstrap kmem_cache_node */
1729 { 1606 {
1730 int nid; 1607 int nid;
1731 1608
1732 for_each_online_node(nid) { 1609 for_each_online_node(nid) {
1733 init_list(kmem_cache, &initkmem_list3[CACHE_CACHE + nid], nid); 1610 init_list(kmem_cache, &init_kmem_cache_node[CACHE_CACHE + nid], nid);
1734 1611
1735 init_list(malloc_sizes[INDEX_AC].cs_cachep, 1612 init_list(kmalloc_caches[INDEX_AC],
1736 &initkmem_list3[SIZE_AC + nid], nid); 1613 &init_kmem_cache_node[SIZE_AC + nid], nid);
1737 1614
1738 if (INDEX_AC != INDEX_L3) { 1615 if (INDEX_AC != INDEX_NODE) {
1739 init_list(malloc_sizes[INDEX_L3].cs_cachep, 1616 init_list(kmalloc_caches[INDEX_NODE],
1740 &initkmem_list3[SIZE_L3 + nid], nid); 1617 &init_kmem_cache_node[SIZE_NODE + nid], nid);
1741 } 1618 }
1742 } 1619 }
1743 } 1620 }
1744 1621
1745 slab_state = UP; 1622 create_kmalloc_caches(ARCH_KMALLOC_FLAGS);
1746} 1623}
1747 1624
1748void __init kmem_cache_init_late(void) 1625void __init kmem_cache_init_late(void)
@@ -1773,7 +1650,7 @@ void __init kmem_cache_init_late(void)
1773#ifdef CONFIG_NUMA 1650#ifdef CONFIG_NUMA
1774 /* 1651 /*
1775 * Register a memory hotplug callback that initializes and frees 1652 * Register a memory hotplug callback that initializes and frees
1776 * nodelists. 1653 * node structures.
1777 */ 1654 */
1778 hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); 1655 hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
1779#endif 1656#endif
@@ -1803,7 +1680,7 @@ __initcall(cpucache_init);
1803static noinline void 1680static noinline void
1804slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid) 1681slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
1805{ 1682{
1806 struct kmem_list3 *l3; 1683 struct kmem_cache_node *n;
1807 struct slab *slabp; 1684 struct slab *slabp;
1808 unsigned long flags; 1685 unsigned long flags;
1809 int node; 1686 int node;
@@ -1818,24 +1695,24 @@ slab_out_of_memory(struct kmem_cache *cachep, gfp_t gfpflags, int nodeid)
1818 unsigned long active_objs = 0, num_objs = 0, free_objects = 0; 1695 unsigned long active_objs = 0, num_objs = 0, free_objects = 0;
1819 unsigned long active_slabs = 0, num_slabs = 0; 1696 unsigned long active_slabs = 0, num_slabs = 0;
1820 1697
1821 l3 = cachep->nodelists[node]; 1698 n = cachep->node[node];
1822 if (!l3) 1699 if (!n)
1823 continue; 1700 continue;
1824 1701
1825 spin_lock_irqsave(&l3->list_lock, flags); 1702 spin_lock_irqsave(&n->list_lock, flags);
1826 list_for_each_entry(slabp, &l3->slabs_full, list) { 1703 list_for_each_entry(slabp, &n->slabs_full, list) {
1827 active_objs += cachep->num; 1704 active_objs += cachep->num;
1828 active_slabs++; 1705 active_slabs++;
1829 } 1706 }
1830 list_for_each_entry(slabp, &l3->slabs_partial, list) { 1707 list_for_each_entry(slabp, &n->slabs_partial, list) {
1831 active_objs += slabp->inuse; 1708 active_objs += slabp->inuse;
1832 active_slabs++; 1709 active_slabs++;
1833 } 1710 }
1834 list_for_each_entry(slabp, &l3->slabs_free, list) 1711 list_for_each_entry(slabp, &n->slabs_free, list)
1835 num_slabs++; 1712 num_slabs++;
1836 1713
1837 free_objects += l3->free_objects; 1714 free_objects += n->free_objects;
1838 spin_unlock_irqrestore(&l3->list_lock, flags); 1715 spin_unlock_irqrestore(&n->list_lock, flags);
1839 1716
1840 num_slabs += active_slabs; 1717 num_slabs += active_slabs;
1841 num_objs = num_slabs * cachep->num; 1718 num_objs = num_slabs * cachep->num;
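As a worked example of the accounting above: with cachep->num = 32 objects per slab, ten full slabs, three partial slabs holding 20, 5 and 1 objects, and two completely free slabs, the loop ends up with active_objs = 320 + 26 = 346, active_slabs = 13, num_slabs = 2 + 13 = 15 and num_objs = 15 * 32 = 480, while free_objects is taken straight from the per-node counter.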
@@ -2258,7 +2135,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
2258 if (slab_state == DOWN) { 2135 if (slab_state == DOWN) {
2259 /* 2136 /*
2260 * Note: Creation of first cache (kmem_cache). 2137 * Note: Creation of first cache (kmem_cache).
2261 * The setup_list3s is taken care 2138 * The setup_node is taken care
2262 * of by the caller of __kmem_cache_create 2139 * of by the caller of __kmem_cache_create
2263 */ 2140 */
2264 cachep->array[smp_processor_id()] = &initarray_generic.cache; 2141 cachep->array[smp_processor_id()] = &initarray_generic.cache;
@@ -2272,13 +2149,13 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
2272 cachep->array[smp_processor_id()] = &initarray_generic.cache; 2149 cachep->array[smp_processor_id()] = &initarray_generic.cache;
2273 2150
2274 /* 2151 /*
2275 * If the cache that's used by kmalloc(sizeof(kmem_list3)) is 2152 * If the cache that's used by kmalloc(sizeof(kmem_cache_node)) is
2276 * the second cache, then we need to set up all its list3s, 2153 * the second cache, then we need to set up all its node structures,
2277 * otherwise the creation of further caches will BUG(). 2154 * otherwise the creation of further caches will BUG().
2278 */ 2155 */
2279 set_up_list3s(cachep, SIZE_AC); 2156 set_up_node(cachep, SIZE_AC);
2280 if (INDEX_AC == INDEX_L3) 2157 if (INDEX_AC == INDEX_NODE)
2281 slab_state = PARTIAL_L3; 2158 slab_state = PARTIAL_NODE;
2282 else 2159 else
2283 slab_state = PARTIAL_ARRAYCACHE; 2160 slab_state = PARTIAL_ARRAYCACHE;
2284 } else { 2161 } else {
@@ -2287,20 +2164,20 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
2287 kmalloc(sizeof(struct arraycache_init), gfp); 2164 kmalloc(sizeof(struct arraycache_init), gfp);
2288 2165
2289 if (slab_state == PARTIAL_ARRAYCACHE) { 2166 if (slab_state == PARTIAL_ARRAYCACHE) {
2290 set_up_list3s(cachep, SIZE_L3); 2167 set_up_node(cachep, SIZE_NODE);
2291 slab_state = PARTIAL_L3; 2168 slab_state = PARTIAL_NODE;
2292 } else { 2169 } else {
2293 int node; 2170 int node;
2294 for_each_online_node(node) { 2171 for_each_online_node(node) {
2295 cachep->nodelists[node] = 2172 cachep->node[node] =
2296 kmalloc_node(sizeof(struct kmem_list3), 2173 kmalloc_node(sizeof(struct kmem_cache_node),
2297 gfp, node); 2174 gfp, node);
2298 BUG_ON(!cachep->nodelists[node]); 2175 BUG_ON(!cachep->node[node]);
2299 kmem_list3_init(cachep->nodelists[node]); 2176 kmem_cache_node_init(cachep->node[node]);
2300 } 2177 }
2301 } 2178 }
2302 } 2179 }
2303 cachep->nodelists[numa_mem_id()]->next_reap = 2180 cachep->node[numa_mem_id()]->next_reap =
2304 jiffies + REAPTIMEOUT_LIST3 + 2181 jiffies + REAPTIMEOUT_LIST3 +
2305 ((unsigned long)cachep) % REAPTIMEOUT_LIST3; 2182 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
2306 2183
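The next_reap formula staggers the periodic reaping of different caches: every cache waits at least REAPTIMEOUT_LIST3 jiffies, plus a per-cache offset derived from its pointer value, so they do not all hit cache_reap() in the same tick. A small userspace sketch of that staggering (the timeout value and the pointer-like constants are made up for illustration):

    #include <stdio.h>

    #define REAPTIMEOUT 1000UL   /* stand-in for REAPTIMEOUT_LIST3 */

    int main(void)
    {
        unsigned long jiffies = 500000UL;
        /* fake kmem_cache addresses; only their value modulo the timeout matters */
        unsigned long cachep[] = { 0x12345680UL, 0x123459c0UL, 0x12345e00UL };

        for (int i = 0; i < 3; i++) {
            unsigned long next_reap = jiffies + REAPTIMEOUT +
                                      cachep[i] % REAPTIMEOUT;
            printf("cache %d reaps at jiffies + %lu\n", i, next_reap - jiffies);
        }
        return 0;
    }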
@@ -2403,7 +2280,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
2403 else 2280 else
2404 gfp = GFP_NOWAIT; 2281 gfp = GFP_NOWAIT;
2405 2282
2406 setup_nodelists_pointer(cachep); 2283 setup_node_pointer(cachep);
2407#if DEBUG 2284#if DEBUG
2408 2285
2409 /* 2286 /*
@@ -2426,7 +2303,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
2426 size += BYTES_PER_WORD; 2303 size += BYTES_PER_WORD;
2427 } 2304 }
2428#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC) 2305#if FORCED_DEBUG && defined(CONFIG_DEBUG_PAGEALLOC)
2429 if (size >= malloc_sizes[INDEX_L3 + 1].cs_size 2306 if (size >= kmalloc_size(INDEX_NODE + 1)
2430 && cachep->object_size > cache_line_size() 2307 && cachep->object_size > cache_line_size()
2431 && ALIGN(size, cachep->align) < PAGE_SIZE) { 2308 && ALIGN(size, cachep->align) < PAGE_SIZE) {
2432 cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align); 2309 cachep->obj_offset += PAGE_SIZE - ALIGN(size, cachep->align);
@@ -2497,7 +2374,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
2497 cachep->reciprocal_buffer_size = reciprocal_value(size); 2374 cachep->reciprocal_buffer_size = reciprocal_value(size);
2498 2375
2499 if (flags & CFLGS_OFF_SLAB) { 2376 if (flags & CFLGS_OFF_SLAB) {
2500 cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u); 2377 cachep->slabp_cache = kmalloc_slab(slab_size, 0u);
2501 /* 2378 /*
2502 * This is a possibility for one of the malloc_sizes caches. 2379 * This is a possibility for one of the malloc_sizes caches.
2503 * But since we go off slab only for object size greater than 2380 * But since we go off slab only for object size greater than
@@ -2543,7 +2420,7 @@ static void check_spinlock_acquired(struct kmem_cache *cachep)
2543{ 2420{
2544#ifdef CONFIG_SMP 2421#ifdef CONFIG_SMP
2545 check_irq_off(); 2422 check_irq_off();
2546 assert_spin_locked(&cachep->nodelists[numa_mem_id()]->list_lock); 2423 assert_spin_locked(&cachep->node[numa_mem_id()]->list_lock);
2547#endif 2424#endif
2548} 2425}
2549 2426
@@ -2551,7 +2428,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
2551{ 2428{
2552#ifdef CONFIG_SMP 2429#ifdef CONFIG_SMP
2553 check_irq_off(); 2430 check_irq_off();
2554 assert_spin_locked(&cachep->nodelists[node]->list_lock); 2431 assert_spin_locked(&cachep->node[node]->list_lock);
2555#endif 2432#endif
2556} 2433}
2557 2434
@@ -2562,7 +2439,7 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
2562#define check_spinlock_acquired_node(x, y) do { } while(0) 2439#define check_spinlock_acquired_node(x, y) do { } while(0)
2563#endif 2440#endif
2564 2441
2565static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, 2442static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
2566 struct array_cache *ac, 2443 struct array_cache *ac,
2567 int force, int node); 2444 int force, int node);
2568 2445
@@ -2574,29 +2451,29 @@ static void do_drain(void *arg)
2574 2451
2575 check_irq_off(); 2452 check_irq_off();
2576 ac = cpu_cache_get(cachep); 2453 ac = cpu_cache_get(cachep);
2577 spin_lock(&cachep->nodelists[node]->list_lock); 2454 spin_lock(&cachep->node[node]->list_lock);
2578 free_block(cachep, ac->entry, ac->avail, node); 2455 free_block(cachep, ac->entry, ac->avail, node);
2579 spin_unlock(&cachep->nodelists[node]->list_lock); 2456 spin_unlock(&cachep->node[node]->list_lock);
2580 ac->avail = 0; 2457 ac->avail = 0;
2581} 2458}
2582 2459
2583static void drain_cpu_caches(struct kmem_cache *cachep) 2460static void drain_cpu_caches(struct kmem_cache *cachep)
2584{ 2461{
2585 struct kmem_list3 *l3; 2462 struct kmem_cache_node *n;
2586 int node; 2463 int node;
2587 2464
2588 on_each_cpu(do_drain, cachep, 1); 2465 on_each_cpu(do_drain, cachep, 1);
2589 check_irq_on(); 2466 check_irq_on();
2590 for_each_online_node(node) { 2467 for_each_online_node(node) {
2591 l3 = cachep->nodelists[node]; 2468 n = cachep->node[node];
2592 if (l3 && l3->alien) 2469 if (n && n->alien)
2593 drain_alien_cache(cachep, l3->alien); 2470 drain_alien_cache(cachep, n->alien);
2594 } 2471 }
2595 2472
2596 for_each_online_node(node) { 2473 for_each_online_node(node) {
2597 l3 = cachep->nodelists[node]; 2474 n = cachep->node[node];
2598 if (l3) 2475 if (n)
2599 drain_array(cachep, l3, l3->shared, 1, node); 2476 drain_array(cachep, n, n->shared, 1, node);
2600 } 2477 }
2601} 2478}
2602 2479
@@ -2607,19 +2484,19 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
2607 * Returns the actual number of slabs released. 2484 * Returns the actual number of slabs released.
2608 */ 2485 */
2609static int drain_freelist(struct kmem_cache *cache, 2486static int drain_freelist(struct kmem_cache *cache,
2610 struct kmem_list3 *l3, int tofree) 2487 struct kmem_cache_node *n, int tofree)
2611{ 2488{
2612 struct list_head *p; 2489 struct list_head *p;
2613 int nr_freed; 2490 int nr_freed;
2614 struct slab *slabp; 2491 struct slab *slabp;
2615 2492
2616 nr_freed = 0; 2493 nr_freed = 0;
2617 while (nr_freed < tofree && !list_empty(&l3->slabs_free)) { 2494 while (nr_freed < tofree && !list_empty(&n->slabs_free)) {
2618 2495
2619 spin_lock_irq(&l3->list_lock); 2496 spin_lock_irq(&n->list_lock);
2620 p = l3->slabs_free.prev; 2497 p = n->slabs_free.prev;
2621 if (p == &l3->slabs_free) { 2498 if (p == &n->slabs_free) {
2622 spin_unlock_irq(&l3->list_lock); 2499 spin_unlock_irq(&n->list_lock);
2623 goto out; 2500 goto out;
2624 } 2501 }
2625 2502
@@ -2632,8 +2509,8 @@ static int drain_freelist(struct kmem_cache *cache,
2632 * Safe to drop the lock. The slab is no longer linked 2509 * Safe to drop the lock. The slab is no longer linked
2633 * to the cache. 2510 * to the cache.
2634 */ 2511 */
2635 l3->free_objects -= cache->num; 2512 n->free_objects -= cache->num;
2636 spin_unlock_irq(&l3->list_lock); 2513 spin_unlock_irq(&n->list_lock);
2637 slab_destroy(cache, slabp); 2514 slab_destroy(cache, slabp);
2638 nr_freed++; 2515 nr_freed++;
2639 } 2516 }
@@ -2645,20 +2522,20 @@ out:
2645static int __cache_shrink(struct kmem_cache *cachep) 2522static int __cache_shrink(struct kmem_cache *cachep)
2646{ 2523{
2647 int ret = 0, i = 0; 2524 int ret = 0, i = 0;
2648 struct kmem_list3 *l3; 2525 struct kmem_cache_node *n;
2649 2526
2650 drain_cpu_caches(cachep); 2527 drain_cpu_caches(cachep);
2651 2528
2652 check_irq_on(); 2529 check_irq_on();
2653 for_each_online_node(i) { 2530 for_each_online_node(i) {
2654 l3 = cachep->nodelists[i]; 2531 n = cachep->node[i];
2655 if (!l3) 2532 if (!n)
2656 continue; 2533 continue;
2657 2534
2658 drain_freelist(cachep, l3, l3->free_objects); 2535 drain_freelist(cachep, n, n->free_objects);
2659 2536
2660 ret += !list_empty(&l3->slabs_full) || 2537 ret += !list_empty(&n->slabs_full) ||
2661 !list_empty(&l3->slabs_partial); 2538 !list_empty(&n->slabs_partial);
2662 } 2539 }
2663 return (ret ? 1 : 0); 2540 return (ret ? 1 : 0);
2664} 2541}
@@ -2687,7 +2564,7 @@ EXPORT_SYMBOL(kmem_cache_shrink);
2687int __kmem_cache_shutdown(struct kmem_cache *cachep) 2564int __kmem_cache_shutdown(struct kmem_cache *cachep)
2688{ 2565{
2689 int i; 2566 int i;
2690 struct kmem_list3 *l3; 2567 struct kmem_cache_node *n;
2691 int rc = __cache_shrink(cachep); 2568 int rc = __cache_shrink(cachep);
2692 2569
2693 if (rc) 2570 if (rc)
@@ -2696,13 +2573,13 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep)
2696 for_each_online_cpu(i) 2573 for_each_online_cpu(i)
2697 kfree(cachep->array[i]); 2574 kfree(cachep->array[i]);
2698 2575
2699 /* NUMA: free the list3 structures */ 2576 /* NUMA: free the node structures */
2700 for_each_online_node(i) { 2577 for_each_online_node(i) {
2701 l3 = cachep->nodelists[i]; 2578 n = cachep->node[i];
2702 if (l3) { 2579 if (n) {
2703 kfree(l3->shared); 2580 kfree(n->shared);
2704 free_alien_cache(l3->alien); 2581 free_alien_cache(n->alien);
2705 kfree(l3); 2582 kfree(n);
2706 } 2583 }
2707 } 2584 }
2708 return 0; 2585 return 0;
@@ -2884,7 +2761,7 @@ static int cache_grow(struct kmem_cache *cachep,
2884 struct slab *slabp; 2761 struct slab *slabp;
2885 size_t offset; 2762 size_t offset;
2886 gfp_t local_flags; 2763 gfp_t local_flags;
2887 struct kmem_list3 *l3; 2764 struct kmem_cache_node *n;
2888 2765
2889 /* 2766 /*
2890 * Be lazy and only check for valid flags here, keeping it out of the 2767 * Be lazy and only check for valid flags here, keeping it out of the
@@ -2893,17 +2770,17 @@ static int cache_grow(struct kmem_cache *cachep,
2893 BUG_ON(flags & GFP_SLAB_BUG_MASK); 2770 BUG_ON(flags & GFP_SLAB_BUG_MASK);
2894 local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK); 2771 local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
2895 2772
2896 /* Take the l3 list lock to change the colour_next on this node */ 2773 /* Take the node list lock to change the colour_next on this node */
2897 check_irq_off(); 2774 check_irq_off();
2898 l3 = cachep->nodelists[nodeid]; 2775 n = cachep->node[nodeid];
2899 spin_lock(&l3->list_lock); 2776 spin_lock(&n->list_lock);
2900 2777
2901 /* Get colour for the slab, and calculate the next value. */ 2778 /* Get colour for the slab, and calculate the next value. */
2902 offset = l3->colour_next; 2779 offset = n->colour_next;
2903 l3->colour_next++; 2780 n->colour_next++;
2904 if (l3->colour_next >= cachep->colour) 2781 if (n->colour_next >= cachep->colour)
2905 l3->colour_next = 0; 2782 n->colour_next = 0;
2906 spin_unlock(&l3->list_lock); 2783 spin_unlock(&n->list_lock);
2907 2784
2908 offset *= cachep->colour_off; 2785 offset *= cachep->colour_off;
2909 2786
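colour_next cycles through the cache's colour range per node, and the chosen value times colour_off (typically the L1 cache line size) becomes the offset of the first object in the new slab, so consecutive slabs on a node start their objects on different cache lines. A quick userspace model of the rotation shown above, with illustrative values:

    #include <stdio.h>

    int main(void)
    {
        /* illustrative: 64-byte lines, room for 3 colours in the slab's slack */
        unsigned int colour = 3, colour_off = 64, colour_next = 0;

        for (int slab = 0; slab < 6; slab++) {
            unsigned int offset = colour_next;

            if (++colour_next >= colour)
                colour_next = 0;
            offset *= colour_off;
            printf("slab %d: first object at offset %u\n", slab, offset);
        }
        return 0;
    }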
@@ -2940,13 +2817,13 @@ static int cache_grow(struct kmem_cache *cachep,
2940 if (local_flags & __GFP_WAIT) 2817 if (local_flags & __GFP_WAIT)
2941 local_irq_disable(); 2818 local_irq_disable();
2942 check_irq_off(); 2819 check_irq_off();
2943 spin_lock(&l3->list_lock); 2820 spin_lock(&n->list_lock);
2944 2821
2945 /* Make slab active. */ 2822 /* Make slab active. */
2946 list_add_tail(&slabp->list, &(l3->slabs_free)); 2823 list_add_tail(&slabp->list, &(n->slabs_free));
2947 STATS_INC_GROWN(cachep); 2824 STATS_INC_GROWN(cachep);
2948 l3->free_objects += cachep->num; 2825 n->free_objects += cachep->num;
2949 spin_unlock(&l3->list_lock); 2826 spin_unlock(&n->list_lock);
2950 return 1; 2827 return 1;
2951opps1: 2828opps1:
2952 kmem_freepages(cachep, objp); 2829 kmem_freepages(cachep, objp);
@@ -3074,7 +2951,7 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags,
3074 bool force_refill) 2951 bool force_refill)
3075{ 2952{
3076 int batchcount; 2953 int batchcount;
3077 struct kmem_list3 *l3; 2954 struct kmem_cache_node *n;
3078 struct array_cache *ac; 2955 struct array_cache *ac;
3079 int node; 2956 int node;
3080 2957
@@ -3093,14 +2970,14 @@ retry:
3093 */ 2970 */
3094 batchcount = BATCHREFILL_LIMIT; 2971 batchcount = BATCHREFILL_LIMIT;
3095 } 2972 }
3096 l3 = cachep->nodelists[node]; 2973 n = cachep->node[node];
3097 2974
3098 BUG_ON(ac->avail > 0 || !l3); 2975 BUG_ON(ac->avail > 0 || !n);
3099 spin_lock(&l3->list_lock); 2976 spin_lock(&n->list_lock);
3100 2977
3101 /* See if we can refill from the shared array */ 2978 /* See if we can refill from the shared array */
3102 if (l3->shared && transfer_objects(ac, l3->shared, batchcount)) { 2979 if (n->shared && transfer_objects(ac, n->shared, batchcount)) {
3103 l3->shared->touched = 1; 2980 n->shared->touched = 1;
3104 goto alloc_done; 2981 goto alloc_done;
3105 } 2982 }
3106 2983
@@ -3108,11 +2985,11 @@ retry:
3108 struct list_head *entry; 2985 struct list_head *entry;
3109 struct slab *slabp; 2986 struct slab *slabp;
3110 /* Get slab alloc is to come from. */ 2987 /* Get slab alloc is to come from. */
3111 entry = l3->slabs_partial.next; 2988 entry = n->slabs_partial.next;
3112 if (entry == &l3->slabs_partial) { 2989 if (entry == &n->slabs_partial) {
3113 l3->free_touched = 1; 2990 n->free_touched = 1;
3114 entry = l3->slabs_free.next; 2991 entry = n->slabs_free.next;
3115 if (entry == &l3->slabs_free) 2992 if (entry == &n->slabs_free)
3116 goto must_grow; 2993 goto must_grow;
3117 } 2994 }
3118 2995
@@ -3140,15 +3017,15 @@ retry:
3140 /* move slabp to correct slabp list: */ 3017 /* move slabp to correct slabp list: */
3141 list_del(&slabp->list); 3018 list_del(&slabp->list);
3142 if (slabp->free == BUFCTL_END) 3019 if (slabp->free == BUFCTL_END)
3143 list_add(&slabp->list, &l3->slabs_full); 3020 list_add(&slabp->list, &n->slabs_full);
3144 else 3021 else
3145 list_add(&slabp->list, &l3->slabs_partial); 3022 list_add(&slabp->list, &n->slabs_partial);
3146 } 3023 }
3147 3024
3148must_grow: 3025must_grow:
3149 l3->free_objects -= ac->avail; 3026 n->free_objects -= ac->avail;
3150alloc_done: 3027alloc_done:
3151 spin_unlock(&l3->list_lock); 3028 spin_unlock(&n->list_lock);
3152 3029
3153 if (unlikely(!ac->avail)) { 3030 if (unlikely(!ac->avail)) {
3154 int x; 3031 int x;
@@ -3315,7 +3192,7 @@ static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags)
3315/* 3192/*
3316 * Fallback function if there was no memory available and no objects on a 3193 * Fallback function if there was no memory available and no objects on a
3317 * certain node and fall back is permitted. First we scan all the 3194 * certain node and fall back is permitted. First we scan all the
3318 * available nodelists for available objects. If that fails then we 3195 * available nodes for available objects. If that fails then we
3319 * perform an allocation without specifying a node. This allows the page 3196 * perform an allocation without specifying a node. This allows the page
3320 * allocator to do its reclaim / fallback magic. We then insert the 3197 * allocator to do its reclaim / fallback magic. We then insert the
3321 * slab into the proper nodelist and then allocate from it. 3198 * slab into the proper nodelist and then allocate from it.
@@ -3349,8 +3226,8 @@ retry:
3349 nid = zone_to_nid(zone); 3226 nid = zone_to_nid(zone);
3350 3227
3351 if (cpuset_zone_allowed_hardwall(zone, flags) && 3228 if (cpuset_zone_allowed_hardwall(zone, flags) &&
3352 cache->nodelists[nid] && 3229 cache->node[nid] &&
3353 cache->nodelists[nid]->free_objects) { 3230 cache->node[nid]->free_objects) {
3354 obj = ____cache_alloc_node(cache, 3231 obj = ____cache_alloc_node(cache,
3355 flags | GFP_THISNODE, nid); 3232 flags | GFP_THISNODE, nid);
3356 if (obj) 3233 if (obj)
@@ -3406,21 +3283,22 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
3406{ 3283{
3407 struct list_head *entry; 3284 struct list_head *entry;
3408 struct slab *slabp; 3285 struct slab *slabp;
3409 struct kmem_list3 *l3; 3286 struct kmem_cache_node *n;
3410 void *obj; 3287 void *obj;
3411 int x; 3288 int x;
3412 3289
3413 l3 = cachep->nodelists[nodeid]; 3290 VM_BUG_ON(nodeid > num_online_nodes());
3414 BUG_ON(!l3); 3291 n = cachep->node[nodeid];
3292 BUG_ON(!n);
3415 3293
3416retry: 3294retry:
3417 check_irq_off(); 3295 check_irq_off();
3418 spin_lock(&l3->list_lock); 3296 spin_lock(&n->list_lock);
3419 entry = l3->slabs_partial.next; 3297 entry = n->slabs_partial.next;
3420 if (entry == &l3->slabs_partial) { 3298 if (entry == &n->slabs_partial) {
3421 l3->free_touched = 1; 3299 n->free_touched = 1;
3422 entry = l3->slabs_free.next; 3300 entry = n->slabs_free.next;
3423 if (entry == &l3->slabs_free) 3301 if (entry == &n->slabs_free)
3424 goto must_grow; 3302 goto must_grow;
3425 } 3303 }
3426 3304
@@ -3436,20 +3314,20 @@ retry:
3436 3314
3437 obj = slab_get_obj(cachep, slabp, nodeid); 3315 obj = slab_get_obj(cachep, slabp, nodeid);
3438 check_slabp(cachep, slabp); 3316 check_slabp(cachep, slabp);
3439 l3->free_objects--; 3317 n->free_objects--;
3440 /* move slabp to correct slabp list: */ 3318 /* move slabp to correct slabp list: */
3441 list_del(&slabp->list); 3319 list_del(&slabp->list);
3442 3320
3443 if (slabp->free == BUFCTL_END) 3321 if (slabp->free == BUFCTL_END)
3444 list_add(&slabp->list, &l3->slabs_full); 3322 list_add(&slabp->list, &n->slabs_full);
3445 else 3323 else
3446 list_add(&slabp->list, &l3->slabs_partial); 3324 list_add(&slabp->list, &n->slabs_partial);
3447 3325
3448 spin_unlock(&l3->list_lock); 3326 spin_unlock(&n->list_lock);
3449 goto done; 3327 goto done;
3450 3328
3451must_grow: 3329must_grow:
3452 spin_unlock(&l3->list_lock); 3330 spin_unlock(&n->list_lock);
3453 x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL); 3331 x = cache_grow(cachep, flags | GFP_THISNODE, nodeid, NULL);
3454 if (x) 3332 if (x)
3455 goto retry; 3333 goto retry;
@@ -3495,7 +3373,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
3495 if (nodeid == NUMA_NO_NODE) 3373 if (nodeid == NUMA_NO_NODE)
3496 nodeid = slab_node; 3374 nodeid = slab_node;
3497 3375
3498 if (unlikely(!cachep->nodelists[nodeid])) { 3376 if (unlikely(!cachep->node[nodeid])) {
3499 /* Node not bootstrapped yet */ 3377 /* Node not bootstrapped yet */
3500 ptr = fallback_alloc(cachep, flags); 3378 ptr = fallback_alloc(cachep, flags);
3501 goto out; 3379 goto out;
@@ -3601,7 +3479,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
3601 int node) 3479 int node)
3602{ 3480{
3603 int i; 3481 int i;
3604 struct kmem_list3 *l3; 3482 struct kmem_cache_node *n;
3605 3483
3606 for (i = 0; i < nr_objects; i++) { 3484 for (i = 0; i < nr_objects; i++) {
3607 void *objp; 3485 void *objp;
@@ -3611,19 +3489,19 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
3611 objp = objpp[i]; 3489 objp = objpp[i];
3612 3490
3613 slabp = virt_to_slab(objp); 3491 slabp = virt_to_slab(objp);
3614 l3 = cachep->nodelists[node]; 3492 n = cachep->node[node];
3615 list_del(&slabp->list); 3493 list_del(&slabp->list);
3616 check_spinlock_acquired_node(cachep, node); 3494 check_spinlock_acquired_node(cachep, node);
3617 check_slabp(cachep, slabp); 3495 check_slabp(cachep, slabp);
3618 slab_put_obj(cachep, slabp, objp, node); 3496 slab_put_obj(cachep, slabp, objp, node);
3619 STATS_DEC_ACTIVE(cachep); 3497 STATS_DEC_ACTIVE(cachep);
3620 l3->free_objects++; 3498 n->free_objects++;
3621 check_slabp(cachep, slabp); 3499 check_slabp(cachep, slabp);
3622 3500
3623 /* fixup slab chains */ 3501 /* fixup slab chains */
3624 if (slabp->inuse == 0) { 3502 if (slabp->inuse == 0) {
3625 if (l3->free_objects > l3->free_limit) { 3503 if (n->free_objects > n->free_limit) {
3626 l3->free_objects -= cachep->num; 3504 n->free_objects -= cachep->num;
3627 /* No need to drop any previously held 3505 /* No need to drop any previously held
3628 * lock here, even if we have a off-slab slab 3506 * lock here, even if we have a off-slab slab
3629 * descriptor it is guaranteed to come from 3507 * descriptor it is guaranteed to come from
@@ -3632,14 +3510,14 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
3632 */ 3510 */
3633 slab_destroy(cachep, slabp); 3511 slab_destroy(cachep, slabp);
3634 } else { 3512 } else {
3635 list_add(&slabp->list, &l3->slabs_free); 3513 list_add(&slabp->list, &n->slabs_free);
3636 } 3514 }
3637 } else { 3515 } else {
3638 /* Unconditionally move a slab to the end of the 3516 /* Unconditionally move a slab to the end of the
3639 * partial list on free - maximum time for the 3517 * partial list on free - maximum time for the
3640 * other objects to be freed, too. 3518 * other objects to be freed, too.
3641 */ 3519 */
3642 list_add_tail(&slabp->list, &l3->slabs_partial); 3520 list_add_tail(&slabp->list, &n->slabs_partial);
3643 } 3521 }
3644 } 3522 }
3645} 3523}
@@ -3647,7 +3525,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects,
3647static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) 3525static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
3648{ 3526{
3649 int batchcount; 3527 int batchcount;
3650 struct kmem_list3 *l3; 3528 struct kmem_cache_node *n;
3651 int node = numa_mem_id(); 3529 int node = numa_mem_id();
3652 3530
3653 batchcount = ac->batchcount; 3531 batchcount = ac->batchcount;
@@ -3655,10 +3533,10 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
3655 BUG_ON(!batchcount || batchcount > ac->avail); 3533 BUG_ON(!batchcount || batchcount > ac->avail);
3656#endif 3534#endif
3657 check_irq_off(); 3535 check_irq_off();
3658 l3 = cachep->nodelists[node]; 3536 n = cachep->node[node];
3659 spin_lock(&l3->list_lock); 3537 spin_lock(&n->list_lock);
3660 if (l3->shared) { 3538 if (n->shared) {
3661 struct array_cache *shared_array = l3->shared; 3539 struct array_cache *shared_array = n->shared;
3662 int max = shared_array->limit - shared_array->avail; 3540 int max = shared_array->limit - shared_array->avail;
3663 if (max) { 3541 if (max) {
3664 if (batchcount > max) 3542 if (batchcount > max)
@@ -3677,8 +3555,8 @@ free_done:
3677 int i = 0; 3555 int i = 0;
3678 struct list_head *p; 3556 struct list_head *p;
3679 3557
3680 p = l3->slabs_free.next; 3558 p = n->slabs_free.next;
3681 while (p != &(l3->slabs_free)) { 3559 while (p != &(n->slabs_free)) {
3682 struct slab *slabp; 3560 struct slab *slabp;
3683 3561
3684 slabp = list_entry(p, struct slab, list); 3562 slabp = list_entry(p, struct slab, list);
@@ -3690,7 +3568,7 @@ free_done:
3690 STATS_SET_FREEABLE(cachep, i); 3568 STATS_SET_FREEABLE(cachep, i);
3691 } 3569 }
3692#endif 3570#endif
3693 spin_unlock(&l3->list_lock); 3571 spin_unlock(&n->list_lock);
3694 ac->avail -= batchcount; 3572 ac->avail -= batchcount;
3695 memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail); 3573 memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
3696} 3574}
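cache_flusharray() hands the first batchcount pointers of the per-cpu array either to the node's shared array (when it has room) or back to the slabs via free_block(), then slides the remaining pointers to the front of the array with memmove(). A toy model of just that array bookkeeping:

    #include <stdio.h>
    #include <string.h>

    #define LIMIT 16

    int main(void)
    {
        void *entry[LIMIT];
        int avail = LIMIT, batchcount = 6;

        for (int i = 0; i < LIMIT; i++)
            entry[i] = (void *)(long)(i + 1);   /* fake object pointers */

        /* entry[0..batchcount-1] go to the shared array or free_block() */
        avail -= batchcount;
        memmove(entry, &entry[batchcount], sizeof(void *) * avail);

        printf("%d objects kept, first kept entry is fake pointer %ld\n",
               avail, (long)entry[0]);
        return 0;
    }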
@@ -3800,7 +3678,7 @@ __do_kmalloc_node(size_t size, gfp_t flags, int node, unsigned long caller)
3800{ 3678{
3801 struct kmem_cache *cachep; 3679 struct kmem_cache *cachep;
3802 3680
3803 cachep = kmem_find_general_cachep(size, flags); 3681 cachep = kmalloc_slab(size, flags);
3804 if (unlikely(ZERO_OR_NULL_PTR(cachep))) 3682 if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3805 return cachep; 3683 return cachep;
3806 return kmem_cache_alloc_node_trace(cachep, flags, node, size); 3684 return kmem_cache_alloc_node_trace(cachep, flags, node, size);
@@ -3845,7 +3723,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
3845 * Then kmalloc uses the uninlined functions instead of the inline 3723 * Then kmalloc uses the uninlined functions instead of the inline
3846 * functions. 3724 * functions.
3847 */ 3725 */
3848 cachep = __find_general_cachep(size, flags); 3726 cachep = kmalloc_slab(size, flags);
3849 if (unlikely(ZERO_OR_NULL_PTR(cachep))) 3727 if (unlikely(ZERO_OR_NULL_PTR(cachep)))
3850 return cachep; 3728 return cachep;
3851 ret = slab_alloc(cachep, flags, caller); 3729 ret = slab_alloc(cachep, flags, caller);
@@ -3934,12 +3812,12 @@ void kfree(const void *objp)
3934EXPORT_SYMBOL(kfree); 3812EXPORT_SYMBOL(kfree);
3935 3813
3936/* 3814/*
3937 * This initializes kmem_list3 or resizes various caches for all nodes. 3815 * This initializes kmem_cache_node or resizes various caches for all nodes.
3938 */ 3816 */
3939static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) 3817static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
3940{ 3818{
3941 int node; 3819 int node;
3942 struct kmem_list3 *l3; 3820 struct kmem_cache_node *n;
3943 struct array_cache *new_shared; 3821 struct array_cache *new_shared;
3944 struct array_cache **new_alien = NULL; 3822 struct array_cache **new_alien = NULL;
3945 3823
@@ -3962,43 +3840,43 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp)
3962 } 3840 }
3963 } 3841 }
3964 3842
3965 l3 = cachep->nodelists[node]; 3843 n = cachep->node[node];
3966 if (l3) { 3844 if (n) {
3967 struct array_cache *shared = l3->shared; 3845 struct array_cache *shared = n->shared;
3968 3846
3969 spin_lock_irq(&l3->list_lock); 3847 spin_lock_irq(&n->list_lock);
3970 3848
3971 if (shared) 3849 if (shared)
3972 free_block(cachep, shared->entry, 3850 free_block(cachep, shared->entry,
3973 shared->avail, node); 3851 shared->avail, node);
3974 3852
3975 l3->shared = new_shared; 3853 n->shared = new_shared;
3976 if (!l3->alien) { 3854 if (!n->alien) {
3977 l3->alien = new_alien; 3855 n->alien = new_alien;
3978 new_alien = NULL; 3856 new_alien = NULL;
3979 } 3857 }
3980 l3->free_limit = (1 + nr_cpus_node(node)) * 3858 n->free_limit = (1 + nr_cpus_node(node)) *
3981 cachep->batchcount + cachep->num; 3859 cachep->batchcount + cachep->num;
3982 spin_unlock_irq(&l3->list_lock); 3860 spin_unlock_irq(&n->list_lock);
3983 kfree(shared); 3861 kfree(shared);
3984 free_alien_cache(new_alien); 3862 free_alien_cache(new_alien);
3985 continue; 3863 continue;
3986 } 3864 }
3987 l3 = kmalloc_node(sizeof(struct kmem_list3), gfp, node); 3865 n = kmalloc_node(sizeof(struct kmem_cache_node), gfp, node);
3988 if (!l3) { 3866 if (!n) {
3989 free_alien_cache(new_alien); 3867 free_alien_cache(new_alien);
3990 kfree(new_shared); 3868 kfree(new_shared);
3991 goto fail; 3869 goto fail;
3992 } 3870 }
3993 3871
3994 kmem_list3_init(l3); 3872 kmem_cache_node_init(n);
3995 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + 3873 n->next_reap = jiffies + REAPTIMEOUT_LIST3 +
3996 ((unsigned long)cachep) % REAPTIMEOUT_LIST3; 3874 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
3997 l3->shared = new_shared; 3875 n->shared = new_shared;
3998 l3->alien = new_alien; 3876 n->alien = new_alien;
3999 l3->free_limit = (1 + nr_cpus_node(node)) * 3877 n->free_limit = (1 + nr_cpus_node(node)) *
4000 cachep->batchcount + cachep->num; 3878 cachep->batchcount + cachep->num;
4001 cachep->nodelists[node] = l3; 3879 cachep->node[node] = n;
4002 } 3880 }
4003 return 0; 3881 return 0;
4004 3882
@@ -4007,13 +3885,13 @@ fail:
4007 /* Cache is not active yet. Roll back what we did */ 3885 /* Cache is not active yet. Roll back what we did */
4008 node--; 3886 node--;
4009 while (node >= 0) { 3887 while (node >= 0) {
4010 if (cachep->nodelists[node]) { 3888 if (cachep->node[node]) {
4011 l3 = cachep->nodelists[node]; 3889 n = cachep->node[node];
4012 3890
4013 kfree(l3->shared); 3891 kfree(n->shared);
4014 free_alien_cache(l3->alien); 3892 free_alien_cache(n->alien);
4015 kfree(l3); 3893 kfree(n);
4016 cachep->nodelists[node] = NULL; 3894 cachep->node[node] = NULL;
4017 } 3895 }
4018 node--; 3896 node--;
4019 } 3897 }
@@ -4073,9 +3951,9 @@ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
4073 struct array_cache *ccold = new->new[i]; 3951 struct array_cache *ccold = new->new[i];
4074 if (!ccold) 3952 if (!ccold)
4075 continue; 3953 continue;
4076 spin_lock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock); 3954 spin_lock_irq(&cachep->node[cpu_to_mem(i)]->list_lock);
4077 free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i)); 3955 free_block(cachep, ccold->entry, ccold->avail, cpu_to_mem(i));
4078 spin_unlock_irq(&cachep->nodelists[cpu_to_mem(i)]->list_lock); 3956 spin_unlock_irq(&cachep->node[cpu_to_mem(i)]->list_lock);
4079 kfree(ccold); 3957 kfree(ccold);
4080 } 3958 }
4081 kfree(new); 3959 kfree(new);
@@ -4176,11 +4054,11 @@ skip_setup:
4176} 4054}
4177 4055
4178/* 4056/*
4179 * Drain an array if it contains any elements taking the l3 lock only if 4057 * Drain an array if it contains any elements taking the node lock only if
4180 * necessary. Note that the l3 listlock also protects the array_cache 4058 * necessary. Note that the node listlock also protects the array_cache
4181 * if drain_array() is used on the shared array. 4059 * if drain_array() is used on the shared array.
4182 */ 4060 */
4183static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, 4061static void drain_array(struct kmem_cache *cachep, struct kmem_cache_node *n,
4184 struct array_cache *ac, int force, int node) 4062 struct array_cache *ac, int force, int node)
4185{ 4063{
4186 int tofree; 4064 int tofree;
@@ -4190,7 +4068,7 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
4190 if (ac->touched && !force) { 4068 if (ac->touched && !force) {
4191 ac->touched = 0; 4069 ac->touched = 0;
4192 } else { 4070 } else {
4193 spin_lock_irq(&l3->list_lock); 4071 spin_lock_irq(&n->list_lock);
4194 if (ac->avail) { 4072 if (ac->avail) {
4195 tofree = force ? ac->avail : (ac->limit + 4) / 5; 4073 tofree = force ? ac->avail : (ac->limit + 4) / 5;
4196 if (tofree > ac->avail) 4074 if (tofree > ac->avail)
@@ -4200,7 +4078,7 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
4200 memmove(ac->entry, &(ac->entry[tofree]), 4078 memmove(ac->entry, &(ac->entry[tofree]),
4201 sizeof(void *) * ac->avail); 4079 sizeof(void *) * ac->avail);
4202 } 4080 }
4203 spin_unlock_irq(&l3->list_lock); 4081 spin_unlock_irq(&n->list_lock);
4204 } 4082 }
4205} 4083}
4206 4084
@@ -4219,7 +4097,7 @@ static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
4219static void cache_reap(struct work_struct *w) 4097static void cache_reap(struct work_struct *w)
4220{ 4098{
4221 struct kmem_cache *searchp; 4099 struct kmem_cache *searchp;
4222 struct kmem_list3 *l3; 4100 struct kmem_cache_node *n;
4223 int node = numa_mem_id(); 4101 int node = numa_mem_id();
4224 struct delayed_work *work = to_delayed_work(w); 4102 struct delayed_work *work = to_delayed_work(w);
4225 4103
@@ -4231,33 +4109,33 @@ static void cache_reap(struct work_struct *w)
4231 check_irq_on(); 4109 check_irq_on();
4232 4110
4233 /* 4111 /*
4234 * We only take the l3 lock if absolutely necessary and we 4112 * We only take the node lock if absolutely necessary and we
4235 * have established with reasonable certainty that 4113 * have established with reasonable certainty that
4236 * we can do some work if the lock was obtained. 4114 * we can do some work if the lock was obtained.
4237 */ 4115 */
4238 l3 = searchp->nodelists[node]; 4116 n = searchp->node[node];
4239 4117
4240 reap_alien(searchp, l3); 4118 reap_alien(searchp, n);
4241 4119
4242 drain_array(searchp, l3, cpu_cache_get(searchp), 0, node); 4120 drain_array(searchp, n, cpu_cache_get(searchp), 0, node);
4243 4121
4244 /* 4122 /*
4245 * These are racy checks but it does not matter 4123 * These are racy checks but it does not matter
4246 * if we skip one check or scan twice. 4124 * if we skip one check or scan twice.
4247 */ 4125 */
4248 if (time_after(l3->next_reap, jiffies)) 4126 if (time_after(n->next_reap, jiffies))
4249 goto next; 4127 goto next;
4250 4128
4251 l3->next_reap = jiffies + REAPTIMEOUT_LIST3; 4129 n->next_reap = jiffies + REAPTIMEOUT_LIST3;
4252 4130
4253 drain_array(searchp, l3, l3->shared, 0, node); 4131 drain_array(searchp, n, n->shared, 0, node);
4254 4132
4255 if (l3->free_touched) 4133 if (n->free_touched)
4256 l3->free_touched = 0; 4134 n->free_touched = 0;
4257 else { 4135 else {
4258 int freed; 4136 int freed;
4259 4137
4260 freed = drain_freelist(searchp, l3, (l3->free_limit + 4138 freed = drain_freelist(searchp, n, (n->free_limit +
4261 5 * searchp->num - 1) / (5 * searchp->num)); 4139 5 * searchp->num - 1) / (5 * searchp->num));
4262 STATS_ADD_REAPED(searchp, freed); 4140 STATS_ADD_REAPED(searchp, freed);
4263 } 4141 }
@@ -4283,25 +4161,25 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
4283 const char *name; 4161 const char *name;
4284 char *error = NULL; 4162 char *error = NULL;
4285 int node; 4163 int node;
4286 struct kmem_list3 *l3; 4164 struct kmem_cache_node *n;
4287 4165
4288 active_objs = 0; 4166 active_objs = 0;
4289 num_slabs = 0; 4167 num_slabs = 0;
4290 for_each_online_node(node) { 4168 for_each_online_node(node) {
4291 l3 = cachep->nodelists[node]; 4169 n = cachep->node[node];
4292 if (!l3) 4170 if (!n)
4293 continue; 4171 continue;
4294 4172
4295 check_irq_on(); 4173 check_irq_on();
4296 spin_lock_irq(&l3->list_lock); 4174 spin_lock_irq(&n->list_lock);
4297 4175
4298 list_for_each_entry(slabp, &l3->slabs_full, list) { 4176 list_for_each_entry(slabp, &n->slabs_full, list) {
4299 if (slabp->inuse != cachep->num && !error) 4177 if (slabp->inuse != cachep->num && !error)
4300 error = "slabs_full accounting error"; 4178 error = "slabs_full accounting error";
4301 active_objs += cachep->num; 4179 active_objs += cachep->num;
4302 active_slabs++; 4180 active_slabs++;
4303 } 4181 }
4304 list_for_each_entry(slabp, &l3->slabs_partial, list) { 4182 list_for_each_entry(slabp, &n->slabs_partial, list) {
4305 if (slabp->inuse == cachep->num && !error) 4183 if (slabp->inuse == cachep->num && !error)
4306 error = "slabs_partial inuse accounting error"; 4184 error = "slabs_partial inuse accounting error";
4307 if (!slabp->inuse && !error) 4185 if (!slabp->inuse && !error)
@@ -4309,16 +4187,16 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
4309 active_objs += slabp->inuse; 4187 active_objs += slabp->inuse;
4310 active_slabs++; 4188 active_slabs++;
4311 } 4189 }
4312 list_for_each_entry(slabp, &l3->slabs_free, list) { 4190 list_for_each_entry(slabp, &n->slabs_free, list) {
4313 if (slabp->inuse && !error) 4191 if (slabp->inuse && !error)
4314 error = "slabs_free/inuse accounting error"; 4192 error = "slabs_free/inuse accounting error";
4315 num_slabs++; 4193 num_slabs++;
4316 } 4194 }
4317 free_objects += l3->free_objects; 4195 free_objects += n->free_objects;
4318 if (l3->shared) 4196 if (n->shared)
4319 shared_avail += l3->shared->avail; 4197 shared_avail += n->shared->avail;
4320 4198
4321 spin_unlock_irq(&l3->list_lock); 4199 spin_unlock_irq(&n->list_lock);
4322 } 4200 }
4323 num_slabs += active_slabs; 4201 num_slabs += active_slabs;
4324 num_objs = num_slabs * cachep->num; 4202 num_objs = num_slabs * cachep->num;
@@ -4344,7 +4222,7 @@ void get_slabinfo(struct kmem_cache *cachep, struct slabinfo *sinfo)
4344void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *cachep) 4222void slabinfo_show_stats(struct seq_file *m, struct kmem_cache *cachep)
4345{ 4223{
4346#if STATS 4224#if STATS
4347 { /* list3 stats */ 4225 { /* node stats */
4348 unsigned long high = cachep->high_mark; 4226 unsigned long high = cachep->high_mark;
4349 unsigned long allocs = cachep->num_allocations; 4227 unsigned long allocs = cachep->num_allocations;
4350 unsigned long grown = cachep->grown; 4228 unsigned long grown = cachep->grown;
@@ -4497,9 +4375,9 @@ static int leaks_show(struct seq_file *m, void *p)
4497{ 4375{
4498 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list); 4376 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, list);
4499 struct slab *slabp; 4377 struct slab *slabp;
4500 struct kmem_list3 *l3; 4378 struct kmem_cache_node *n;
4501 const char *name; 4379 const char *name;
4502 unsigned long *n = m->private; 4380 unsigned long *x = m->private;
4503 int node; 4381 int node;
4504 int i; 4382 int i;
4505 4383
@@ -4510,43 +4388,43 @@ static int leaks_show(struct seq_file *m, void *p)
4510 4388
4511 /* OK, we can do it */ 4389 /* OK, we can do it */
4512 4390
4513 n[1] = 0; 4391 x[1] = 0;
4514 4392
4515 for_each_online_node(node) { 4393 for_each_online_node(node) {
4516 l3 = cachep->nodelists[node]; 4394 n = cachep->node[node];
4517 if (!l3) 4395 if (!n)
4518 continue; 4396 continue;
4519 4397
4520 check_irq_on(); 4398 check_irq_on();
4521 spin_lock_irq(&l3->list_lock); 4399 spin_lock_irq(&n->list_lock);
4522 4400
4523 list_for_each_entry(slabp, &l3->slabs_full, list) 4401 list_for_each_entry(slabp, &n->slabs_full, list)
4524 handle_slab(n, cachep, slabp); 4402 handle_slab(x, cachep, slabp);
4525 list_for_each_entry(slabp, &l3->slabs_partial, list) 4403 list_for_each_entry(slabp, &n->slabs_partial, list)
4526 handle_slab(n, cachep, slabp); 4404 handle_slab(x, cachep, slabp);
4527 spin_unlock_irq(&l3->list_lock); 4405 spin_unlock_irq(&n->list_lock);
4528 } 4406 }
4529 name = cachep->name; 4407 name = cachep->name;
4530 if (n[0] == n[1]) { 4408 if (x[0] == x[1]) {
4531 /* Increase the buffer size */ 4409 /* Increase the buffer size */
4532 mutex_unlock(&slab_mutex); 4410 mutex_unlock(&slab_mutex);
4533 m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL); 4411 m->private = kzalloc(x[0] * 4 * sizeof(unsigned long), GFP_KERNEL);
4534 if (!m->private) { 4412 if (!m->private) {
4535 /* Too bad, we are really out */ 4413 /* Too bad, we are really out */
4536 m->private = n; 4414 m->private = x;
4537 mutex_lock(&slab_mutex); 4415 mutex_lock(&slab_mutex);
4538 return -ENOMEM; 4416 return -ENOMEM;
4539 } 4417 }
4540 *(unsigned long *)m->private = n[0] * 2; 4418 *(unsigned long *)m->private = x[0] * 2;
4541 kfree(n); 4419 kfree(x);
4542 mutex_lock(&slab_mutex); 4420 mutex_lock(&slab_mutex);
4543 /* Now make sure this entry will be retried */ 4421 /* Now make sure this entry will be retried */
4544 m->count = m->size; 4422 m->count = m->size;
4545 return 0; 4423 return 0;
4546 } 4424 }
4547 for (i = 0; i < n[1]; i++) { 4425 for (i = 0; i < x[1]; i++) {
4548 seq_printf(m, "%s: %lu ", name, n[2*i+3]); 4426 seq_printf(m, "%s: %lu ", name, x[2*i+3]);
4549 show_symbol(m, n[2*i+2]); 4427 show_symbol(m, x[2*i+2]);
4550 seq_putc(m, '\n'); 4428 seq_putc(m, '\n');
4551 } 4429 }
4552 4430
diff --git a/mm/slab.h b/mm/slab.h
index 34a98d642196..f96b49e4704e 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -16,7 +16,7 @@ enum slab_state {
16 DOWN, /* No slab functionality yet */ 16 DOWN, /* No slab functionality yet */
17 PARTIAL, /* SLUB: kmem_cache_node available */ 17 PARTIAL, /* SLUB: kmem_cache_node available */
18 PARTIAL_ARRAYCACHE, /* SLAB: kmalloc size for arraycache available */ 18 PARTIAL_ARRAYCACHE, /* SLAB: kmalloc size for arraycache available */
19 PARTIAL_L3, /* SLAB: kmalloc size for l3 struct available */ 19 PARTIAL_NODE, /* SLAB: kmalloc size for node struct available */
20 UP, /* Slab caches usable but not all extras yet */ 20 UP, /* Slab caches usable but not all extras yet */
21 FULL /* Everything is working */ 21 FULL /* Everything is working */
22}; 22};
@@ -35,6 +35,15 @@ extern struct kmem_cache *kmem_cache;
35unsigned long calculate_alignment(unsigned long flags, 35unsigned long calculate_alignment(unsigned long flags,
36 unsigned long align, unsigned long size); 36 unsigned long align, unsigned long size);
37 37
38#ifndef CONFIG_SLOB
39/* Kmalloc array related functions */
40void create_kmalloc_caches(unsigned long);
41
42/* Find the kmalloc slab corresponding for a certain size */
43struct kmem_cache *kmalloc_slab(size_t, gfp_t);
44#endif
45
46
38/* Functions provided by the slab allocators */ 47/* Functions provided by the slab allocators */
39extern int __kmem_cache_create(struct kmem_cache *, unsigned long flags); 48extern int __kmem_cache_create(struct kmem_cache *, unsigned long flags);
40 49
@@ -230,3 +239,35 @@ static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
230 return s; 239 return s;
231} 240}
232#endif 241#endif
242
243
244/*
245 * The slab lists for all objects.
246 */
247struct kmem_cache_node {
248 spinlock_t list_lock;
249
250#ifdef CONFIG_SLAB
251 struct list_head slabs_partial; /* partial list first, better asm code */
252 struct list_head slabs_full;
253 struct list_head slabs_free;
254 unsigned long free_objects;
255 unsigned int free_limit;
256 unsigned int colour_next; /* Per-node cache coloring */
257 struct array_cache *shared; /* shared per node */
258 struct array_cache **alien; /* on other nodes */
259 unsigned long next_reap; /* updated without locking */
260 int free_touched; /* updated without locking */
261#endif
262
263#ifdef CONFIG_SLUB
264 unsigned long nr_partial;
265 struct list_head partial;
266#ifdef CONFIG_SLUB_DEBUG
267 atomic_long_t nr_slabs;
268 atomic_long_t total_objects;
269 struct list_head full;
270#endif
271#endif
272
273};
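With the structure shared, both allocators reach their per-node state the same way, as cachep->node[nid], checked for NULL because not every node has been set up. A toy userspace sketch of that indexing pattern (stand-in types, not the kernel ones):

    #include <stdio.h>

    #define TOY_MAX_NODES 4

    struct toy_cache_node {
        unsigned long free_objects;
    };

    struct toy_cache {
        const char *name;
        struct toy_cache_node *node[TOY_MAX_NODES];
    };

    int main(void)
    {
        struct toy_cache_node n0 = { .free_objects = 12 };
        struct toy_cache_node n1 = { .free_objects = 7 };
        struct toy_cache c = { .name = "toy-cache", .node = { &n0, &n1 } };
        unsigned long total = 0;

        /* mirrors the for_each_online_node() walks above: skip unset nodes */
        for (int nid = 0; nid < TOY_MAX_NODES; nid++) {
            if (!c.node[nid])
                continue;
            total += c.node[nid]->free_objects;
        }
        printf("%s: %lu free objects across nodes\n", c.name, total);
        return 0;
    }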
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 3f3cd97d3fdf..d2517b05d5bc 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -299,7 +299,7 @@ void __init create_boot_cache(struct kmem_cache *s, const char *name, size_t siz
299 err = __kmem_cache_create(s, flags); 299 err = __kmem_cache_create(s, flags);
300 300
301 if (err) 301 if (err)
302 panic("Creation of kmalloc slab %s size=%zd failed. Reason %d\n", 302 panic("Creation of kmalloc slab %s size=%zu failed. Reason %d\n",
303 name, size, err); 303 name, size, err);
304 304
305 s->refcount = -1; /* Exempt from merging for now */ 305 s->refcount = -1; /* Exempt from merging for now */
@@ -319,6 +319,178 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name, size_t size,
319 return s; 319 return s;
320} 320}
321 321
322struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1];
323EXPORT_SYMBOL(kmalloc_caches);
324
325#ifdef CONFIG_ZONE_DMA
326struct kmem_cache *kmalloc_dma_caches[KMALLOC_SHIFT_HIGH + 1];
327EXPORT_SYMBOL(kmalloc_dma_caches);
328#endif
329
330/*
331 * Conversion table for small slabs sizes / 8 to the index in the
332 * kmalloc array. This is necessary for slabs < 192 since we have non power
333 * of two cache sizes there. The size of larger slabs can be determined using
334 * fls.
335 */
336static s8 size_index[24] = {
337 3, /* 8 */
338 4, /* 16 */
339 5, /* 24 */
340 5, /* 32 */
341 6, /* 40 */
342 6, /* 48 */
343 6, /* 56 */
344 6, /* 64 */
345 1, /* 72 */
346 1, /* 80 */
347 1, /* 88 */
348 1, /* 96 */
349 7, /* 104 */
350 7, /* 112 */
351 7, /* 120 */
352 7, /* 128 */
353 2, /* 136 */
354 2, /* 144 */
355 2, /* 152 */
356 2, /* 160 */
357 2, /* 168 */
358 2, /* 176 */
359 2, /* 184 */
360 2 /* 192 */
361};
362
363static inline int size_index_elem(size_t bytes)
364{
365 return (bytes - 1) / 8;
366}
367
368/*
369 * Find the kmem_cache structure that serves a given size of
370 * allocation
371 */
372struct kmem_cache *kmalloc_slab(size_t size, gfp_t flags)
373{
374 int index;
375
376 if (WARN_ON_ONCE(size > KMALLOC_MAX_SIZE))
377 return NULL;
378
379 if (size <= 192) {
380 if (!size)
381 return ZERO_SIZE_PTR;
382
383 index = size_index[size_index_elem(size)];
384 } else
385 index = fls(size - 1);
386
387#ifdef CONFIG_ZONE_DMA
388 if (unlikely((flags & GFP_DMA)))
389 return kmalloc_dma_caches[index];
390
391#endif
392 return kmalloc_caches[index];
393}
394
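As a worked example of the lookup: kmalloc(56) gives (56 - 1) / 8 = 6 and size_index[6] = 6, i.e. the 64-byte cache, while kmalloc(1000) takes the fls() path, fls(999) = 10, i.e. the 1024-byte cache. The sketch below reproduces the table and the index calculation in userspace, assuming an 8-byte minimum alignment so the table is used exactly as listed (indices 1 and 2 are the odd-sized 96- and 192-byte caches):

    #include <stdio.h>

    /* copy of the size_index table above: value = index into the kmalloc array */
    static signed char size_index[24] = {
        3, 4, 5, 5, 6, 6, 6, 6, 1, 1, 1, 1,
        7, 7, 7, 7, 2, 2, 2, 2, 2, 2, 2, 2
    };

    static int fls_ul(unsigned long x)   /* userspace stand-in for fls() */
    {
        return x ? (int)(sizeof(unsigned long) * 8) - __builtin_clzl(x) : 0;
    }

    static int kmalloc_index_of(unsigned long size)
    {
        if (size <= 192)
            return size_index[(size - 1) / 8];
        return fls_ul(size - 1);
    }

    static unsigned long kmalloc_bytes(int index)   /* roughly mirrors kmalloc_size() */
    {
        if (index == 1)
            return 96;
        if (index == 2)
            return 192;
        return 1UL << index;
    }

    int main(void)
    {
        unsigned long sizes[] = { 8, 56, 100, 192, 200, 1000, 4096 };

        for (int i = 0; i < (int)(sizeof(sizes) / sizeof(sizes[0])); i++)
            printf("kmalloc(%4lu) -> kmalloc-%lu\n", sizes[i],
                   kmalloc_bytes(kmalloc_index_of(sizes[i])));
        return 0;
    }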
395/*
396 * Create the kmalloc array. Some of the regular kmalloc caches
397 * may already have been created because they were needed to
398 * enable allocations for slab creation.
399 */
400void __init create_kmalloc_caches(unsigned long flags)
401{
402 int i;
403
404 /*
405 * Patch up the size_index table if we have strange large alignment
406 * requirements for the kmalloc array. This is only the case for
407 * MIPS it seems. The standard arches will not generate any code here.
408 *
409 * Largest permitted alignment is 256 bytes due to the way we
410 * handle the index determination for the smaller caches.
411 *
412 * Make sure that nothing crazy happens if someone starts tinkering
413 * around with ARCH_KMALLOC_MINALIGN
414 */
415 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
416 (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));
417
418 for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
419 int elem = size_index_elem(i);
420
421 if (elem >= ARRAY_SIZE(size_index))
422 break;
423 size_index[elem] = KMALLOC_SHIFT_LOW;
424 }
425
426 if (KMALLOC_MIN_SIZE >= 64) {
427 /*
428 * The 96 byte size cache is not used if the alignment
429 * is 64 byte.
430 */
431 for (i = 64 + 8; i <= 96; i += 8)
432 size_index[size_index_elem(i)] = 7;
433
434 }
435
436 if (KMALLOC_MIN_SIZE >= 128) {
437 /*
438 * The 192 byte sized cache is not used if the alignment
439 * is 128 byte. Redirect kmalloc to use the 256 byte cache
440 * instead.
441 */
442 for (i = 128 + 8; i <= 192; i += 8)
443 size_index[size_index_elem(i)] = 8;
444 }
445 for (i = KMALLOC_SHIFT_LOW; i <= KMALLOC_SHIFT_HIGH; i++) {
446 if (!kmalloc_caches[i]) {
447 kmalloc_caches[i] = create_kmalloc_cache(NULL,
448 1 << i, flags);
449
450 /*
451 * Caches that are not a power-of-two size. These have to
452 * be created immediately after the earlier power-of-two
453 * caches.
454 */
455 if (KMALLOC_MIN_SIZE <= 32 && !kmalloc_caches[1] && i == 6)
456 kmalloc_caches[1] = create_kmalloc_cache(NULL, 96, flags);
457
458 if (KMALLOC_MIN_SIZE <= 64 && !kmalloc_caches[2] && i == 7)
459 kmalloc_caches[2] = create_kmalloc_cache(NULL, 192, flags);
460 }
461 }
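The 96- and 192-byte caches are slotted in mid-loop, right after kmalloc-64 (i == 6) and kmalloc-128 (i == 7), presumably so that any allocation in the 65-96 or 129-192 byte range made while the rest of the array is still being created already finds size_index pointing at a populated slot 1 or 2.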
462
463 /* Kmalloc array is now usable */
464 slab_state = UP;
465
466 for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
467 struct kmem_cache *s = kmalloc_caches[i];
468 char *n;
469
470 if (s) {
471 n = kasprintf(GFP_NOWAIT, "kmalloc-%d", kmalloc_size(i));
472
473 BUG_ON(!n);
474 s->name = n;
475 }
476 }
477
478#ifdef CONFIG_ZONE_DMA
479 for (i = 0; i <= KMALLOC_SHIFT_HIGH; i++) {
480 struct kmem_cache *s = kmalloc_caches[i];
481
482 if (s) {
483 int size = kmalloc_size(i);
484 char *n = kasprintf(GFP_NOWAIT,
485 "dma-kmalloc-%d", size);
486
487 BUG_ON(!n);
488 kmalloc_dma_caches[i] = create_kmalloc_cache(n,
489 size, SLAB_CACHE_DMA | flags);
490 }
491 }
492#endif
493}
322#endif /* !CONFIG_SLOB */ 494#endif /* !CONFIG_SLOB */
323 495
324 496
diff --git a/mm/slub.c b/mm/slub.c
index a0206df88aba..57707f01bcfb 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1006,7 +1006,7 @@ static inline void inc_slabs_node(struct kmem_cache *s, int node, int objects)
1006 * dilemma by deferring the increment of the count during 1006 * dilemma by deferring the increment of the count during
1007 * bootstrap (see early_kmem_cache_node_alloc). 1007 * bootstrap (see early_kmem_cache_node_alloc).
1008 */ 1008 */
1009 if (n) { 1009 if (likely(n)) {
1010 atomic_long_inc(&n->nr_slabs); 1010 atomic_long_inc(&n->nr_slabs);
1011 atomic_long_add(objects, &n->total_objects); 1011 atomic_long_add(objects, &n->total_objects);
1012 } 1012 }
@@ -1494,7 +1494,7 @@ static inline void remove_partial(struct kmem_cache_node *n,
1494 */ 1494 */
1495static inline void *acquire_slab(struct kmem_cache *s, 1495static inline void *acquire_slab(struct kmem_cache *s,
1496 struct kmem_cache_node *n, struct page *page, 1496 struct kmem_cache_node *n, struct page *page,
1497 int mode) 1497 int mode, int *objects)
1498{ 1498{
1499 void *freelist; 1499 void *freelist;
1500 unsigned long counters; 1500 unsigned long counters;
@@ -1508,6 +1508,7 @@ static inline void *acquire_slab(struct kmem_cache *s,
1508 freelist = page->freelist; 1508 freelist = page->freelist;
1509 counters = page->counters; 1509 counters = page->counters;
1510 new.counters = counters; 1510 new.counters = counters;
1511 *objects = new.objects - new.inuse;
1511 if (mode) { 1512 if (mode) {
1512 new.inuse = page->objects; 1513 new.inuse = page->objects;
1513 new.freelist = NULL; 1514 new.freelist = NULL;
@@ -1529,7 +1530,7 @@ static inline void *acquire_slab(struct kmem_cache *s,
1529 return freelist; 1530 return freelist;
1530} 1531}
1531 1532
1532static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain); 1533static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain);
1533static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags); 1534static inline bool pfmemalloc_match(struct page *page, gfp_t gfpflags);
1534 1535
1535/* 1536/*
@@ -1540,6 +1541,8 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
1540{ 1541{
1541 struct page *page, *page2; 1542 struct page *page, *page2;
1542 void *object = NULL; 1543 void *object = NULL;
1544 int available = 0;
1545 int objects;
1543 1546
1544 /* 1547 /*
1545 * Racy check. If we mistakenly see no partial slabs then we 1548 * Racy check. If we mistakenly see no partial slabs then we
@@ -1553,22 +1556,21 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
1553 spin_lock(&n->list_lock); 1556 spin_lock(&n->list_lock);
1554 list_for_each_entry_safe(page, page2, &n->partial, lru) { 1557 list_for_each_entry_safe(page, page2, &n->partial, lru) {
1555 void *t; 1558 void *t;
1556 int available;
1557 1559
1558 if (!pfmemalloc_match(page, flags)) 1560 if (!pfmemalloc_match(page, flags))
1559 continue; 1561 continue;
1560 1562
1561 t = acquire_slab(s, n, page, object == NULL); 1563 t = acquire_slab(s, n, page, object == NULL, &objects);
1562 if (!t) 1564 if (!t)
1563 break; 1565 break;
1564 1566
1567 available += objects;
1565 if (!object) { 1568 if (!object) {
1566 c->page = page; 1569 c->page = page;
1567 stat(s, ALLOC_FROM_PARTIAL); 1570 stat(s, ALLOC_FROM_PARTIAL);
1568 object = t; 1571 object = t;
1569 available = page->objects - page->inuse;
1570 } else { 1572 } else {
1571 available = put_cpu_partial(s, page, 0); 1573 put_cpu_partial(s, page, 0);
1572 stat(s, CPU_PARTIAL_NODE); 1574 stat(s, CPU_PARTIAL_NODE);
1573 } 1575 }
1574 if (kmem_cache_debug(s) || available > s->cpu_partial / 2) 1576 if (kmem_cache_debug(s) || available > s->cpu_partial / 2)
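A worked example of the new accounting: with 16-object slabs, acquiring a partial slab that has 10 objects in use contributes objects = 6, the next one with 12 in use adds 4, and so on; available now accumulates those per-slab counts (6, then 10, ...) until the running total passes s->cpu_partial / 2, instead of being overwritten by whatever the last acquire or put_cpu_partial() call happened to report.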
@@ -1947,7 +1949,7 @@ static void unfreeze_partials(struct kmem_cache *s,
1947 * If we did not find a slot then simply move all the partials to the 1949 * If we did not find a slot then simply move all the partials to the
1948 * per node partial list. 1950 * per node partial list.
1949 */ 1951 */
1950static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain) 1952static void put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
1951{ 1953{
1952 struct page *oldpage; 1954 struct page *oldpage;
1953 int pages; 1955 int pages;
@@ -1985,7 +1987,6 @@ static int put_cpu_partial(struct kmem_cache *s, struct page *page, int drain)
1985 page->next = oldpage; 1987 page->next = oldpage;
1986 1988
1987 } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage); 1989 } while (this_cpu_cmpxchg(s->cpu_slab->partial, oldpage, page) != oldpage);
1988 return pobjects;
1989} 1990}
1990 1991
1991static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c) 1992static inline void flush_slab(struct kmem_cache *s, struct kmem_cache_cpu *c)
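
Taken together, the acquire_slab()/get_partial_node()/put_cpu_partial() hunks above fix how the number of acquired objects is counted: acquire_slab() now reports a page's free objects through the new objects out-parameter (new.objects - new.inuse, computed before inuse is overwritten for the cpu-slab case), get_partial_node() accumulates that into available for every page it takes, and put_cpu_partial() no longer has to return a running pobjects total, so it becomes void. A rough userspace sketch of the counting pattern, with fake_slab/acquire as invented stand-ins for struct page and acquire_slab():

#include <stdio.h>

struct fake_slab {
	int objects;	/* capacity of the slab */
	int inuse;	/* objects already handed out */
};

/* Models acquire_slab(): claim the slab and report, via the
 * out-parameter, how many free objects it contributed. */
static int acquire(struct fake_slab *s, int *objects)
{
	*objects = s->objects - s->inuse;	/* count before marking it taken */
	s->inuse = s->objects;			/* the whole slab is now ours */
	return 1;
}

int main(void)
{
	struct fake_slab partial[3] = { {16, 10}, {16, 4}, {16, 15} };
	int cpu_partial = 8;			/* stand-in for s->cpu_partial */
	int available = 0;
	int objects;

	for (int i = 0; i < 3; i++) {
		if (!acquire(&partial[i], &objects))
			break;
		available += objects;		/* accumulate across all slabs */
		if (available > cpu_partial / 2)
			break;			/* cached enough, stop early */
	}
	printf("available=%d\n", available);
	return 0;
}
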
@@ -2042,7 +2043,7 @@ static void flush_all(struct kmem_cache *s)
2042static inline int node_match(struct page *page, int node) 2043static inline int node_match(struct page *page, int node)
2043{ 2044{
2044#ifdef CONFIG_NUMA 2045#ifdef CONFIG_NUMA
2045 if (node != NUMA_NO_NODE && page_to_nid(page) != node) 2046 if (!page || (node != NUMA_NO_NODE && page_to_nid(page) != node))
2046 return 0; 2047 return 0;
2047#endif 2048#endif
2048 return 1; 2049 return 1;
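
The !page test added to node_match() above exists because c->page may be observed as NULL on this lock-free path (the cpu slab can be taken away underneath the reader), and page_to_nid() would dereference it. The fix is the ordinary short-circuiting NULL guard; a tiny sketch outside the kernel, with fake_page and fake_page_to_nid() as invented stand-ins:

#include <stdio.h>
#include <stddef.h>

#define NUMA_NO_NODE	(-1)

struct fake_page { int nid; };

static int fake_page_to_nid(const struct fake_page *page)
{
	return page->nid;			/* would crash if page were NULL */
}

static int node_match(const struct fake_page *page, int node)
{
	/* Test the pointer first; && short-circuits before the dereference. */
	if (!page || (node != NUMA_NO_NODE && fake_page_to_nid(page) != node))
		return 0;
	return 1;
}

int main(void)
{
	struct fake_page p = { .nid = 1 };

	printf("%d %d %d\n",
	       node_match(&p, 1),		/* 1: matching node */
	       node_match(&p, 0),		/* 0: wrong node */
	       node_match(NULL, 1));		/* 0: no cpu slab, no oops */
	return 0;
}
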
@@ -2332,13 +2333,18 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
2332 2333
2333 s = memcg_kmem_get_cache(s, gfpflags); 2334 s = memcg_kmem_get_cache(s, gfpflags);
2334redo: 2335redo:
2335
2336 /* 2336 /*
2337 * Must read kmem_cache cpu data via this cpu ptr. Preemption is 2337 * Must read kmem_cache cpu data via this cpu ptr. Preemption is
2338 * enabled. We may switch back and forth between cpus while 2338 * enabled. We may switch back and forth between cpus while
2339 * reading from one cpu area. That does not matter as long 2339 * reading from one cpu area. That does not matter as long
2340 * as we end up on the original cpu again when doing the cmpxchg. 2340 * as we end up on the original cpu again when doing the cmpxchg.
2341 *
2342 * Preemption is disabled for the retrieval of the tid because that
2343 * must occur from the current processor. We cannot allow rescheduling
2344 * on a different processor between the determination of the pointer
2345 * and the retrieval of the tid.
2341 */ 2346 */
2347 preempt_disable();
2342 c = __this_cpu_ptr(s->cpu_slab); 2348 c = __this_cpu_ptr(s->cpu_slab);
2343 2349
2344 /* 2350 /*
@@ -2348,7 +2354,7 @@ redo:
2348 * linked list in between. 2354 * linked list in between.
2349 */ 2355 */
2350 tid = c->tid; 2356 tid = c->tid;
2351 barrier(); 2357 preempt_enable();
2352 2358
2353 object = c->freelist; 2359 object = c->freelist;
2354 page = c->page; 2360 page = c->page;
@@ -2595,10 +2601,11 @@ redo:
2595 * data is retrieved via this pointer. If we are on the same cpu 2601 * data is retrieved via this pointer. If we are on the same cpu
2596 * during the cmpxchg then the free will succedd. 2602 * during the cmpxchg then the free will succedd.
2597 */ 2603 */
2604 preempt_disable();
2598 c = __this_cpu_ptr(s->cpu_slab); 2605 c = __this_cpu_ptr(s->cpu_slab);
2599 2606
2600 tid = c->tid; 2607 tid = c->tid;
2601 barrier(); 2608 preempt_enable();
2602 2609
2603 if (likely(page == c->page)) { 2610 if (likely(page == c->page)) {
2604 set_freepointer(s, object, c->freelist); 2611 set_freepointer(s, object, c->freelist);
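
The two fast-path hunks above (allocation and free) replace the plain barrier() with a preempt_disable()/preempt_enable() pair so that the per-cpu pointer and the tid read through it are guaranteed to come from the same processor; a preemption and migration between the two reads could otherwise pair one CPU's tid with a later compare-and-exchange on another CPU's area. The tid acts as a transaction id: the subsequent update of the freelist/tid pair only succeeds if neither changed in the meantime. The sketch below is only a loose userspace analogue of that idea; it packs a freelist head and a transaction id into one 64-bit atomic and retries when the id has moved, and it does not model per-cpu areas or preemption (all names in it are made up):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

/* Pack a small freelist-head index and a transaction id into one word
 * so both can be compared and swapped together, loosely mimicking the
 * (freelist, tid) pair that the SLUB fast path updates atomically. */
#define MAKE_STATE(head, tid)	(((uint64_t)(head) << 32) | (uint32_t)(tid))
#define STATE_HEAD(s)		((uint32_t)((s) >> 32))
#define STATE_TID(s)		((uint32_t)(s))
#define LIST_END		0xffffffffu

static _Atomic uint64_t state = MAKE_STATE(0, 0);
static const uint32_t next_free[4] = { 1, 2, 3, LIST_END };	/* toy freelist */

/* Pop the head; the tid changes on every successful transaction, so a
 * stale snapshot (taken before an interruption) fails the CAS and retries. */
static int pop(void)
{
	for (;;) {
		uint64_t old = atomic_load(&state);
		uint32_t head = STATE_HEAD(old);

		if (head == LIST_END)
			return -1;		/* empty: caller takes the slowpath */

		uint64_t new = MAKE_STATE(next_free[head], STATE_TID(old) + 1);
		if (atomic_compare_exchange_weak(&state, &old, new))
			return (int)head;
	}
}

int main(void)
{
	int a = pop(), b = pop(), c = pop();

	printf("%d %d %d\n", a, b, c);		/* 0 1 2 */
	return 0;
}
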
@@ -2776,7 +2783,7 @@ init_kmem_cache_node(struct kmem_cache_node *n)
2776static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) 2783static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
2777{ 2784{
2778 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE < 2785 BUILD_BUG_ON(PERCPU_DYNAMIC_EARLY_SIZE <
2779 SLUB_PAGE_SHIFT * sizeof(struct kmem_cache_cpu)); 2786 KMALLOC_SHIFT_HIGH * sizeof(struct kmem_cache_cpu));
2780 2787
2781 /* 2788 /*
2782 * Must align to double word boundary for the double cmpxchg 2789 * Must align to double word boundary for the double cmpxchg
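
The BUILD_BUG_ON above is only retargeted from the SLUB-private SLUB_PAGE_SHIFT to the shared KMALLOC_SHIFT_HIGH constant; the check itself remains a compile-time assertion that, roughly, the early per-cpu area can hold the kmem_cache_cpu structures the kmalloc caches need before the percpu allocator is fully up. Outside the kernel the same kind of build-time sanity check can be written with C11 static_assert; the sizes and the stub structure below are invented stand-ins, not the real kernel constants:

#include <assert.h>	/* static_assert (C11) */

struct kmem_cache_cpu_stub {
	void *freelist;
	unsigned long tid;
	void *page;
	void *partial;
};

/* Invented values standing in for PERCPU_DYNAMIC_EARLY_SIZE and
 * KMALLOC_SHIFT_HIGH; the real ones come from the kernel configuration. */
enum { EARLY_PERCPU_BYTES = 12 * 1024, KMALLOC_SHIFT_HIGH_STUB = 13 };

static_assert(EARLY_PERCPU_BYTES >=
	      KMALLOC_SHIFT_HIGH_STUB * sizeof(struct kmem_cache_cpu_stub),
	      "early percpu area too small for the kmalloc cpu structures");

int main(void) { return 0; }
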
@@ -2983,7 +2990,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
2983 s->allocflags |= __GFP_COMP; 2990 s->allocflags |= __GFP_COMP;
2984 2991
2985 if (s->flags & SLAB_CACHE_DMA) 2992 if (s->flags & SLAB_CACHE_DMA)
2986 s->allocflags |= SLUB_DMA; 2993 s->allocflags |= GFP_DMA;
2987 2994
2988 if (s->flags & SLAB_RECLAIM_ACCOUNT) 2995 if (s->flags & SLAB_RECLAIM_ACCOUNT)
2989 s->allocflags |= __GFP_RECLAIMABLE; 2996 s->allocflags |= __GFP_RECLAIMABLE;
@@ -3175,13 +3182,6 @@ int __kmem_cache_shutdown(struct kmem_cache *s)
3175 * Kmalloc subsystem 3182 * Kmalloc subsystem
3176 *******************************************************************/ 3183 *******************************************************************/
3177 3184
3178struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT];
3179EXPORT_SYMBOL(kmalloc_caches);
3180
3181#ifdef CONFIG_ZONE_DMA
3182static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT];
3183#endif
3184
3185static int __init setup_slub_min_order(char *str) 3185static int __init setup_slub_min_order(char *str)
3186{ 3186{
3187 get_option(&str, &slub_min_order); 3187 get_option(&str, &slub_min_order);
@@ -3218,73 +3218,15 @@ static int __init setup_slub_nomerge(char *str)
3218 3218
3219__setup("slub_nomerge", setup_slub_nomerge); 3219__setup("slub_nomerge", setup_slub_nomerge);
3220 3220
3221/*
3222 * Conversion table for small slabs sizes / 8 to the index in the
3223 * kmalloc array. This is necessary for slabs < 192 since we have non power
3224 * of two cache sizes there. The size of larger slabs can be determined using
3225 * fls.
3226 */
3227static s8 size_index[24] = {
3228 3, /* 8 */
3229 4, /* 16 */
3230 5, /* 24 */
3231 5, /* 32 */
3232 6, /* 40 */
3233 6, /* 48 */
3234 6, /* 56 */
3235 6, /* 64 */
3236 1, /* 72 */
3237 1, /* 80 */
3238 1, /* 88 */
3239 1, /* 96 */
3240 7, /* 104 */
3241 7, /* 112 */
3242 7, /* 120 */
3243 7, /* 128 */
3244 2, /* 136 */
3245 2, /* 144 */
3246 2, /* 152 */
3247 2, /* 160 */
3248 2, /* 168 */
3249 2, /* 176 */
3250 2, /* 184 */
3251 2 /* 192 */
3252};
3253
3254static inline int size_index_elem(size_t bytes)
3255{
3256 return (bytes - 1) / 8;
3257}
3258
3259static struct kmem_cache *get_slab(size_t size, gfp_t flags)
3260{
3261 int index;
3262
3263 if (size <= 192) {
3264 if (!size)
3265 return ZERO_SIZE_PTR;
3266
3267 index = size_index[size_index_elem(size)];
3268 } else
3269 index = fls(size - 1);
3270
3271#ifdef CONFIG_ZONE_DMA
3272 if (unlikely((flags & SLUB_DMA)))
3273 return kmalloc_dma_caches[index];
3274
3275#endif
3276 return kmalloc_caches[index];
3277}
3278
3279void *__kmalloc(size_t size, gfp_t flags) 3221void *__kmalloc(size_t size, gfp_t flags)
3280{ 3222{
3281 struct kmem_cache *s; 3223 struct kmem_cache *s;
3282 void *ret; 3224 void *ret;
3283 3225
3284 if (unlikely(size > SLUB_MAX_SIZE)) 3226 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
3285 return kmalloc_large(size, flags); 3227 return kmalloc_large(size, flags);
3286 3228
3287 s = get_slab(size, flags); 3229 s = kmalloc_slab(size, flags);
3288 3230
3289 if (unlikely(ZERO_OR_NULL_PTR(s))) 3231 if (unlikely(ZERO_OR_NULL_PTR(s)))
3290 return s; 3232 return s;
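
The block removed above (the size_index[] table, size_index_elem() and get_slab()) is not lost: the equivalent logic moves into mm/slab_common.c, where kmalloc_slab() is now shared by SLAB and SLUB, which is why __kmalloc() calls kmalloc_slab() and compares against KMALLOC_MAX_CACHE_SIZE. The lookup technique itself, a size/8 table for the odd sub-192-byte sizes and fls() for the power-of-two sizes above, is easy to exercise on its own. The sketch below reuses the table contents from the deleted lines; fls_local() stands in for the kernel's fls() via a GCC builtin, and the returned index is a kmalloc-array slot, not a byte size:

#include <stdio.h>
#include <stddef.h>

/* Slot in the kmalloc array for each size/8 bucket up to 192 bytes;
 * values copied from the size_index[] table deleted above. */
static const signed char size_index[24] = {
	3, 4, 5, 5, 6, 6, 6, 6,		/*   8 ..  64 bytes */
	1, 1, 1, 1, 7, 7, 7, 7,		/*  72 .. 128 bytes */
	2, 2, 2, 2, 2, 2, 2, 2		/* 136 .. 192 bytes */
};

/* Kernel-style fls(): position of the highest set bit, 1-based. */
static int fls_local(unsigned int x)
{
	return x ? (int)(8 * sizeof(x)) - __builtin_clz(x) : 0;
}

static int kmalloc_index(size_t size)
{
	if (size == 0)
		return -1;				/* the ZERO_SIZE_PTR case */
	if (size <= 192)
		return size_index[(size - 1) / 8];	/* size_index_elem() */
	return fls_local((unsigned int)(size - 1));	/* next power of two */
}

int main(void)
{
	size_t sizes[] = { 8, 24, 100, 192, 200, 4096 };

	for (size_t i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
		printf("kmalloc(%zu) -> kmalloc array index %d\n",
		       sizes[i], kmalloc_index(sizes[i]));
	return 0;
}

Requests above KMALLOC_MAX_CACHE_SIZE never reach this lookup at all; as the __kmalloc() hunk shows, they are handed to kmalloc_large() instead.
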
@@ -3317,7 +3259,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
3317 struct kmem_cache *s; 3259 struct kmem_cache *s;
3318 void *ret; 3260 void *ret;
3319 3261
3320 if (unlikely(size > SLUB_MAX_SIZE)) { 3262 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
3321 ret = kmalloc_large_node(size, flags, node); 3263 ret = kmalloc_large_node(size, flags, node);
3322 3264
3323 trace_kmalloc_node(_RET_IP_, ret, 3265 trace_kmalloc_node(_RET_IP_, ret,
@@ -3327,7 +3269,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
3327 return ret; 3269 return ret;
3328 } 3270 }
3329 3271
3330 s = get_slab(size, flags); 3272 s = kmalloc_slab(size, flags);
3331 3273
3332 if (unlikely(ZERO_OR_NULL_PTR(s))) 3274 if (unlikely(ZERO_OR_NULL_PTR(s)))
3333 return s; 3275 return s;
@@ -3620,6 +3562,12 @@ static struct kmem_cache * __init bootstrap(struct kmem_cache *static_cache)
3620 3562
3621 memcpy(s, static_cache, kmem_cache->object_size); 3563 memcpy(s, static_cache, kmem_cache->object_size);
3622 3564
3565 /*
3566 * This runs very early, and only the boot processor is supposed to be
3567 * up. Even if it weren't true, IRQs are not up so we couldn't fire
3568 * IPIs around.
3569 */
3570 __flush_cpu_slab(s, smp_processor_id());
3623 for_each_node_state(node, N_NORMAL_MEMORY) { 3571 for_each_node_state(node, N_NORMAL_MEMORY) {
3624 struct kmem_cache_node *n = get_node(s, node); 3572 struct kmem_cache_node *n = get_node(s, node);
3625 struct page *p; 3573 struct page *p;
@@ -3642,8 +3590,6 @@ void __init kmem_cache_init(void)
3642{ 3590{
3643 static __initdata struct kmem_cache boot_kmem_cache, 3591 static __initdata struct kmem_cache boot_kmem_cache,
3644 boot_kmem_cache_node; 3592 boot_kmem_cache_node;
3645 int i;
3646 int caches = 2;
3647 3593
3648 if (debug_guardpage_minorder()) 3594 if (debug_guardpage_minorder())
3649 slub_max_order = 0; 3595 slub_max_order = 0;
@@ -3674,103 +3620,16 @@ void __init kmem_cache_init(void)
3674 kmem_cache_node = bootstrap(&boot_kmem_cache_node); 3620 kmem_cache_node = bootstrap(&boot_kmem_cache_node);
3675 3621
3676 /* Now we can use the kmem_cache to allocate kmalloc slabs */ 3622 /* Now we can use the kmem_cache to allocate kmalloc slabs */
3677 3623 create_kmalloc_caches(0);
3678 /*
3679 * Patch up the size_index table if we have strange large alignment
3680 * requirements for the kmalloc array. This is only the case for
3681 * MIPS it seems. The standard arches will not generate any code here.
3682 *
3683 * Largest permitted alignment is 256 bytes due to the way we
3684 * handle the index determination for the smaller caches.
3685 *
3686 * Make sure that nothing crazy happens if someone starts tinkering
3687 * around with ARCH_KMALLOC_MINALIGN
3688 */
3689 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 256 ||
3690 (KMALLOC_MIN_SIZE & (KMALLOC_MIN_SIZE - 1)));
3691
3692 for (i = 8; i < KMALLOC_MIN_SIZE; i += 8) {
3693 int elem = size_index_elem(i);
3694 if (elem >= ARRAY_SIZE(size_index))
3695 break;
3696 size_index[elem] = KMALLOC_SHIFT_LOW;
3697 }
3698
3699 if (KMALLOC_MIN_SIZE == 64) {
3700 /*
3701 * The 96 byte size cache is not used if the alignment
3702 * is 64 byte.
3703 */
3704 for (i = 64 + 8; i <= 96; i += 8)
3705 size_index[size_index_elem(i)] = 7;
3706 } else if (KMALLOC_MIN_SIZE == 128) {
3707 /*
3708 * The 192 byte sized cache is not used if the alignment
3709 * is 128 byte. Redirect kmalloc to use the 256 byte cache
3710 * instead.
3711 */
3712 for (i = 128 + 8; i <= 192; i += 8)
3713 size_index[size_index_elem(i)] = 8;
3714 }
3715
3716 /* Caches that are not of the two-to-the-power-of size */
3717 if (KMALLOC_MIN_SIZE <= 32) {
3718 kmalloc_caches[1] = create_kmalloc_cache("kmalloc-96", 96, 0);
3719 caches++;
3720 }
3721
3722 if (KMALLOC_MIN_SIZE <= 64) {
3723 kmalloc_caches[2] = create_kmalloc_cache("kmalloc-192", 192, 0);
3724 caches++;
3725 }
3726
3727 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
3728 kmalloc_caches[i] = create_kmalloc_cache("kmalloc", 1 << i, 0);
3729 caches++;
3730 }
3731
3732 slab_state = UP;
3733
3734 /* Provide the correct kmalloc names now that the caches are up */
3735 if (KMALLOC_MIN_SIZE <= 32) {
3736 kmalloc_caches[1]->name = kstrdup(kmalloc_caches[1]->name, GFP_NOWAIT);
3737 BUG_ON(!kmalloc_caches[1]->name);
3738 }
3739
3740 if (KMALLOC_MIN_SIZE <= 64) {
3741 kmalloc_caches[2]->name = kstrdup(kmalloc_caches[2]->name, GFP_NOWAIT);
3742 BUG_ON(!kmalloc_caches[2]->name);
3743 }
3744
3745 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
3746 char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);
3747
3748 BUG_ON(!s);
3749 kmalloc_caches[i]->name = s;
3750 }
3751 3624
3752#ifdef CONFIG_SMP 3625#ifdef CONFIG_SMP
3753 register_cpu_notifier(&slab_notifier); 3626 register_cpu_notifier(&slab_notifier);
3754#endif 3627#endif
3755 3628
3756#ifdef CONFIG_ZONE_DMA
3757 for (i = 0; i < SLUB_PAGE_SHIFT; i++) {
3758 struct kmem_cache *s = kmalloc_caches[i];
3759
3760 if (s && s->size) {
3761 char *name = kasprintf(GFP_NOWAIT,
3762 "dma-kmalloc-%d", s->object_size);
3763
3764 BUG_ON(!name);
3765 kmalloc_dma_caches[i] = create_kmalloc_cache(name,
3766 s->object_size, SLAB_CACHE_DMA);
3767 }
3768 }
3769#endif
3770 printk(KERN_INFO 3629 printk(KERN_INFO
3771 "SLUB: Genslabs=%d, HWalign=%d, Order=%d-%d, MinObjects=%d," 3630 "SLUB: HWalign=%d, Order=%d-%d, MinObjects=%d,"
3772 " CPUs=%d, Nodes=%d\n", 3631 " CPUs=%d, Nodes=%d\n",
3773 caches, cache_line_size(), 3632 cache_line_size(),
3774 slub_min_order, slub_max_order, slub_min_objects, 3633 slub_min_order, slub_max_order, slub_min_objects,
3775 nr_cpu_ids, nr_node_ids); 3634 nr_cpu_ids, nr_node_ids);
3776} 3635}
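
The large block removed from kmem_cache_init() above — patching size_index for architectures whose minimum kmalloc alignment rules out the smallest buckets, creating the odd kmalloc-96/kmalloc-192 caches, fixing up the cache names, and building the dma-kmalloc twins — is now performed by the common create_kmalloc_caches() call, which is also why the local caches counter and its field in the boot printk disappear. The redirection logic for a large minimum size is the least obvious part; the sketch below replays it on the same table as the previous example, with KMALLOC_MIN_SIZE passed in as an ordinary parameter rather than the kernel constant and KMALLOC_SHIFT_LOW fixed at 6 (a 64-byte smallest cache) as an assumption:

#include <stdio.h>

#define KMALLOC_SHIFT_LOW	6	/* assume the smallest cache is 64 bytes */

static signed char size_index[24] = {
	3, 4, 5, 5, 6, 6, 6, 6,
	1, 1, 1, 1, 7, 7, 7, 7,
	2, 2, 2, 2, 2, 2, 2, 2
};

/* Redirect the buckets that a large minimum allocation size makes
 * unusable, following the logic deleted from kmem_cache_init(). */
static void patch_size_index(int kmalloc_min_size)
{
	for (int i = 8; i < kmalloc_min_size; i += 8)
		size_index[(i - 1) / 8] = KMALLOC_SHIFT_LOW;

	if (kmalloc_min_size == 64) {		/* no kmalloc-96 cache */
		for (int i = 64 + 8; i <= 96; i += 8)
			size_index[(i - 1) / 8] = 7;	/* use kmalloc-128 */
	} else if (kmalloc_min_size == 128) {	/* no kmalloc-192 cache */
		for (int i = 128 + 8; i <= 192; i += 8)
			size_index[(i - 1) / 8] = 8;	/* use kmalloc-256 */
	}
}

int main(void)
{
	patch_size_index(64);
	printf("kmalloc(32) -> index %d\n", size_index[(32 - 1) / 8]);	/* 6: kmalloc-64 */
	printf("kmalloc(72) -> index %d\n", size_index[(72 - 1) / 8]);	/* 7: kmalloc-128 */
	return 0;
}
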
@@ -3933,10 +3792,10 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
3933 struct kmem_cache *s; 3792 struct kmem_cache *s;
3934 void *ret; 3793 void *ret;
3935 3794
3936 if (unlikely(size > SLUB_MAX_SIZE)) 3795 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
3937 return kmalloc_large(size, gfpflags); 3796 return kmalloc_large(size, gfpflags);
3938 3797
3939 s = get_slab(size, gfpflags); 3798 s = kmalloc_slab(size, gfpflags);
3940 3799
3941 if (unlikely(ZERO_OR_NULL_PTR(s))) 3800 if (unlikely(ZERO_OR_NULL_PTR(s)))
3942 return s; 3801 return s;
@@ -3956,7 +3815,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
3956 struct kmem_cache *s; 3815 struct kmem_cache *s;
3957 void *ret; 3816 void *ret;
3958 3817
3959 if (unlikely(size > SLUB_MAX_SIZE)) { 3818 if (unlikely(size > KMALLOC_MAX_CACHE_SIZE)) {
3960 ret = kmalloc_large_node(size, gfpflags, node); 3819 ret = kmalloc_large_node(size, gfpflags, node);
3961 3820
3962 trace_kmalloc_node(caller, ret, 3821 trace_kmalloc_node(caller, ret,
@@ -3966,7 +3825,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
3966 return ret; 3825 return ret;
3967 } 3826 }
3968 3827
3969 s = get_slab(size, gfpflags); 3828 s = kmalloc_slab(size, gfpflags);
3970 3829
3971 if (unlikely(ZERO_OR_NULL_PTR(s))) 3830 if (unlikely(ZERO_OR_NULL_PTR(s)))
3972 return s; 3831 return s;
@@ -4315,7 +4174,7 @@ static void resiliency_test(void)
4315{ 4174{
4316 u8 *p; 4175 u8 *p;
4317 4176
4318 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || SLUB_PAGE_SHIFT < 10); 4177 BUILD_BUG_ON(KMALLOC_MIN_SIZE > 16 || KMALLOC_SHIFT_HIGH < 10);
4319 4178
4320 printk(KERN_ERR "SLUB resiliency testing\n"); 4179 printk(KERN_ERR "SLUB resiliency testing\n");
4321 printk(KERN_ERR "-----------------------\n"); 4180 printk(KERN_ERR "-----------------------\n");