 mm/slab.c | 596
 1 files changed, 304 insertions, 292 deletions
diff --git a/mm/slab.c b/mm/slab.c
index 4d5c4b93e0eb..7b6f9f10e757 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -50,7 +50,7 @@
  * The head array is strictly LIFO and should improve the cache hit rates.
  * On SMP, it additionally reduces the spinlock operations.
  *
  * The c_cpuarray may not be read with enabled local interrupts -
  * it's changed with a smp_call_function().
  *
  * SMP synchronization:
@@ -266,16 +266,17 @@ struct array_cache {
 	unsigned int batchcount;
 	unsigned int touched;
 	spinlock_t lock;
 	void *entry[0];	/*
 			 * Must have this definition in here for the proper
 			 * alignment of array_cache. Also simplifies accessing
 			 * the entries.
 			 * [0] is for gcc 2.95. It should really be [].
 			 */
 };
 
-/* bootstrap: The caches do not work without cpuarrays anymore,
- * but the cpuarrays are allocated from the generic caches...
+/*
+ * bootstrap: The caches do not work without cpuarrays anymore, but the
+ * cpuarrays are allocated from the generic caches...
  */
 #define BOOT_CPUCACHE_ENTRIES	1
 struct arraycache_init {
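The entry[0] member in the hunk above is the old zero-length-array idiom: the array_cache header and its per-CPU stack of object pointers live in a single allocation. A minimal userspace sketch of the same layout trick follows; the names (obj_stack, obj_stack_alloc) are illustrative only, and it uses a C99 flexible array member rather than the gcc 2.95 [0] form.

#include <stdio.h>
#include <stdlib.h>

/*
 * Sketch of the array_cache layout trick: a header plus a variable-length
 * stack of object pointers allocated as one block. Names are illustrative,
 * not the kernel's.
 */
struct obj_stack {
	unsigned int avail;	/* number of entries currently stacked */
	unsigned int limit;	/* capacity of entry[] */
	void *entry[];		/* flexible array member; was [0] for gcc 2.95 */
};

static struct obj_stack *obj_stack_alloc(unsigned int limit)
{
	struct obj_stack *s;

	/* One allocation covers the header and the entry[] array. */
	s = malloc(sizeof(*s) + limit * sizeof(void *));
	if (!s)
		return NULL;
	s->avail = 0;
	s->limit = limit;
	return s;
}

int main(void)
{
	struct obj_stack *s = obj_stack_alloc(4);
	int x = 42;

	if (!s)
		return 1;
	s->entry[s->avail++] = &x;	/* push; the head array is LIFO like this */
	printf("popped %d\n", *(int *)s->entry[--s->avail]);
	free(s);
	return 0;
}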
@@ -310,10 +311,8 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
 #define	SIZE_L3 (1 + MAX_NUMNODES)
 
 /*
- * This function must be completely optimized away if
- * a constant is passed to it. Mostly the same as
- * what is in linux/slab.h except it returns an
- * index.
+ * This function must be completely optimized away if a constant is passed to
+ * it. Mostly the same as what is in linux/slab.h except it returns an index.
  */
 static __always_inline int index_of(const size_t size)
 {
@@ -351,14 +350,14 @@ static void kmem_list3_init(struct kmem_list3 *parent)
 	parent->free_touched = 0;
 }
 
 #define MAKE_LIST(cachep, listp, slab, nodeid)				\
 	do {								\
 		INIT_LIST_HEAD(listp);					\
 		list_splice(&(cachep->nodelists[nodeid]->slab), listp);	\
 	} while (0)
 
 #define	MAKE_ALL_LISTS(cachep, ptr, nodeid)				\
 	do {								\
 	MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid);	\
 	MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \
 	MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid);	\
@@ -379,8 +378,8 @@ struct kmem_cache {
 	unsigned int buffer_size;
 /* 2) touched by every alloc & free from the backend */
 	struct kmem_list3 *nodelists[MAX_NUMNODES];
 	unsigned int flags;		/* constant flags */
 	unsigned int num;		/* # of objs per slab */
 	spinlock_t spinlock;
 
 /* 3) cache_grow/shrink */
@@ -390,11 +389,11 @@ struct kmem_cache {
 	/* force GFP flags, e.g. GFP_DMA */
 	gfp_t gfpflags;
 
 	size_t colour;			/* cache colouring range */
 	unsigned int colour_off;	/* colour offset */
 	struct kmem_cache *slabp_cache;
 	unsigned int slab_size;
 	unsigned int dflags;		/* dynamic flags */
 
 	/* constructor func */
 	void (*ctor) (void *, struct kmem_cache *, unsigned long);
@@ -438,8 +437,9 @@ struct kmem_cache {
 #define	OFF_SLAB(x)	((x)->flags & CFLGS_OFF_SLAB)
 
 #define BATCHREFILL_LIMIT	16
-/* Optimization question: fewer reaps means less
- * probability for unnessary cpucache drain/refill cycles.
+/*
+ * Optimization question: fewer reaps means less probability for unnessary
+ * cpucache drain/refill cycles.
  *
  * OTOH the cpuarrays can contain lots of objects,
  * which could lock up otherwise freeable slabs.
@@ -453,17 +453,19 @@ struct kmem_cache {
 #define	STATS_INC_ALLOCED(x)	((x)->num_allocations++)
 #define	STATS_INC_GROWN(x)	((x)->grown++)
 #define	STATS_INC_REAPED(x)	((x)->reaped++)
-#define	STATS_SET_HIGH(x)	do { if ((x)->num_active > (x)->high_mark) \
-					(x)->high_mark = (x)->num_active; \
-				} while (0)
+#define	STATS_SET_HIGH(x)						\
+	do {								\
+		if ((x)->num_active > (x)->high_mark)			\
+			(x)->high_mark = (x)->num_active;		\
+	} while (0)
 #define	STATS_INC_ERR(x)	((x)->errors++)
 #define	STATS_INC_NODEALLOCS(x)	((x)->node_allocs++)
 #define	STATS_INC_NODEFREES(x)	((x)->node_frees++)
 #define	STATS_SET_FREEABLE(x, i)					\
-	do { if ((x)->max_freeable < i) \
-		(x)->max_freeable = i; \
-	} while (0)
-
+	do {								\
+		if ((x)->max_freeable < i)				\
+			(x)->max_freeable = i;				\
+	} while (0)
 #define STATS_INC_ALLOCHIT(x)	atomic_inc(&(x)->allochit)
 #define STATS_INC_ALLOCMISS(x)	atomic_inc(&(x)->allocmiss)
 #define STATS_INC_FREEHIT(x)	atomic_inc(&(x)->freehit)
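The STATS_SET_HIGH and STATS_SET_FREEABLE rewrites above move multi-statement macro bodies into the usual do { } while (0) shape. The short standalone sketch below shows why that wrapper matters; the struct and macro names are made up for illustration and are not the kernel's.

#include <stdio.h>

struct stats {
	unsigned long num_active;
	unsigned long high_mark;
};

/*
 * Wrapping the body in do { } while (0) makes the macro behave like a single
 * statement, so it composes safely with if/else written without braces.
 */
#define SET_HIGH(s)							\
	do {								\
		if ((s)->num_active > (s)->high_mark)			\
			(s)->high_mark = (s)->num_active;		\
	} while (0)

int main(void)
{
	struct stats st = { .num_active = 7, .high_mark = 3 };

	if (st.num_active)
		SET_HIGH(&st);	/* expands to one statement; the ';' terminates it */
	else
		printf("idle\n");

	printf("high_mark = %lu\n", st.high_mark);
	return 0;
}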
@@ -478,9 +480,7 @@ struct kmem_cache {
 #define	STATS_INC_ERR(x)	do { } while (0)
 #define	STATS_INC_NODEALLOCS(x)	do { } while (0)
 #define	STATS_INC_NODEFREES(x)	do { } while (0)
-#define	STATS_SET_FREEABLE(x, i) \
-	do { } while (0)
-
+#define	STATS_SET_FREEABLE(x, i) do { } while (0)
 #define STATS_INC_ALLOCHIT(x)	do { } while (0)
 #define STATS_INC_ALLOCMISS(x)	do { } while (0)
 #define STATS_INC_FREEHIT(x)	do { } while (0)
@@ -488,7 +488,8 @@ struct kmem_cache {
 #endif
 
 #if DEBUG
-/* Magic nums for obj red zoning.
+/*
+ * Magic nums for obj red zoning.
  * Placed in the first word before and the first word after an obj.
  */
 #define	RED_INACTIVE	0x5A2CF071UL	/* when obj is inactive */
@@ -499,7 +500,8 @@ struct kmem_cache {
 #define POISON_FREE	0x6b	/* for use-after-free poisoning */
 #define	POISON_END	0xa5	/* end-byte of poisoning */
 
-/* memory layout of objects:
+/*
+ * memory layout of objects:
  * 0		: objp
  * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that
  *		the end of an object is aligned with the end of the real
@@ -508,7 +510,8 @@ struct kmem_cache {
  *		redzone word.
  * cachep->obj_offset: The real object.
  * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long]
- * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address [BYTES_PER_WORD long]
+ * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address
+ *					[BYTES_PER_WORD long]
  */
 static int obj_offset(struct kmem_cache *cachep)
 {
@@ -552,8 +555,8 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
 #endif
 
 /*
- * Maximum size of an obj (in 2^order pages)
- * and absolute limit for the gfp order.
+ * Maximum size of an obj (in 2^order pages) and absolute limit for the gfp
+ * order.
  */
 #if defined(CONFIG_LARGE_ALLOCS)
 #define	MAX_OBJ_ORDER	13	/* up to 32Mb */
@@ -573,9 +576,10 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
 #define	BREAK_GFP_ORDER_LO	0
 static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
 
-/* Functions for storing/retrieving the cachep and or slab from the
- * global 'mem_map'. These are used to find the slab an obj belongs to.
- * With kfree(), these are used to find the cache which an obj belongs to.
+/*
+ * Functions for storing/retrieving the cachep and or slab from the page
+ * allocator. These are used to find the slab an obj belongs to. With kfree(),
+ * these are used to find the cache which an obj belongs to.
  */
 static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
 {
@@ -621,7 +625,9 @@ static inline unsigned int obj_to_index(struct kmem_cache *cache,
 	return (unsigned)(obj - slab->s_mem) / cache->buffer_size;
 }
 
-/* These are the default caches for kmalloc. Custom caches can have other sizes. */
+/*
+ * These are the default caches for kmalloc. Custom caches can have other sizes.
+ */
 struct cache_sizes malloc_sizes[] = {
 #define CACHE(x) { .cs_size = (x) },
 #include <linux/kmalloc_sizes.h>
@@ -667,8 +673,8 @@ static DEFINE_MUTEX(cache_chain_mutex);
 static struct list_head cache_chain;
 
 /*
- * vm_enough_memory() looks at this to determine how many
- * slab-allocated pages are possibly freeable under pressure
+ * vm_enough_memory() looks at this to determine how many slab-allocated pages
+ * are possibly freeable under pressure
  *
  * SLAB_RECLAIM_ACCOUNT turns this on per-slab
  */
@@ -687,7 +693,8 @@ static enum {
 
 static DEFINE_PER_CPU(struct work_struct, reap_work);
 
-static void free_block(struct kmem_cache *cachep, void **objpp, int len, int node);
+static void free_block(struct kmem_cache *cachep, void **objpp, int len,
+			int node);
 static void enable_cpucache(struct kmem_cache *cachep);
 static void cache_reap(void *unused);
 static int __node_shrink(struct kmem_cache *cachep, int node);
@@ -697,7 +704,8 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
 	return cachep->array[smp_processor_id()];
 }
 
-static inline struct kmem_cache *__find_general_cachep(size_t size, gfp_t gfpflags)
+static inline struct kmem_cache *__find_general_cachep(size_t size,
+							gfp_t gfpflags)
 {
 	struct cache_sizes *csizep = malloc_sizes;
 
@@ -732,8 +740,9 @@ static size_t slab_mgmt_size(size_t nr_objs, size_t align)
 	return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
 }
 
-/* Calculate the number of objects and left-over bytes for a given
-   buffer size. */
+/*
+ * Calculate the number of objects and left-over bytes for a given buffer size.
+ */
 static void cache_estimate(unsigned long gfporder, size_t buffer_size,
 			   size_t align, int flags, size_t *left_over,
 			   unsigned int *num)
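cache_estimate() answers: given a slab of 2^gfporder pages and a buffer_size, how many objects fit and how many bytes are left over. The sketch below models only the simple on-slab case (one index word per object plus a small management header, aligned up before the first object); it is a rough illustration with assumed sizes, not the kernel's exact calculation.

#include <stdio.h>
#include <stddef.h>

#define PAGE_SIZE	4096UL

struct fake_slab {		/* stand-in for the slab management header */
	unsigned int inuse;
	unsigned int free;
};

/* Round size up to the next multiple of align (align must be a power of two). */
static size_t align_up(size_t size, size_t align)
{
	return (size + align - 1) & ~(align - 1);
}

/*
 * Rough on-slab model: each object costs buffer_size bytes plus one index
 * (modelled as unsigned int); the header plus index array is aligned up
 * before the objects start.
 */
static void estimate(unsigned long gfporder, size_t buffer_size, size_t align,
		     size_t *left_over, unsigned int *num)
{
	size_t slab_size = PAGE_SIZE << gfporder;
	unsigned int nr = 0;

	/* Grow the object count while header + indexes + objects still fit. */
	while (align_up(sizeof(struct fake_slab) +
			(nr + 1) * sizeof(unsigned int), align) +
	       (nr + 1) * buffer_size <= slab_size)
		nr++;

	*num = nr;
	*left_over = slab_size -
		(align_up(sizeof(struct fake_slab) + nr * sizeof(unsigned int),
			  align) + nr * buffer_size);
}

int main(void)
{
	size_t left;
	unsigned int num;

	estimate(0, 256, sizeof(void *), &left, &num);
	printf("order-0, 256-byte objects: %u objects, %zu bytes left over\n",
	       num, left);
	return 0;
}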
@@ -794,7 +803,8 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size,
 
 #define slab_error(cachep, msg) __slab_error(__FUNCTION__, cachep, msg)
 
-static void __slab_error(const char *function, struct kmem_cache *cachep, char *msg)
+static void __slab_error(const char *function, struct kmem_cache *cachep,
+			char *msg)
 {
 	printk(KERN_ERR "slab error in %s(): cache `%s': %s\n",
 	       function, cachep->name, msg);
@@ -918,10 +928,8 @@ static void free_alien_cache(struct array_cache **ac_ptr)
 
 	if (!ac_ptr)
 		return;
-
 	for_each_node(i)
 		kfree(ac_ptr[i]);
-
 	kfree(ac_ptr);
 }
 
@@ -955,7 +963,8 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
 	}
 }
 
-static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien)
+static void drain_alien_cache(struct kmem_cache *cachep,
+				struct array_cache **alien)
 {
 	int i = 0;
 	struct array_cache *ac;
@@ -998,20 +1007,22 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 	switch (action) {
 	case CPU_UP_PREPARE:
 		mutex_lock(&cache_chain_mutex);
-		/* we need to do this right in the beginning since
+		/*
+		 * We need to do this right in the beginning since
 		 * alloc_arraycache's are going to use this list.
 		 * kmalloc_node allows us to add the slab to the right
 		 * kmem_list3 and not this cpu's kmem_list3
 		 */
 
 		list_for_each_entry(cachep, &cache_chain, next) {
-			/* setup the size64 kmemlist for cpu before we can
+			/*
+			 * Set up the size64 kmemlist for cpu before we can
 			 * begin anything. Make sure some other cpu on this
 			 * node has not already allocated this
 			 */
 			if (!cachep->nodelists[node]) {
-				if (!(l3 = kmalloc_node(memsize,
-						GFP_KERNEL, node)))
+				l3 = kmalloc_node(memsize, GFP_KERNEL, node);
+				if (!l3)
 					goto bad;
 				kmem_list3_init(l3);
 				l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
@@ -1027,13 +1038,15 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 
 			spin_lock_irq(&cachep->nodelists[node]->list_lock);
 			cachep->nodelists[node]->free_limit =
 				(1 + nr_cpus_node(node)) *
 				cachep->batchcount + cachep->num;
 			spin_unlock_irq(&cachep->nodelists[node]->list_lock);
 		}
 
-		/* Now we can go ahead with allocating the shared array's
-		  & array cache's */
+		/*
+		 * Now we can go ahead with allocating the shared arrays and
+		 * array caches
+		 */
 		list_for_each_entry(cachep, &cache_chain, next) {
 			struct array_cache *nc;
 			struct array_cache *shared;
@@ -1053,7 +1066,6 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 			if (!alien)
 				goto bad;
 			cachep->array[cpu] = nc;
-
 			l3 = cachep->nodelists[node];
 			BUG_ON(!l3);
 
@@ -1073,7 +1085,6 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 			}
 #endif
 			spin_unlock_irq(&l3->list_lock);
-
 			kfree(shared);
 			free_alien_cache(alien);
 		}
@@ -1095,7 +1106,6 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
 		/* fall thru */
 	case CPU_UP_CANCELED:
 		mutex_lock(&cache_chain_mutex);
-
 		list_for_each_entry(cachep, &cache_chain, next) {
 			struct array_cache *nc;
 			struct array_cache *shared;
@@ -1162,7 +1172,7 @@ free_array_cache:
 #endif
 	}
 	return NOTIFY_OK;
-      bad:
+bad:
 	mutex_unlock(&cache_chain_mutex);
 	return NOTIFY_BAD;
 }
@@ -1172,7 +1182,8 @@ static struct notifier_block cpucache_notifier = { &cpuup_callback, NULL, 0 };
 /*
  * swap the static kmem_list3 with kmalloced memory
  */
-static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, int nodeid)
+static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
+			int nodeid)
 {
 	struct kmem_list3 *ptr;
 
@@ -1187,8 +1198,9 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, int no
 	local_irq_enable();
 }
 
-/* Initialisation.
- * Called after the gfp() functions have been enabled, and before smp_init().
+/*
+ * Initialisation. Called after the page allocator have been initialised and
+ * before smp_init().
  */
 void __init kmem_cache_init(void)
 {
@@ -1213,9 +1225,9 @@ void __init kmem_cache_init(void)
 
 	/* Bootstrap is tricky, because several objects are allocated
 	 * from caches that do not exist yet:
-	 * 1) initialize the cache_cache cache: it contains the struct kmem_cache
-	 *    structures of all caches, except cache_cache itself: cache_cache
-	 *    is statically allocated.
+	 * 1) initialize the cache_cache cache: it contains the struct
+	 *    kmem_cache structures of all caches, except cache_cache itself:
+	 *    cache_cache is statically allocated.
 	 *    Initially an __init data area is used for the head array and the
 	 *    kmem_list3 structures, it's replaced with a kmalloc allocated
 	 *    array at the end of the bootstrap.
@@ -1238,7 +1250,8 @@ void __init kmem_cache_init(void)
 	cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
 	cache_cache.nodelists[numa_node_id()] = &initkmem_list3[CACHE_CACHE];
 
-	cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, cache_line_size());
+	cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
+					cache_line_size());
 
 	for (order = 0; order < MAX_ORDER; order++) {
 		cache_estimate(order, cache_cache.buffer_size,
@@ -1257,24 +1270,26 @@ void __init kmem_cache_init(void)
 	sizes = malloc_sizes;
 	names = cache_names;
 
-	/* Initialize the caches that provide memory for the array cache
-	 * and the kmem_list3 structures first.
-	 * Without this, further allocations will bug
+	/*
+	 * Initialize the caches that provide memory for the array cache and the
+	 * kmem_list3 structures first. Without this, further allocations will
+	 * bug.
 	 */
 
 	sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
 					sizes[INDEX_AC].cs_size,
 					ARCH_KMALLOC_MINALIGN,
-					(ARCH_KMALLOC_FLAGS |
-					 SLAB_PANIC), NULL, NULL);
+					ARCH_KMALLOC_FLAGS|SLAB_PANIC,
+					NULL, NULL);
 
-	if (INDEX_AC != INDEX_L3)
+	if (INDEX_AC != INDEX_L3) {
 		sizes[INDEX_L3].cs_cachep =
 			kmem_cache_create(names[INDEX_L3].name,
 				sizes[INDEX_L3].cs_size,
 				ARCH_KMALLOC_MINALIGN,
-				(ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL,
-				NULL);
+				ARCH_KMALLOC_FLAGS|SLAB_PANIC,
+				NULL, NULL);
+	}
 
 	while (sizes->cs_size != ULONG_MAX) {
 		/*
@@ -1284,13 +1299,13 @@ void __init kmem_cache_init(void)
 		 * Note for systems short on memory removing the alignment will
 		 * allow tighter packing of the smaller caches.
 		 */
-		if (!sizes->cs_cachep)
+		if (!sizes->cs_cachep) {
 			sizes->cs_cachep = kmem_cache_create(names->name,
 					sizes->cs_size,
 					ARCH_KMALLOC_MINALIGN,
-					(ARCH_KMALLOC_FLAGS
-					 | SLAB_PANIC),
-					NULL, NULL);
+					ARCH_KMALLOC_FLAGS|SLAB_PANIC,
+					NULL, NULL);
+		}
 
 		/* Inc off-slab bufctl limit until the ceiling is hit. */
 		if (!(OFF_SLAB(sizes->cs_cachep))) {
@@ -1299,13 +1314,11 @@ void __init kmem_cache_init(void)
 		}
 
 		sizes->cs_dmacachep = kmem_cache_create(names->name_dma,
 					sizes->cs_size,
 					ARCH_KMALLOC_MINALIGN,
-					(ARCH_KMALLOC_FLAGS |
-					 SLAB_CACHE_DMA |
-					 SLAB_PANIC), NULL,
-					NULL);
-
+					ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
+					SLAB_PANIC,
+					NULL, NULL);
 		sizes++;
 		names++;
 	}
@@ -1357,20 +1370,22 @@ void __init kmem_cache_init(void)
 		struct kmem_cache *cachep;
 		mutex_lock(&cache_chain_mutex);
 		list_for_each_entry(cachep, &cache_chain, next)
 			enable_cpucache(cachep);
 		mutex_unlock(&cache_chain_mutex);
 	}
 
 	/* Done! */
 	g_cpucache_up = FULL;
 
-	/* Register a cpu startup notifier callback
-	 * that initializes cpu_cache_get for all new cpus
+	/*
+	 * Register a cpu startup notifier callback that initializes
+	 * cpu_cache_get for all new cpus
 	 */
 	register_cpu_notifier(&cpucache_notifier);
 
-	/* The reap timers are started later, with a module init call:
-	 * That part of the kernel is not yet operational.
+	/*
+	 * The reap timers are started later, with a module init call: That part
+	 * of the kernel is not yet operational.
 	 */
 }
 
@@ -1378,16 +1393,13 @@ static int __init cpucache_init(void)
 {
 	int cpu;
 
 	/*
-	 * Register the timers that return unneeded
-	 * pages to gfp.
+	 * Register the timers that return unneeded pages to the page allocator
 	 */
 	for_each_online_cpu(cpu)
 		start_cpu_timer(cpu);
-
 	return 0;
 }
-
 __initcall(cpucache_init);
 
 /*
@@ -1501,9 +1513,8 @@ static void dump_line(char *data, int offset, int limit)
 {
 	int i;
 	printk(KERN_ERR "%03x:", offset);
-	for (i = 0; i < limit; i++) {
+	for (i = 0; i < limit; i++)
 		printk(" %02x", (unsigned char)data[offset + i]);
-	}
 	printk("\n");
 }
 #endif
@@ -1517,15 +1528,15 @@ static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
 
 	if (cachep->flags & SLAB_RED_ZONE) {
 		printk(KERN_ERR "Redzone: 0x%lx/0x%lx.\n",
 			*dbg_redzone1(cachep, objp),
 			*dbg_redzone2(cachep, objp));
 	}
 
 	if (cachep->flags & SLAB_STORE_USER) {
 		printk(KERN_ERR "Last user: [<%p>]",
 			*dbg_userword(cachep, objp));
 		print_symbol("(%s)",
 				(unsigned long)*dbg_userword(cachep, objp));
 		printk("\n");
 	}
 	realobj = (char *)objp + obj_offset(cachep);
@@ -1558,8 +1569,8 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
 			/* Print header */
 			if (lines == 0) {
 				printk(KERN_ERR
 					"Slab corruption: start=%p, len=%d\n",
 					realobj, size);
 				print_objinfo(cachep, objp, 0);
 			}
 			/* Hexdump the affected line */
@@ -1614,11 +1625,10 @@ static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp)
 
 		if (cachep->flags & SLAB_POISON) {
 #ifdef CONFIG_DEBUG_PAGEALLOC
-			if ((cachep->buffer_size % PAGE_SIZE) == 0
-			    && OFF_SLAB(cachep))
+			if (cachep->buffer_size % PAGE_SIZE == 0 &&
+					OFF_SLAB(cachep))
 				kernel_map_pages(virt_to_page(objp),
-						 cachep->buffer_size / PAGE_SIZE,
-						 1);
+					cachep->buffer_size / PAGE_SIZE, 1);
 			else
 				check_poison_obj(cachep, objp);
 #else
@@ -1650,10 +1660,10 @@ static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp)
 }
 #endif
 
-/**
+/*
  * Destroy all the objs in a slab, and release the mem back to the system.
- * Before calling the slab must have been unlinked from the cache.
- * The cache-lock is not held/needed.
+ * Before calling the slab must have been unlinked from the cache. The
+ * cache-lock is not held/needed.
  */
 static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
 {
@@ -1674,8 +1684,10 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
 	}
 }
 
-/* For setting up all the kmem_list3s for cache whose buffer_size is same
-   as size of kmem_list3. */
+/*
+ * For setting up all the kmem_list3s for cache whose buffer_size is same as
+ * size of kmem_list3.
+ */
 static void set_up_list3s(struct kmem_cache *cachep, int index)
 {
 	int node;
@@ -1701,13 +1713,13 @@ static void set_up_list3s(struct kmem_cache *cachep, int index)
  * high order pages for slabs. When the gfp() functions are more friendly
  * towards high-order requests, this should be changed.
  */
-static inline size_t calculate_slab_order(struct kmem_cache *cachep,
+static size_t calculate_slab_order(struct kmem_cache *cachep,
 			size_t size, size_t align, unsigned long flags)
 {
 	size_t left_over = 0;
 	int gfporder;
 
-	for (gfporder = 0 ; gfporder <= MAX_GFP_ORDER; gfporder++) {
+	for (gfporder = 0; gfporder <= MAX_GFP_ORDER; gfporder++) {
 		unsigned int num;
 		size_t remainder;
 
@@ -1742,7 +1754,7 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep,
 		/*
 		 * Acceptable internal fragmentation?
 		 */
-		if ((left_over * 8) <= (PAGE_SIZE << gfporder))
+		if (left_over * 8 <= (PAGE_SIZE << gfporder))
 			break;
 	}
 	return left_over;
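The check rewritten above accepts a gfporder once the unused tail of the slab is at most one eighth of its total size: left_over * 8 <= PAGE_SIZE << gfporder. A tiny standalone illustration of that acceptance rule, ignoring the slab-management overhead that cache_estimate would account for (PAGE_SIZE and the example object size are assumptions):

#include <stdio.h>

#define PAGE_SIZE	4096UL

/* Accept a slab order once the unused tail is no more than 1/8 of the slab. */
static int fragmentation_ok(unsigned long left_over, int gfporder)
{
	return left_over * 8 <= (PAGE_SIZE << gfporder);
}

int main(void)
{
	/* 700-byte objects in an order-0 slab: 5 fit, 4096 - 5 * 700 = 596 left */
	unsigned long left_over = 4096 - 5 * 700;

	printf("order 0: left_over=%lu -> %s\n", left_over,
	       fragmentation_ok(left_over, 0) ? "acceptable" : "try a higher order");
	return 0;
}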
@@ -1817,9 +1829,8 @@ static void setup_cpu_cache(struct kmem_cache *cachep)
 * and the @dtor is run before the pages are handed back.
 *
 * @name must be valid until the cache is destroyed. This implies that
- * the module calling this has to destroy the cache before getting
- * unloaded.
- *
+ * the module calling this has to destroy the cache before getting unloaded.
+ *
 * The flags are
 *
 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
@@ -1837,7 +1848,8 @@ static void setup_cpu_cache(struct kmem_cache *cachep)
 */
struct kmem_cache *
kmem_cache_create (const char *name, size_t size, size_t align,
-	unsigned long flags, void (*ctor)(void*, struct kmem_cache *, unsigned long),
+	unsigned long flags,
+	void (*ctor)(void*, struct kmem_cache *, unsigned long),
	void (*dtor)(void*, struct kmem_cache *, unsigned long))
{
	size_t left_over, slab_size, ralign;
@@ -1847,12 +1859,10 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	/*
 	 * Sanity checks... these are all serious usage bugs.
 	 */
-	if ((!name) ||
-	    in_interrupt() ||
-	    (size < BYTES_PER_WORD) ||
+	if (!name || in_interrupt() || (size < BYTES_PER_WORD) ||
 	    (size > (1 << MAX_OBJ_ORDER) * PAGE_SIZE) || (dtor && !ctor)) {
-		printk(KERN_ERR "%s: Early error in slab %s\n",
-		       __FUNCTION__, name);
+		printk(KERN_ERR "%s: Early error in slab %s\n", __FUNCTION__,
+				name);
 		BUG();
 	}
 
@@ -1906,8 +1916,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	 * above the next power of two: caches with object sizes just above a
 	 * power of two have a significant amount of internal fragmentation.
 	 */
-	if ((size < 4096
-	     || fls(size - 1) == fls(size - 1 + 3 * BYTES_PER_WORD)))
+	if (size < 4096 || fls(size - 1) == fls(size-1 + 3 * BYTES_PER_WORD))
 		flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
 	if (!(flags & SLAB_DESTROY_BY_RCU))
 		flags |= SLAB_POISON;
@@ -1919,13 +1928,14 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	BUG_ON(dtor);
 
 	/*
-	 * Always checks flags, a caller might be expecting debug
-	 * support which isn't available.
+	 * Always checks flags, a caller might be expecting debug support which
+	 * isn't available.
 	 */
 	if (flags & ~CREATE_MASK)
 		BUG();
 
-	/* Check that size is in terms of words. This is needed to avoid
+	/*
+	 * Check that size is in terms of words. This is needed to avoid
 	 * unaligned accesses for some archs when redzoning is used, and makes
 	 * sure any on-slab bufctl's are also correctly aligned.
 	 */
@@ -1934,12 +1944,14 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 		size &= ~(BYTES_PER_WORD - 1);
 	}
 
-	/* calculate out the final buffer alignment: */
+	/* calculate the final buffer alignment: */
+
 	/* 1) arch recommendation: can be overridden for debug */
 	if (flags & SLAB_HWCACHE_ALIGN) {
-		/* Default alignment: as specified by the arch code.
-		 * Except if an object is really small, then squeeze multiple
-		 * objects into one cacheline.
+		/*
+		 * Default alignment: as specified by the arch code. Except if
+		 * an object is really small, then squeeze multiple objects into
+		 * one cacheline.
 		 */
 		ralign = cache_line_size();
 		while (size <= ralign / 2)
@@ -1959,7 +1971,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 	if (ralign > BYTES_PER_WORD)
 		flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
 	}
-	/* 4) Store it. Note that the debug code below can reduce
+	/*
+	 * 4) Store it. Note that the debug code below can reduce
 	 * the alignment to BYTES_PER_WORD.
 	 */
 	align = ralign;
@@ -2058,7 +2071,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
 
 	/* cache setup completed, link it into the list */
 	list_add(&cachep->next, &cache_chain);
-      oops:
+oops:
 	if (!cachep && (flags & SLAB_PANIC))
 		panic("kmem_cache_create(): failed to create slab `%s'\n",
 		      name);
@@ -2109,7 +2122,6 @@ static void smp_call_function_all_cpus(void (*func)(void *arg), void *arg)
 {
 	check_irq_on();
 	preempt_disable();
-
 	local_irq_disable();
 	func(arg);
 	local_irq_enable();
@@ -2120,12 +2132,12 @@ static void smp_call_function_all_cpus(void (*func)(void *arg), void *arg)
 	preempt_enable();
 }
 
-static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac,
-				int force, int node);
+static void drain_array_locked(struct kmem_cache *cachep,
+			struct array_cache *ac, int force, int node);
 
 static void do_drain(void *arg)
 {
-	struct kmem_cache *cachep = (struct kmem_cache *) arg;
+	struct kmem_cache *cachep = arg;
 	struct array_cache *ac;
 	int node = numa_node_id();
 
@@ -2273,16 +2285,15 @@ int kmem_cache_destroy(struct kmem_cache *cachep)
 
 	/* NUMA: free the list3 structures */
 	for_each_online_node(i) {
-		if ((l3 = cachep->nodelists[i])) {
+		l3 = cachep->nodelists[i];
+		if (l3) {
 			kfree(l3->shared);
 			free_alien_cache(l3->alien);
 			kfree(l3);
 		}
 	}
 	kmem_cache_free(&cache_cache, cachep);
-
 	unlock_cpu_hotplug();
-
 	return 0;
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
@@ -2305,7 +2316,6 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
 	slabp->inuse = 0;
 	slabp->colouroff = colour_off;
 	slabp->s_mem = objp + colour_off;
-
 	return slabp;
 }
 
@@ -2333,9 +2343,9 @@ static void cache_init_objs(struct kmem_cache *cachep,
 			*dbg_redzone2(cachep, objp) = RED_INACTIVE;
 		}
 		/*
-		 * Constructors are not allowed to allocate memory from
-		 * the same cache which they are a constructor for.
-		 * Otherwise, deadlock. They must also be threaded.
+		 * Constructors are not allowed to allocate memory from the same
+		 * cache which they are a constructor for. Otherwise, deadlock.
+		 * They must also be threaded.
 		 */
 		if (cachep->ctor && !(cachep->flags & SLAB_POISON))
 			cachep->ctor(objp + obj_offset(cachep), cachep,
@@ -2349,8 +2359,8 @@ static void cache_init_objs(struct kmem_cache *cachep,
 			slab_error(cachep, "constructor overwrote the"
 				   " start of an object");
 		}
-		if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep)
-		    && cachep->flags & SLAB_POISON)
+		if ((cachep->buffer_size % PAGE_SIZE) == 0 &&
+			    OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
 			kernel_map_pages(virt_to_page(objp),
 					 cachep->buffer_size / PAGE_SIZE, 0);
 #else
@@ -2365,16 +2375,14 @@ static void cache_init_objs(struct kmem_cache *cachep,
 
 static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
 {
-	if (flags & SLAB_DMA) {
-		if (!(cachep->gfpflags & GFP_DMA))
-			BUG();
-	} else {
-		if (cachep->gfpflags & GFP_DMA)
-			BUG();
-	}
+	if (flags & SLAB_DMA)
+		BUG_ON(!(cachep->gfpflags & GFP_DMA));
+	else
+		BUG_ON(cachep->gfpflags & GFP_DMA);
 }
 
-static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, int nodeid)
+static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
+				int nodeid)
 {
 	void *objp = index_to_obj(cachep, slabp, slabp->free);
 	kmem_bufctl_t next;
@@ -2390,8 +2398,8 @@ static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, int nod
 	return objp;
 }
 
-static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, void *objp,
-			  int nodeid)
+static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
+				void *objp, int nodeid)
 {
 	unsigned int objnr = obj_to_index(cachep, slabp, objp);
 
@@ -2401,7 +2409,7 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, void *ob
 
 	if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) {
 		printk(KERN_ERR "slab: double free detected in cache "
 				"'%s', objp %p\n", cachep->name, objp);
 		BUG();
 	}
 #endif
@@ -2410,7 +2418,8 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, void *ob
 	slabp->inuse--;
 }
 
-static void set_slab_attr(struct kmem_cache *cachep, struct slab *slabp, void *objp)
+static void set_slab_attr(struct kmem_cache *cachep, struct slab *slabp,
+				void *objp)
 {
 	int i;
 	struct page *page;
@@ -2438,8 +2447,9 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	unsigned long ctor_flags;
 	struct kmem_list3 *l3;
 
-	/* Be lazy and only check for valid flags here,
-	 * keeping it out of the critical path in kmem_cache_alloc().
+	/*
+	 * Be lazy and only check for valid flags here, keeping it out of the
+	 * critical path in kmem_cache_alloc().
 	 */
 	if (flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW))
 		BUG();
@@ -2480,14 +2490,17 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	 */
 	kmem_flagcheck(cachep, flags);
 
-	/* Get mem for the objs.
-	 * Attempt to allocate a physical page from 'nodeid',
+	/*
+	 * Get mem for the objs. Attempt to allocate a physical page from
+	 * 'nodeid'.
 	 */
-	if (!(objp = kmem_getpages(cachep, flags, nodeid)))
+	objp = kmem_getpages(cachep, flags, nodeid);
+	if (!objp)
 		goto failed;
 
 	/* Get slab management. */
-	if (!(slabp = alloc_slabmgmt(cachep, objp, offset, local_flags)))
+	slabp = alloc_slabmgmt(cachep, objp, offset, local_flags);
+	if (!slabp)
 		goto opps1;
 
 	slabp->nodeid = nodeid;
@@ -2506,9 +2519,9 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 	l3->free_objects += cachep->num;
 	spin_unlock(&l3->list_lock);
 	return 1;
-      opps1:
+opps1:
 	kmem_freepages(cachep, objp);
-      failed:
+failed:
 	if (local_flags & __GFP_WAIT)
 		local_irq_disable();
 	return 0;
@@ -2551,8 +2564,8 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
 	page = virt_to_page(objp);
 
 	if (page_get_cache(page) != cachep) {
-		printk(KERN_ERR
-		       "mismatch in kmem_cache_free: expected cache %p, got %p\n",
+		printk(KERN_ERR "mismatch in kmem_cache_free: expected "
+				"cache %p, got %p\n",
 		       page_get_cache(page), cachep);
 		printk(KERN_ERR "%p is %s.\n", cachep, cachep->name);
 		printk(KERN_ERR "%p is %s.\n", page_get_cache(page),
@@ -2562,13 +2575,12 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
 	slabp = page_get_slab(page);
 
 	if (cachep->flags & SLAB_RED_ZONE) {
-		if (*dbg_redzone1(cachep, objp) != RED_ACTIVE
-		    || *dbg_redzone2(cachep, objp) != RED_ACTIVE) {
-			slab_error(cachep,
-				   "double free, or memory outside"
-				   " object was overwritten");
-			printk(KERN_ERR
-			       "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n",
+		if (*dbg_redzone1(cachep, objp) != RED_ACTIVE ||
+				*dbg_redzone2(cachep, objp) != RED_ACTIVE) {
+			slab_error(cachep, "double free, or memory outside"
+						" object was overwritten");
+			printk(KERN_ERR "%p: redzone 1:0x%lx, "
+					"redzone 2:0x%lx.\n",
 			       objp, *dbg_redzone1(cachep, objp),
 			       *dbg_redzone2(cachep, objp));
 		}
@@ -2584,9 +2596,10 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
 	BUG_ON(objp != index_to_obj(cachep, slabp, objnr));
 
 	if (cachep->flags & SLAB_DEBUG_INITIAL) {
-		/* Need to call the slab's constructor so the
-		 * caller can perform a verify of its state (debugging).
-		 * Called without the cache-lock held.
+		/*
+		 * Need to call the slab's constructor so the caller can
+		 * perform a verify of its state (debugging). Called without
+		 * the cache-lock held.
 		 */
 		cachep->ctor(objp + obj_offset(cachep),
 			     cachep, SLAB_CTOR_CONSTRUCTOR | SLAB_CTOR_VERIFY);
@@ -2599,7 +2612,7 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
 	}
 	if (cachep->flags & SLAB_POISON) {
 #ifdef CONFIG_DEBUG_PAGEALLOC
-		if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) {
+		if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
 			store_stackinfo(cachep, objp, (unsigned long)caller);
 			kernel_map_pages(virt_to_page(objp),
 					 cachep->buffer_size / PAGE_SIZE, 0);
@@ -2625,14 +2638,14 @@ static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
 		goto bad;
 	}
 	if (entries != cachep->num - slabp->inuse) {
-	      bad:
-		printk(KERN_ERR
-		       "slab: Internal list corruption detected in cache '%s'(%d), slabp %p(%d). Hexdump:\n",
+bad:
+		printk(KERN_ERR "slab: Internal list corruption detected in "
+				"cache '%s'(%d), slabp %p(%d). Hexdump:\n",
 			cachep->name, cachep->num, slabp, slabp->inuse);
 		for (i = 0;
 		     i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t);
 		     i++) {
-			if ((i % 16) == 0)
+			if (i % 16 == 0)
 				printk("\n%03x:", i);
 			printk(" %02x", ((unsigned char *)slabp)[i]);
 		}
@@ -2654,12 +2667,13 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
 
 	check_irq_off();
 	ac = cpu_cache_get(cachep);
-      retry:
+retry:
 	batchcount = ac->batchcount;
 	if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
-		/* if there was little recent activity on this
-		 * cache, then perform only a partial refill.
-		 * Otherwise we could generate refill bouncing.
+		/*
+		 * If there was little recent activity on this cache, then
+		 * perform only a partial refill. Otherwise we could generate
+		 * refill bouncing.
 		 */
 		batchcount = BATCHREFILL_LIMIT;
 	}
@@ -2715,29 +2729,29 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
 			list_add(&slabp->list, &l3->slabs_partial);
 	}
 
-      must_grow:
+must_grow:
 	l3->free_objects -= ac->avail;
-      alloc_done:
+alloc_done:
 	spin_unlock(&l3->list_lock);
 
 	if (unlikely(!ac->avail)) {
 		int x;
 		x = cache_grow(cachep, flags, numa_node_id());
 
-		// cache_grow can reenable interrupts, then ac could change.
+		/* cache_grow can reenable interrupts, then ac could change. */
 		ac = cpu_cache_get(cachep);
-		if (!x && ac->avail == 0)	// no objects in sight? abort
+		if (!x && ac->avail == 0)	/* no objects in sight? abort */
 			return NULL;
 
-		if (!ac->avail)		// objects refilled by interrupt?
+		if (!ac->avail)		/* objects refilled by interrupt? */
 			goto retry;
 	}
 	ac->touched = 1;
 	return ac->entry[--ac->avail];
 }
 
-static inline void
-cache_alloc_debugcheck_before(struct kmem_cache *cachep, gfp_t flags)
+static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
+						gfp_t flags)
 {
 	might_sleep_if(flags & __GFP_WAIT);
 #if DEBUG
@@ -2746,8 +2760,8 @@ cache_alloc_debugcheck_before(struct kmem_cache *cachep, gfp_t flags)
 }
 
 #if DEBUG
-static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, gfp_t flags,
-					void *objp, void *caller)
+static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
+				gfp_t flags, void *objp, void *caller)
 {
 	if (!objp)
 		return objp;
@@ -2767,15 +2781,14 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, gfp_t flags
 	*dbg_userword(cachep, objp) = caller;
 
 	if (cachep->flags & SLAB_RED_ZONE) {
-		if (*dbg_redzone1(cachep, objp) != RED_INACTIVE
-		    || *dbg_redzone2(cachep, objp) != RED_INACTIVE) {
-			slab_error(cachep,
-				   "double free, or memory outside"
-				   " object was overwritten");
+		if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||
+				*dbg_redzone2(cachep, objp) != RED_INACTIVE) {
+			slab_error(cachep, "double free, or memory outside"
+						" object was overwritten");
 			printk(KERN_ERR
-			       "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n",
+				"%p: redzone 1:0x%lx, redzone 2:0x%lx\n",
 			       objp, *dbg_redzone1(cachep, objp),
 			       *dbg_redzone2(cachep, objp));
 		}
 		*dbg_redzone1(cachep, objp) = RED_ACTIVE;
 		*dbg_redzone2(cachep, objp) = RED_ACTIVE;
@@ -2822,8 +2835,8 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 	return objp;
 }
 
-static __always_inline void *
-__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
+static __always_inline void *__cache_alloc(struct kmem_cache *cachep,
+						gfp_t flags, void *caller)
 {
 	unsigned long save_flags;
 	void *objp;
@@ -2843,7 +2856,8 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
 /*
  * A interface to enable slab creation on nodeid
  */
-static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
+static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
+				int nodeid)
 {
 	struct list_head *entry;
 	struct slab *slabp;
@@ -2854,7 +2868,7 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node
 	l3 = cachep->nodelists[nodeid];
 	BUG_ON(!l3);
 
-      retry:
+retry:
 	check_irq_off();
 	spin_lock(&l3->list_lock);
 	entry = l3->slabs_partial.next;
@@ -2881,16 +2895,15 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node
 	/* move slabp to correct slabp list: */
 	list_del(&slabp->list);
 
-	if (slabp->free == BUFCTL_END) {
+	if (slabp->free == BUFCTL_END)
 		list_add(&slabp->list, &l3->slabs_full);
-	} else {
+	else
 		list_add(&slabp->list, &l3->slabs_partial);
-	}
 
 	spin_unlock(&l3->list_lock);
 	goto done;
 
-      must_grow:
+must_grow:
 	spin_unlock(&l3->list_lock);
 	x = cache_grow(cachep, flags, nodeid);
 
@@ -2898,7 +2911,7 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node
2898 return NULL; 2911 return NULL;
2899 2912
2900 goto retry; 2913 goto retry;
2901 done: 2914done:
2902 return obj; 2915 return obj;
2903} 2916}
2904#endif 2917#endif
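
Apart from moving the retry/must_grow/done labels to column 0 and dropping single-statement braces, the __cache_alloc_node() hunks leave the algorithm alone: take a slab from the node's partial list (falling back to the free list), hand out one object, refile the slab, and if nothing is available grow the cache and retry. A rough userspace sketch of that control-flow shape, with invented toy_* names, a bare counter standing in for a real slab, and the separate free list omitted:

    #include <stdio.h>
    #include <stdlib.h>

    /* Toy stand-in: a "slab" is just a counter of free objects. */
    struct toy_slab {
            int free_objs;
            struct toy_slab *next;
    };

    struct toy_node {
            struct toy_slab *partial;       /* slabs with free objects left */
            struct toy_slab *full;          /* slabs with none left         */
    };

    /* Grow: fabricate a fresh slab with a few free objects (may fail). */
    static int toy_grow(struct toy_node *n)
    {
            struct toy_slab *s = malloc(sizeof(*s));

            if (!s)
                    return 0;
            s->free_objs = 4;
            s->next = n->partial;
            n->partial = s;
            return 1;
    }

    /* Same shape as __cache_alloc_node(): retry / must_grow / done. */
    static void *toy_alloc_node(struct toy_node *n)
    {
            struct toy_slab *s;
            void *obj = NULL;

    retry:
            s = n->partial;
            if (!s)
                    goto must_grow;

            s->free_objs--;
            obj = s;                /* pretend the slab handed out an object */

            /* move the slab to the correct list, as the hunk does */
            if (s->free_objs == 0) {
                    n->partial = s->next;
                    s->next = n->full;
                    n->full = s;
            }
            goto done;

    must_grow:
            if (!toy_grow(n))
                    return NULL;
            goto retry;
    done:
            return obj;
    }

    int main(void)
    {
            struct toy_node node = { NULL, NULL };
            int i;

            for (i = 0; i < 6; i++)
                    printf("alloc %d -> %p\n", i, toy_alloc_node(&node));
            return 0;
    }

The goto-based retry keeps the grow-and-relock slow path out of the straight-line fast path; the hunks only change how the labels and braces are formatted.
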
@@ -2971,7 +2984,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
2971 } 2984 }
2972 2985
2973 free_block(cachep, ac->entry, batchcount, node); 2986 free_block(cachep, ac->entry, batchcount, node);
2974 free_done: 2987free_done:
2975#if STATS 2988#if STATS
2976 { 2989 {
2977 int i = 0; 2990 int i = 0;
@@ -2992,16 +3005,12 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
2992#endif 3005#endif
2993 spin_unlock(&l3->list_lock); 3006 spin_unlock(&l3->list_lock);
2994 ac->avail -= batchcount; 3007 ac->avail -= batchcount;
2995 memmove(ac->entry, &(ac->entry[batchcount]), 3008 memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
2996 sizeof(void *) * ac->avail);
2997} 3009}
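
The memmove() collapsed onto one line above is the heart of cache_flusharray(): the oldest batchcount pointers at the bottom of the per-cpu array are handed back to the slab lists through free_block(), and the survivors are slid down so the array stays a packed LIFO stack. A hedged, lock-free userspace illustration (toy_* names invented, free_block() only mocked):

    #include <stdio.h>
    #include <string.h>

    #define TOY_LIMIT 8

    struct toy_array_cache {
            unsigned int avail;
            unsigned int batchcount;
            void *entry[TOY_LIMIT];
    };

    /* Stand-in for free_block(): just report what would be given back. */
    static void toy_free_block(void **entries, unsigned int nr)
    {
            unsigned int i;

            for (i = 0; i < nr; i++)
                    printf("  returning %p to the slab lists\n", entries[i]);
    }

    static void toy_flusharray(struct toy_array_cache *ac)
    {
            unsigned int batchcount = ac->batchcount;

            if (batchcount > ac->avail)
                    batchcount = ac->avail;
            toy_free_block(ac->entry, batchcount);

            /* Same move as the hunk: keep the survivors packed at entry[0]. */
            ac->avail -= batchcount;
            memmove(ac->entry, &ac->entry[batchcount], sizeof(void *) * ac->avail);
    }

    int main(void)
    {
            struct toy_array_cache ac = { .avail = 5, .batchcount = 3 };
            int dummy[5];
            unsigned int i;

            for (i = 0; i < 5; i++)
                    ac.entry[i] = &dummy[i];
            toy_flusharray(&ac);
            printf("%u left, entry[0] is now %p (the old entry[3])\n",
                   ac.avail, ac.entry[0]);
            return 0;
    }

Keeping the remainder at entry[0] is what lets the allocation fast path keep treating ac->entry as a plain stack and pop with --avail.
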
2998 3010
2999/* 3011/*
3000 * __cache_free 3012 * Release an obj back to its cache. If the obj has a constructed state, it must
3001 * Release an obj back to its cache. If the obj has a constructed 3013 * be in this state _before_ it is released. Called with disabled ints.
3002 * state, it must be in this state _before_ it is released.
3003 *
3004 * Called with disabled ints.
3005 */ 3014 */
3006static inline void __cache_free(struct kmem_cache *cachep, void *objp) 3015static inline void __cache_free(struct kmem_cache *cachep, void *objp)
3007{ 3016{
@@ -3020,9 +3029,9 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
3020 if (unlikely(slabp->nodeid != numa_node_id())) { 3029 if (unlikely(slabp->nodeid != numa_node_id())) {
3021 struct array_cache *alien = NULL; 3030 struct array_cache *alien = NULL;
3022 int nodeid = slabp->nodeid; 3031 int nodeid = slabp->nodeid;
3023 struct kmem_list3 *l3 = 3032 struct kmem_list3 *l3;
3024 cachep->nodelists[numa_node_id()];
3025 3033
3034 l3 = cachep->nodelists[numa_node_id()];
3026 STATS_INC_NODEFREES(cachep); 3035 STATS_INC_NODEFREES(cachep);
3027 if (l3->alien && l3->alien[nodeid]) { 3036 if (l3->alien && l3->alien[nodeid]) {
3028 alien = l3->alien[nodeid]; 3037 alien = l3->alien[nodeid];
@@ -3106,7 +3115,7 @@ int fastcall kmem_ptr_validate(struct kmem_cache *cachep, void *ptr)
3106 if (unlikely(page_get_cache(page) != cachep)) 3115 if (unlikely(page_get_cache(page) != cachep))
3107 goto out; 3116 goto out;
3108 return 1; 3117 return 1;
3109 out: 3118out:
3110 return 0; 3119 return 0;
3111} 3120}
3112 3121
@@ -3132,7 +3141,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
3132 local_irq_save(save_flags); 3141 local_irq_save(save_flags);
3133 3142
3134 if (nodeid == -1 || nodeid == numa_node_id() || 3143 if (nodeid == -1 || nodeid == numa_node_id() ||
3135 !cachep->nodelists[nodeid]) 3144 !cachep->nodelists[nodeid])
3136 ptr = ____cache_alloc(cachep, flags); 3145 ptr = ____cache_alloc(cachep, flags);
3137 else 3146 else
3138 ptr = __cache_alloc_node(cachep, flags, nodeid); 3147 ptr = __cache_alloc_node(cachep, flags, nodeid);
@@ -3249,7 +3258,7 @@ void *__alloc_percpu(size_t size)
3249 /* Catch derefs w/o wrappers */ 3258 /* Catch derefs w/o wrappers */
3250 return (void *)(~(unsigned long)pdata); 3259 return (void *)(~(unsigned long)pdata);
3251 3260
3252 unwind_oom: 3261unwind_oom:
3253 while (--i >= 0) { 3262 while (--i >= 0) {
3254 if (!cpu_possible(i)) 3263 if (!cpu_possible(i))
3255 continue; 3264 continue;
@@ -3352,18 +3361,20 @@ static int alloc_kmemlist(struct kmem_cache *cachep)
3352 struct array_cache *nc = NULL, *new; 3361 struct array_cache *nc = NULL, *new;
3353 struct array_cache **new_alien = NULL; 3362 struct array_cache **new_alien = NULL;
3354#ifdef CONFIG_NUMA 3363#ifdef CONFIG_NUMA
3355 if (!(new_alien = alloc_alien_cache(node, cachep->limit))) 3364 new_alien = alloc_alien_cache(node, cachep->limit);
3365 if (!new_alien)
3356 goto fail; 3366 goto fail;
3357#endif 3367#endif
3358 if (!(new = alloc_arraycache(node, (cachep->shared * 3368 new = alloc_arraycache(node, cachep->shared*cachep->batchcount,
3359 cachep->batchcount), 3369 0xbaadf00d);
3360 0xbaadf00d))) 3370 if (!new)
3361 goto fail; 3371 goto fail;
3362 if ((l3 = cachep->nodelists[node])) { 3372 l3 = cachep->nodelists[node];
3363 3373 if (l3) {
3364 spin_lock_irq(&l3->list_lock); 3374 spin_lock_irq(&l3->list_lock);
3365 3375
3366 if ((nc = cachep->nodelists[node]->shared)) 3376 nc = cachep->nodelists[node]->shared;
3377 if (nc)
3367 free_block(cachep, nc->entry, nc->avail, node); 3378 free_block(cachep, nc->entry, nc->avail, node);
3368 3379
3369 l3->shared = new; 3380 l3->shared = new;
@@ -3372,27 +3383,27 @@ static int alloc_kmemlist(struct kmem_cache *cachep)
3372 new_alien = NULL; 3383 new_alien = NULL;
3373 } 3384 }
3374 l3->free_limit = (1 + nr_cpus_node(node)) * 3385 l3->free_limit = (1 + nr_cpus_node(node)) *
3375 cachep->batchcount + cachep->num; 3386 cachep->batchcount + cachep->num;
3376 spin_unlock_irq(&l3->list_lock); 3387 spin_unlock_irq(&l3->list_lock);
3377 kfree(nc); 3388 kfree(nc);
3378 free_alien_cache(new_alien); 3389 free_alien_cache(new_alien);
3379 continue; 3390 continue;
3380 } 3391 }
3381 if (!(l3 = kmalloc_node(sizeof(struct kmem_list3), 3392 l3 = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, node);
3382 GFP_KERNEL, node))) 3393 if (!l3)
3383 goto fail; 3394 goto fail;
3384 3395
3385 kmem_list3_init(l3); 3396 kmem_list3_init(l3);
3386 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + 3397 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
3387 ((unsigned long)cachep) % REAPTIMEOUT_LIST3; 3398 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
3388 l3->shared = new; 3399 l3->shared = new;
3389 l3->alien = new_alien; 3400 l3->alien = new_alien;
3390 l3->free_limit = (1 + nr_cpus_node(node)) * 3401 l3->free_limit = (1 + nr_cpus_node(node)) *
3391 cachep->batchcount + cachep->num; 3402 cachep->batchcount + cachep->num;
3392 cachep->nodelists[node] = l3; 3403 cachep->nodelists[node] = l3;
3393 } 3404 }
3394 return err; 3405 return err;
3395 fail: 3406fail:
3396 err = -ENOMEM; 3407 err = -ENOMEM;
3397 return err; 3408 return err;
3398} 3409}
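
Most of the churn in alloc_kmemlist() is one transformation applied over and over: assignments buried in conditions, such as if (!(new_alien = alloc_alien_cache(...))), are split into a plain assignment followed by a test, with every failure funnelled to the single fail label. The shape the function converges on looks roughly like the sketch below; the resources, the toy_* names and the unwinding done at fail are invented for illustration and do not mirror the real function's error path:

    #include <stdlib.h>

    struct toy_cache {
            void *alien;
            void *shared;
            void *list3;
    };

    static int toy_setup_node(struct toy_cache *cachep)
    {
            void *new_alien = NULL;
            void *new_shared = NULL;
            void *l3 = NULL;

            new_alien = malloc(64);
            if (!new_alien)
                    goto fail;

            new_shared = malloc(64);
            if (!new_shared)
                    goto fail;

            l3 = malloc(64);
            if (!l3)
                    goto fail;

            cachep->alien = new_alien;
            cachep->shared = new_shared;
            cachep->list3 = l3;
            return 0;

    fail:
            /* one unwind point for every failed step */
            free(new_shared);
            free(new_alien);
            return -1;      /* -ENOMEM in the kernel */
    }

    int main(void)
    {
            struct toy_cache c = { NULL, NULL, NULL };
            int err = toy_setup_node(&c);

            free(c.list3);
            free(c.shared);
            free(c.alien);
            return err ? 1 : 0;
    }

Splitting the assignment from the test changes nothing at run time; it only makes each step of the setup readable on its own line.
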
@@ -3404,7 +3415,7 @@ struct ccupdate_struct {
3404 3415
3405static void do_ccupdate_local(void *info) 3416static void do_ccupdate_local(void *info)
3406{ 3417{
3407 struct ccupdate_struct *new = (struct ccupdate_struct *)info; 3418 struct ccupdate_struct *new = info;
3408 struct array_cache *old; 3419 struct array_cache *old;
3409 3420
3410 check_irq_off(); 3421 check_irq_off();
@@ -3414,16 +3425,16 @@ static void do_ccupdate_local(void *info)
3414 new->new[smp_processor_id()] = old; 3425 new->new[smp_processor_id()] = old;
3415} 3426}
3416 3427
3417static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount, 3428static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
3418 int shared) 3429 int batchcount, int shared)
3419{ 3430{
3420 struct ccupdate_struct new; 3431 struct ccupdate_struct new;
3421 int i, err; 3432 int i, err;
3422 3433
3423 memset(&new.new, 0, sizeof(new.new)); 3434 memset(&new.new, 0, sizeof(new.new));
3424 for_each_online_cpu(i) { 3435 for_each_online_cpu(i) {
3425 new.new[i] = 3436 new.new[i] = alloc_arraycache(cpu_to_node(i), limit,
3426 alloc_arraycache(cpu_to_node(i), limit, batchcount); 3437 batchcount);
3427 if (!new.new[i]) { 3438 if (!new.new[i]) {
3428 for (i--; i >= 0; i--) 3439 for (i--; i >= 0; i--)
3429 kfree(new.new[i]); 3440 kfree(new.new[i]);
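
The reindented allocation loop in do_tune_cpucache() hands each online CPU a fresh array_cache and, if any allocation fails, walks back over the CPUs already served and frees their arrays before giving up. The same rollback idiom in standalone form (calloc instead of alloc_arraycache(), a made-up fixed CPU count):

    #include <stdio.h>
    #include <stdlib.h>

    #define NR_TOY_CPUS 4

    /* Allocate a per-"cpu" buffer for every slot; undo everything on failure. */
    static int toy_tune_cpucache(void *percpu[NR_TOY_CPUS], size_t limit)
    {
            int i;

            for (i = 0; i < NR_TOY_CPUS; i++) {
                    percpu[i] = calloc(limit, sizeof(void *));
                    if (!percpu[i]) {
                            /* same unwind as the hunk: free what we already have */
                            for (i--; i >= 0; i--) {
                                    free(percpu[i]);
                                    percpu[i] = NULL;
                            }
                            return -1;      /* -ENOMEM in the kernel */
                    }
            }
            return 0;
    }

    int main(void)
    {
            void *percpu[NR_TOY_CPUS] = { NULL };
            int i;

            if (toy_tune_cpucache(percpu, 32) == 0)
                    printf("allocated %d per-cpu arrays\n", NR_TOY_CPUS);
            for (i = 0; i < NR_TOY_CPUS; i++)
                    free(percpu[i]);
            return 0;
    }
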
@@ -3465,10 +3476,11 @@ static void enable_cpucache(struct kmem_cache *cachep)
3465 int err; 3476 int err;
3466 int limit, shared; 3477 int limit, shared;
3467 3478
3468 /* The head array serves three purposes: 3479 /*
3480 * The head array serves three purposes:
3469 * - create a LIFO ordering, i.e. return objects that are cache-warm 3481 * - create a LIFO ordering, i.e. return objects that are cache-warm
3470 * - reduce the number of spinlock operations. 3482 * - reduce the number of spinlock operations.
3471 * - reduce the number of linked list operations on the slab and 3483 * - reduce the number of linked list operations on the slab and
3472 * bufctl chains: array operations are cheaper. 3484 * bufctl chains: array operations are cheaper.
3473 * The numbers are guessed, we should auto-tune as described by 3485 * The numbers are guessed, we should auto-tune as described by
3474 * Bonwick. 3486 * Bonwick.
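
The comment being rewrapped here is the design rationale for the per-cpu head array: objects freed on a CPU are pushed onto a small per-cpu stack and handed back, most recently freed first, to later allocations on the same CPU, so the common case touches neither the node spinlock nor the slab bufctl chains. A minimal single-threaded sketch of that LIFO behaviour (toy_* names invented; the real refill and flush paths are only hinted at in the comments):

    #include <assert.h>
    #include <stddef.h>

    #define TOY_LIMIT 4

    struct toy_head_array {
            unsigned int avail;
            void *entry[TOY_LIMIT];
    };

    /* Free: push. Returns 0 when full and a real flush would have to run. */
    static int toy_free(struct toy_head_array *ac, void *obj)
    {
            if (ac->avail == TOY_LIMIT)
                    return 0;               /* cache_flusharray() territory */
            ac->entry[ac->avail++] = obj;
            return 1;
    }

    /* Alloc: pop. Returns NULL when a refill from the slab lists is needed. */
    static void *toy_alloc(struct toy_head_array *ac)
    {
            if (!ac->avail)
                    return NULL;
            return ac->entry[--ac->avail];
    }

    int main(void)
    {
            struct toy_head_array ac = { 0, { NULL } };
            int a, b;

            toy_free(&ac, &a);
            toy_free(&ac, &b);

            /* LIFO: the most recently freed (cache-warm) object comes back first. */
            assert(toy_alloc(&ac) == &b);
            assert(toy_alloc(&ac) == &a);
            assert(toy_alloc(&ac) == NULL);
            return 0;
    }

The limit chosen a few lines further down is the size of this stack; as the comment itself says, the numbers are guesses rather than the auto-tuning Bonwick describes.
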
@@ -3484,7 +3496,8 @@ static void enable_cpucache(struct kmem_cache *cachep)
3484 else 3496 else
3485 limit = 120; 3497 limit = 120;
3486 3498
3487 /* Cpu bound tasks (e.g. network routing) can exhibit cpu bound 3499 /*
3500 * CPU bound tasks (e.g. network routing) can exhibit cpu bound
3488 * allocation behaviour: Most allocs on one cpu, most free operations 3501 * allocation behaviour: Most allocs on one cpu, most free operations
3489 * on another cpu. For these cases, an efficient object passing between 3502 * on another cpu. For these cases, an efficient object passing between
3490 * cpus is necessary. This is provided by a shared array. The array 3503 * cpus is necessary. This is provided by a shared array. The array
@@ -3499,9 +3512,9 @@ static void enable_cpucache(struct kmem_cache *cachep)
3499#endif 3512#endif
3500 3513
3501#if DEBUG 3514#if DEBUG
3502 /* With debugging enabled, large batchcount lead to excessively 3515 /*
3503 * long periods with disabled local interrupts. Limit the 3516 * With debugging enabled, large batchcount lead to excessively long
3504 * batchcount 3517 * periods with disabled local interrupts. Limit the batchcount
3505 */ 3518 */
3506 if (limit > 32) 3519 if (limit > 32)
3507 limit = 32; 3520 limit = 32;
@@ -3512,8 +3525,8 @@ static void enable_cpucache(struct kmem_cache *cachep)
3512 cachep->name, -err); 3525 cachep->name, -err);
3513} 3526}
3514 3527
3515static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac, 3528static void drain_array_locked(struct kmem_cache *cachep,
3516 int force, int node) 3529 struct array_cache *ac, int force, int node)
3517{ 3530{
3518 int tofree; 3531 int tofree;
3519 3532
@@ -3522,9 +3535,8 @@ static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac
3522 ac->touched = 0; 3535 ac->touched = 0;
3523 } else if (ac->avail) { 3536 } else if (ac->avail) {
3524 tofree = force ? ac->avail : (ac->limit + 4) / 5; 3537 tofree = force ? ac->avail : (ac->limit + 4) / 5;
3525 if (tofree > ac->avail) { 3538 if (tofree > ac->avail)
3526 tofree = (ac->avail + 1) / 2; 3539 tofree = (ac->avail + 1) / 2;
3527 }
3528 free_block(cachep, ac->entry, tofree, node); 3540 free_block(cachep, ac->entry, tofree, node);
3529 ac->avail -= tofree; 3541 ac->avail -= tofree;
3530 memmove(ac->entry, &(ac->entry[tofree]), 3542 memmove(ac->entry, &(ac->entry[tofree]),
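
Dropping the braces in drain_array_locked() leaves its trim policy intact: a forced drain empties the array, otherwise roughly a fifth of the configured limit is freed, capped at half of what is actually available, and the remainder is slid down just as in cache_flusharray(). The arithmetic on its own, with invented example values:

    #include <stdio.h>

    /* Mirror of the trim policy in the hunk (logic only, no locking). */
    static int toy_tofree(int avail, int limit, int force)
    {
            int tofree = force ? avail : (limit + 4) / 5;

            if (tofree > avail)
                    tofree = (avail + 1) / 2;
            return tofree;
    }

    int main(void)
    {
            printf("avail=120 limit=120 force=0 -> %d\n", toy_tofree(120, 120, 0)); /* 24 */
            printf("avail=10  limit=120 force=0 -> %d\n", toy_tofree(10, 120, 0));  /* 5  */
            printf("avail=10  limit=120 force=1 -> %d\n", toy_tofree(10, 120, 1));  /* 10 */
            return 0;
    }
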
@@ -3541,8 +3553,8 @@ static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac
3541 * - clear the per-cpu caches for this CPU. 3553 * - clear the per-cpu caches for this CPU.
3542 * - return freeable pages to the main free memory pool. 3554 * - return freeable pages to the main free memory pool.
3543 * 3555 *
3544 * If we cannot acquire the cache chain mutex then just give up - we'll 3556 * If we cannot acquire the cache chain mutex then just give up - we'll try
3545 * try again on the next iteration. 3557 * again on the next iteration.
3546 */ 3558 */
3547static void cache_reap(void *unused) 3559static void cache_reap(void *unused)
3548{ 3560{
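
The trylock-or-give-up rule described by the rewrapped comment is what keeps cache_reap() harmless: it is a periodic, best-effort job, so if the cache_chain mutex is busy it simply returns and lets the next scheduled run try again instead of blocking. A userspace analogue of that pattern (pthreads, compile with -pthread; names invented):

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t chain_mutex = PTHREAD_MUTEX_INITIALIZER;

    /* Periodic job: do the walk only if the lock is free, else skip this round. */
    static void toy_cache_reap(void)
    {
            if (pthread_mutex_trylock(&chain_mutex) != 0) {
                    /* somebody is modifying the chain; try again next time */
                    return;
            }
            printf("walking the cache chain\n");
            pthread_mutex_unlock(&chain_mutex);
    }

    int main(void)
    {
            toy_cache_reap();                       /* lock free: does the work   */

            pthread_mutex_lock(&chain_mutex);
            toy_cache_reap();                       /* lock held: skipped quietly */
            pthread_mutex_unlock(&chain_mutex);
            return 0;
    }
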
@@ -3590,9 +3602,8 @@ static void cache_reap(void *unused)
3590 goto next_unlock; 3602 goto next_unlock;
3591 } 3603 }
3592 3604
3593 tofree = 3605 tofree = (l3->free_limit + 5 * searchp->num - 1) /
3594 (l3->free_limit + 5 * searchp->num - 3606 (5 * searchp->num);
3595 1) / (5 * searchp->num);
3596 do { 3607 do {
3597 p = l3->slabs_free.next; 3608 p = l3->slabs_free.next;
3598 if (p == &(l3->slabs_free)) 3609 if (p == &(l3->slabs_free))
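
The reflowed expression above is ordinary integer ceiling division: each reap pass frees at most ceil(free_limit / (5 * num)) free slabs, i.e. about a fifth of the node's free limit expressed in slabs, rounded up so that even a tiny cache makes progress. With invented numbers:

    #include <stdio.h>

    /* ceil(a / b) in integer arithmetic, the idiom used by the hunk */
    static unsigned int ceil_div(unsigned int a, unsigned int b)
    {
            return (a + b - 1) / b;
    }

    int main(void)
    {
            unsigned int free_limit = 244, num = 12;        /* made-up cache geometry */

            /* (244 + 60 - 1) / 60 = 5 slabs per pass */
            printf("%u\n", ceil_div(free_limit, 5 * num));
            return 0;
    }
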
@@ -3603,9 +3614,9 @@ static void cache_reap(void *unused)
3603 list_del(&slabp->list); 3614 list_del(&slabp->list);
3604 STATS_INC_REAPED(searchp); 3615 STATS_INC_REAPED(searchp);
3605 3616
3606 /* Safe to drop the lock. The slab is no longer 3617 /*
3607 * linked to the cache. 3618 * Safe to drop the lock. The slab is no longer linked
3608 * searchp cannot disappear, we hold 3619 * to the cache. searchp cannot disappear, we hold
3609 * cache_chain_lock 3620 * cache_chain_lock
3610 */ 3621 */
3611 l3->free_objects -= searchp->num; 3622 l3->free_objects -= searchp->num;
@@ -3613,15 +3624,15 @@ static void cache_reap(void *unused)
3613 slab_destroy(searchp, slabp); 3624 slab_destroy(searchp, slabp);
3614 spin_lock_irq(&l3->list_lock); 3625 spin_lock_irq(&l3->list_lock);
3615 } while (--tofree > 0); 3626 } while (--tofree > 0);
3616 next_unlock: 3627next_unlock:
3617 spin_unlock_irq(&l3->list_lock); 3628 spin_unlock_irq(&l3->list_lock);
3618 next: 3629next:
3619 cond_resched(); 3630 cond_resched();
3620 } 3631 }
3621 check_irq_on(); 3632 check_irq_on();
3622 mutex_unlock(&cache_chain_mutex); 3633 mutex_unlock(&cache_chain_mutex);
3623 next_reap_node(); 3634 next_reap_node();
3624 /* Setup the next iteration */ 3635 /* Set up the next iteration */
3625 schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC); 3636 schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC);
3626} 3637}
3627 3638
@@ -3671,8 +3682,8 @@ static void *s_next(struct seq_file *m, void *p, loff_t *pos)
3671{ 3682{
3672 struct kmem_cache *cachep = p; 3683 struct kmem_cache *cachep = p;
3673 ++*pos; 3684 ++*pos;
3674 return cachep->next.next == &cache_chain ? NULL 3685 return cachep->next.next == &cache_chain ?
3675 : list_entry(cachep->next.next, struct kmem_cache, next); 3686 NULL : list_entry(cachep->next.next, struct kmem_cache, next);
3676} 3687}
3677 3688
3678static void s_stop(struct seq_file *m, void *p) 3689static void s_stop(struct seq_file *m, void *p)
@@ -3761,7 +3772,9 @@ static int s_show(struct seq_file *m, void *p)
3761 unsigned long node_frees = cachep->node_frees; 3772 unsigned long node_frees = cachep->node_frees;
3762 3773
3763 seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \ 3774 seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \
3764 %4lu %4lu %4lu %4lu", allocs, high, grown, reaped, errors, max_freeable, node_allocs, node_frees); 3775 %4lu %4lu %4lu %4lu", allocs, high, grown,
3776 reaped, errors, max_freeable, node_allocs,
3777 node_frees);
3765 } 3778 }
3766 /* cpu stats */ 3779 /* cpu stats */
3767 { 3780 {
@@ -3833,13 +3846,12 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer,
3833 mutex_lock(&cache_chain_mutex); 3846 mutex_lock(&cache_chain_mutex);
3834 res = -EINVAL; 3847 res = -EINVAL;
3835 list_for_each(p, &cache_chain) { 3848 list_for_each(p, &cache_chain) {
3836 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, 3849 struct kmem_cache *cachep;
3837 next);
3838 3850
3851 cachep = list_entry(p, struct kmem_cache, next);
3839 if (!strcmp(cachep->name, kbuf)) { 3852 if (!strcmp(cachep->name, kbuf)) {
3840 if (limit < 1 || 3853 if (limit < 1 || batchcount < 1 ||
3841 batchcount < 1 || 3854 batchcount > limit || shared < 0) {
3842 batchcount > limit || shared < 0) {
3843 res = 0; 3855 res = 0;
3844 } else { 3856 } else {
3845 res = do_tune_cpucache(cachep, limit, 3857 res = do_tune_cpucache(cachep, limit,