Diffstat (limited to 'mm/slab.c')
-rw-r--r--  mm/slab.c  890
1 files changed, 463 insertions, 427 deletions
diff --git a/mm/slab.c b/mm/slab.c
index d0bd7f07ab04..1c8f5ee230d5 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -50,7 +50,7 @@
50 * The head array is strictly LIFO and should improve the cache hit rates. 50 * The head array is strictly LIFO and should improve the cache hit rates.
51 * On SMP, it additionally reduces the spinlock operations. 51 * On SMP, it additionally reduces the spinlock operations.
52 * 52 *
53 * The c_cpuarray may not be read with enabled local interrupts - 53 * The c_cpuarray may not be read with enabled local interrupts -
54 * it's changed with a smp_call_function(). 54 * it's changed with a smp_call_function().
55 * 55 *
56 * SMP synchronization: 56 * SMP synchronization:
@@ -170,12 +170,12 @@
170#if DEBUG 170#if DEBUG
171# define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \ 171# define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
172 SLAB_POISON | SLAB_HWCACHE_ALIGN | \ 172 SLAB_POISON | SLAB_HWCACHE_ALIGN | \
173 SLAB_NO_REAP | SLAB_CACHE_DMA | \ 173 SLAB_CACHE_DMA | \
174 SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \ 174 SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \
175 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ 175 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
176 SLAB_DESTROY_BY_RCU) 176 SLAB_DESTROY_BY_RCU)
177#else 177#else
178# define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \ 178# define CREATE_MASK (SLAB_HWCACHE_ALIGN | \
179 SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \ 179 SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \
180 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ 180 SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \
181 SLAB_DESTROY_BY_RCU) 181 SLAB_DESTROY_BY_RCU)
@@ -266,16 +266,17 @@ struct array_cache {
266 unsigned int batchcount; 266 unsigned int batchcount;
267 unsigned int touched; 267 unsigned int touched;
268 spinlock_t lock; 268 spinlock_t lock;
269 void *entry[0]; /* 269 void *entry[0]; /*
270 * Must have this definition in here for the proper 270 * Must have this definition in here for the proper
271 * alignment of array_cache. Also simplifies accessing 271 * alignment of array_cache. Also simplifies accessing
272 * the entries. 272 * the entries.
273 * [0] is for gcc 2.95. It should really be []. 273 * [0] is for gcc 2.95. It should really be [].
274 */ 274 */
275}; 275};
276 276
277/* bootstrap: The caches do not work without cpuarrays anymore, 277/*
278 * but the cpuarrays are allocated from the generic caches... 278 * bootstrap: The caches do not work without cpuarrays anymore, but the
279 * cpuarrays are allocated from the generic caches...
279 */ 280 */
280#define BOOT_CPUCACHE_ENTRIES 1 281#define BOOT_CPUCACHE_ENTRIES 1
281struct arraycache_init { 282struct arraycache_init {
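
For illustration only (not part of the patch): the flexible entry[] array is what makes the per-cpu cache a plain LIFO stack, bounded by the avail and limit fields; ac_push()/ac_pop() below are hypothetical names sketching the pattern the allocation and free fast paths follow.

/* Sketch: LIFO push/pop on a per-cpu array_cache (locking omitted) */
static inline void *ac_pop(struct array_cache *ac)
{
	if (!ac->avail)
		return NULL;			/* fall back to the slab lists */
	ac->touched = 1;
	return ac->entry[--ac->avail];		/* most recently freed object */
}

static inline int ac_push(struct array_cache *ac, void *objp)
{
	if (ac->avail >= ac->limit)
		return 0;			/* caller flushes a batch first */
	ac->entry[ac->avail++] = objp;
	return 1;
}
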
@@ -291,13 +292,13 @@ struct kmem_list3 {
291 struct list_head slabs_full; 292 struct list_head slabs_full;
292 struct list_head slabs_free; 293 struct list_head slabs_free;
293 unsigned long free_objects; 294 unsigned long free_objects;
294 unsigned long next_reap;
295 int free_touched;
296 unsigned int free_limit; 295 unsigned int free_limit;
297 unsigned int colour_next; /* Per-node cache coloring */ 296 unsigned int colour_next; /* Per-node cache coloring */
298 spinlock_t list_lock; 297 spinlock_t list_lock;
299 struct array_cache *shared; /* shared per node */ 298 struct array_cache *shared; /* shared per node */
300 struct array_cache **alien; /* on other nodes */ 299 struct array_cache **alien; /* on other nodes */
300 unsigned long next_reap; /* updated without locking */
301 int free_touched; /* updated without locking */
301}; 302};
302 303
303/* 304/*
@@ -310,10 +311,8 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS];
310#define SIZE_L3 (1 + MAX_NUMNODES) 311#define SIZE_L3 (1 + MAX_NUMNODES)
311 312
312/* 313/*
313 * This function must be completely optimized away if 314 * This function must be completely optimized away if a constant is passed to
314 * a constant is passed to it. Mostly the same as 315 * it. Mostly the same as what is in linux/slab.h except it returns an index.
315 * what is in linux/slab.h except it returns an
316 * index.
317 */ 316 */
318static __always_inline int index_of(const size_t size) 317static __always_inline int index_of(const size_t size)
319{ 318{
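
How index_of() can be "completely optimized away": kmalloc_sizes.h expands a CACHE(x) macro once per cache size, so for a compile-time constant the whole comparison chain folds down to a single integer. An illustrative sketch of the technique (index_of_sketch is a hypothetical name, not the exact body):

static __always_inline int index_of_sketch(const size_t size)
{
	int i = 0;

#define CACHE(x) if (size <= (x)) return i; else i++;
#include <linux/kmalloc_sizes.h>	/* CACHE(32) CACHE(64) CACHE(96) ... */
#undef CACHE

	return -1;	/* too large; the real function forces a link error here */
}
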
@@ -351,14 +350,14 @@ static void kmem_list3_init(struct kmem_list3 *parent)
351 parent->free_touched = 0; 350 parent->free_touched = 0;
352} 351}
353 352
354#define MAKE_LIST(cachep, listp, slab, nodeid) \ 353#define MAKE_LIST(cachep, listp, slab, nodeid) \
355 do { \ 354 do { \
356 INIT_LIST_HEAD(listp); \ 355 INIT_LIST_HEAD(listp); \
357 list_splice(&(cachep->nodelists[nodeid]->slab), listp); \ 356 list_splice(&(cachep->nodelists[nodeid]->slab), listp); \
358 } while (0) 357 } while (0)
359 358
360#define MAKE_ALL_LISTS(cachep, ptr, nodeid) \ 359#define MAKE_ALL_LISTS(cachep, ptr, nodeid) \
361 do { \ 360 do { \
362 MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid); \ 361 MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid); \
363 MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \ 362 MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \
364 MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \ 363 MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \
@@ -373,28 +372,30 @@ static void kmem_list3_init(struct kmem_list3 *parent)
373struct kmem_cache { 372struct kmem_cache {
374/* 1) per-cpu data, touched during every alloc/free */ 373/* 1) per-cpu data, touched during every alloc/free */
375 struct array_cache *array[NR_CPUS]; 374 struct array_cache *array[NR_CPUS];
375/* 2) Cache tunables. Protected by cache_chain_mutex */
376 unsigned int batchcount; 376 unsigned int batchcount;
377 unsigned int limit; 377 unsigned int limit;
378 unsigned int shared; 378 unsigned int shared;
379
379 unsigned int buffer_size; 380 unsigned int buffer_size;
380/* 2) touched by every alloc & free from the backend */ 381/* 3) touched by every alloc & free from the backend */
381 struct kmem_list3 *nodelists[MAX_NUMNODES]; 382 struct kmem_list3 *nodelists[MAX_NUMNODES];
382 unsigned int flags; /* constant flags */
383 unsigned int num; /* # of objs per slab */
384 spinlock_t spinlock;
385 383
386/* 3) cache_grow/shrink */ 384 unsigned int flags; /* constant flags */
385 unsigned int num; /* # of objs per slab */
386
387/* 4) cache_grow/shrink */
387 /* order of pgs per slab (2^n) */ 388 /* order of pgs per slab (2^n) */
388 unsigned int gfporder; 389 unsigned int gfporder;
389 390
390 /* force GFP flags, e.g. GFP_DMA */ 391 /* force GFP flags, e.g. GFP_DMA */
391 gfp_t gfpflags; 392 gfp_t gfpflags;
392 393
393 size_t colour; /* cache colouring range */ 394 size_t colour; /* cache colouring range */
394 unsigned int colour_off; /* colour offset */ 395 unsigned int colour_off; /* colour offset */
395 struct kmem_cache *slabp_cache; 396 struct kmem_cache *slabp_cache;
396 unsigned int slab_size; 397 unsigned int slab_size;
397 unsigned int dflags; /* dynamic flags */ 398 unsigned int dflags; /* dynamic flags */
398 399
399 /* constructor func */ 400 /* constructor func */
400 void (*ctor) (void *, struct kmem_cache *, unsigned long); 401 void (*ctor) (void *, struct kmem_cache *, unsigned long);
@@ -402,11 +403,11 @@ struct kmem_cache {
402 /* de-constructor func */ 403 /* de-constructor func */
403 void (*dtor) (void *, struct kmem_cache *, unsigned long); 404 void (*dtor) (void *, struct kmem_cache *, unsigned long);
404 405
405/* 4) cache creation/removal */ 406/* 5) cache creation/removal */
406 const char *name; 407 const char *name;
407 struct list_head next; 408 struct list_head next;
408 409
409/* 5) statistics */ 410/* 6) statistics */
410#if STATS 411#if STATS
411 unsigned long num_active; 412 unsigned long num_active;
412 unsigned long num_allocations; 413 unsigned long num_allocations;
@@ -438,8 +439,9 @@ struct kmem_cache {
438#define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) 439#define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB)
439 440
440#define BATCHREFILL_LIMIT 16 441#define BATCHREFILL_LIMIT 16
441/* Optimization question: fewer reaps means less 442/*
442 * probability for unnessary cpucache drain/refill cycles. 443 * Optimization question: fewer reaps means less probability for unnessary
444 * cpucache drain/refill cycles.
443 * 445 *
444 * OTOH the cpuarrays can contain lots of objects, 446 * OTOH the cpuarrays can contain lots of objects,
445 * which could lock up otherwise freeable slabs. 447 * which could lock up otherwise freeable slabs.
@@ -453,17 +455,19 @@ struct kmem_cache {
453#define STATS_INC_ALLOCED(x) ((x)->num_allocations++) 455#define STATS_INC_ALLOCED(x) ((x)->num_allocations++)
454#define STATS_INC_GROWN(x) ((x)->grown++) 456#define STATS_INC_GROWN(x) ((x)->grown++)
455#define STATS_INC_REAPED(x) ((x)->reaped++) 457#define STATS_INC_REAPED(x) ((x)->reaped++)
456#define STATS_SET_HIGH(x) do { if ((x)->num_active > (x)->high_mark) \ 458#define STATS_SET_HIGH(x) \
457 (x)->high_mark = (x)->num_active; \ 459 do { \
458 } while (0) 460 if ((x)->num_active > (x)->high_mark) \
461 (x)->high_mark = (x)->num_active; \
462 } while (0)
459#define STATS_INC_ERR(x) ((x)->errors++) 463#define STATS_INC_ERR(x) ((x)->errors++)
460#define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++) 464#define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++)
461#define STATS_INC_NODEFREES(x) ((x)->node_frees++) 465#define STATS_INC_NODEFREES(x) ((x)->node_frees++)
462#define STATS_SET_FREEABLE(x, i) \ 466#define STATS_SET_FREEABLE(x, i) \
463 do { if ((x)->max_freeable < i) \ 467 do { \
464 (x)->max_freeable = i; \ 468 if ((x)->max_freeable < i) \
465 } while (0) 469 (x)->max_freeable = i; \
466 470 } while (0)
467#define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit) 471#define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit)
468#define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss) 472#define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss)
469#define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit) 473#define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit)
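
The STATS_* macros are reflowed here but keep the do { } while (0) wrapper, which is what lets a multi-statement macro expand safely as a single statement inside an unbraced if/else, e.g. (illustrative, something_happened is a placeholder condition):

	if (something_happened)
		STATS_SET_HIGH(cachep);		/* expands to one statement */
	else
		STATS_INC_ERR(cachep);
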
@@ -478,9 +482,7 @@ struct kmem_cache {
478#define STATS_INC_ERR(x) do { } while (0) 482#define STATS_INC_ERR(x) do { } while (0)
479#define STATS_INC_NODEALLOCS(x) do { } while (0) 483#define STATS_INC_NODEALLOCS(x) do { } while (0)
480#define STATS_INC_NODEFREES(x) do { } while (0) 484#define STATS_INC_NODEFREES(x) do { } while (0)
481#define STATS_SET_FREEABLE(x, i) \ 485#define STATS_SET_FREEABLE(x, i) do { } while (0)
482 do { } while (0)
483
484#define STATS_INC_ALLOCHIT(x) do { } while (0) 486#define STATS_INC_ALLOCHIT(x) do { } while (0)
485#define STATS_INC_ALLOCMISS(x) do { } while (0) 487#define STATS_INC_ALLOCMISS(x) do { } while (0)
486#define STATS_INC_FREEHIT(x) do { } while (0) 488#define STATS_INC_FREEHIT(x) do { } while (0)
@@ -488,7 +490,8 @@ struct kmem_cache {
488#endif 490#endif
489 491
490#if DEBUG 492#if DEBUG
491/* Magic nums for obj red zoning. 493/*
494 * Magic nums for obj red zoning.
492 * Placed in the first word before and the first word after an obj. 495 * Placed in the first word before and the first word after an obj.
493 */ 496 */
494#define RED_INACTIVE 0x5A2CF071UL /* when obj is inactive */ 497#define RED_INACTIVE 0x5A2CF071UL /* when obj is inactive */
@@ -499,7 +502,8 @@ struct kmem_cache {
499#define POISON_FREE 0x6b /* for use-after-free poisoning */ 502#define POISON_FREE 0x6b /* for use-after-free poisoning */
500#define POISON_END 0xa5 /* end-byte of poisoning */ 503#define POISON_END 0xa5 /* end-byte of poisoning */
501 504
502/* memory layout of objects: 505/*
506 * memory layout of objects:
503 * 0 : objp 507 * 0 : objp
504 * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that 508 * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that
505 * the end of an object is aligned with the end of the real 509 * the end of an object is aligned with the end of the real
@@ -508,7 +512,8 @@ struct kmem_cache {
508 * redzone word. 512 * redzone word.
509 * cachep->obj_offset: The real object. 513 * cachep->obj_offset: The real object.
510 * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long] 514 * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long]
511 * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address [BYTES_PER_WORD long] 515 * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address
516 * [BYTES_PER_WORD long]
512 */ 517 */
513static int obj_offset(struct kmem_cache *cachep) 518static int obj_offset(struct kmem_cache *cachep)
514{ 519{
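
Taking the layout comment above literally, and assuming both SLAB_RED_ZONE and SLAB_STORE_USER are set, the debug words sit at fixed offsets inside each buffer_size slot; show_debug_layout() is a hypothetical helper that just spells out that arithmetic:

static void show_debug_layout(struct kmem_cache *cachep, char *objp)
{
	void *real_obj	    = objp + obj_offset(cachep);
	unsigned long *red1 = (unsigned long *)(objp + obj_offset(cachep) -
						BYTES_PER_WORD);
	unsigned long *red2 = (unsigned long *)(objp + cachep->buffer_size -
						2 * BYTES_PER_WORD);
	void **last_user    = (void **)(objp + cachep->buffer_size -
					BYTES_PER_WORD);

	printk(KERN_DEBUG "obj %p red1 %p red2 %p user %p\n",
	       real_obj, red1, red2, last_user);
}
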
@@ -552,8 +557,8 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
552#endif 557#endif
553 558
554/* 559/*
555 * Maximum size of an obj (in 2^order pages) 560 * Maximum size of an obj (in 2^order pages) and absolute limit for the gfp
556 * and absolute limit for the gfp order. 561 * order.
557 */ 562 */
558#if defined(CONFIG_LARGE_ALLOCS) 563#if defined(CONFIG_LARGE_ALLOCS)
559#define MAX_OBJ_ORDER 13 /* up to 32Mb */ 564#define MAX_OBJ_ORDER 13 /* up to 32Mb */
@@ -573,9 +578,10 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp)
573#define BREAK_GFP_ORDER_LO 0 578#define BREAK_GFP_ORDER_LO 0
574static int slab_break_gfp_order = BREAK_GFP_ORDER_LO; 579static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
575 580
576/* Functions for storing/retrieving the cachep and or slab from the 581/*
577 * global 'mem_map'. These are used to find the slab an obj belongs to. 582 * Functions for storing/retrieving the cachep and or slab from the page
578 * With kfree(), these are used to find the cache which an obj belongs to. 583 * allocator. These are used to find the slab an obj belongs to. With kfree(),
584 * these are used to find the cache which an obj belongs to.
579 */ 585 */
580static inline void page_set_cache(struct page *page, struct kmem_cache *cache) 586static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
581{ 587{
@@ -584,6 +590,8 @@ static inline void page_set_cache(struct page *page, struct kmem_cache *cache)
584 590
585static inline struct kmem_cache *page_get_cache(struct page *page) 591static inline struct kmem_cache *page_get_cache(struct page *page)
586{ 592{
593 if (unlikely(PageCompound(page)))
594 page = (struct page *)page_private(page);
587 return (struct kmem_cache *)page->lru.next; 595 return (struct kmem_cache *)page->lru.next;
588} 596}
589 597
@@ -594,6 +602,8 @@ static inline void page_set_slab(struct page *page, struct slab *slab)
594 602
595static inline struct slab *page_get_slab(struct page *page) 603static inline struct slab *page_get_slab(struct page *page)
596{ 604{
605 if (unlikely(PageCompound(page)))
606 page = (struct page *)page_private(page);
597 return (struct slab *)page->lru.prev; 607 return (struct slab *)page->lru.prev;
598} 608}
599 609
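
The new PageCompound() checks matter once a slab spans several pages: the back-pointers written by page_set_cache()/page_set_slab() live on the head page, and page_private() of a compound tail page points back at that head. cache_of_obj() below is a hypothetical helper sketching the full lookup from an object pointer:

static struct kmem_cache *cache_of_obj(const void *obj)
{
	struct page *page = virt_to_page(obj);

	if (unlikely(PageCompound(page)))
		page = (struct page *)page_private(page);	/* head page */

	return (struct kmem_cache *)page->lru.next;	/* set by page_set_cache() */
}
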
@@ -609,7 +619,21 @@ static inline struct slab *virt_to_slab(const void *obj)
609 return page_get_slab(page); 619 return page_get_slab(page);
610} 620}
611 621
612/* These are the default caches for kmalloc. Custom caches can have other sizes. */ 622static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
623 unsigned int idx)
624{
625 return slab->s_mem + cache->buffer_size * idx;
626}
627
628static inline unsigned int obj_to_index(struct kmem_cache *cache,
629 struct slab *slab, void *obj)
630{
631 return (unsigned)(obj - slab->s_mem) / cache->buffer_size;
632}
633
634/*
635 * These are the default caches for kmalloc. Custom caches can have other sizes.
636 */
613struct cache_sizes malloc_sizes[] = { 637struct cache_sizes malloc_sizes[] = {
614#define CACHE(x) { .cs_size = (x) }, 638#define CACHE(x) { .cs_size = (x) },
615#include <linux/kmalloc_sizes.h> 639#include <linux/kmalloc_sizes.h>
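
index_to_obj() and obj_to_index() replace the open-coded `s_mem + buffer_size * i` arithmetic used throughout the rest of this patch, and they are exact inverses over the objects of one slab; check_index_roundtrip() is a hypothetical self-check illustrating that:

static void check_index_roundtrip(struct kmem_cache *cachep, struct slab *slabp)
{
	unsigned int i;

	for (i = 0; i < cachep->num; i++) {
		void *objp = index_to_obj(cachep, slabp, i);
		BUG_ON(obj_to_index(cachep, slabp, objp) != i);
	}
}
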
@@ -642,8 +666,6 @@ static struct kmem_cache cache_cache = {
642 .limit = BOOT_CPUCACHE_ENTRIES, 666 .limit = BOOT_CPUCACHE_ENTRIES,
643 .shared = 1, 667 .shared = 1,
644 .buffer_size = sizeof(struct kmem_cache), 668 .buffer_size = sizeof(struct kmem_cache),
645 .flags = SLAB_NO_REAP,
646 .spinlock = SPIN_LOCK_UNLOCKED,
647 .name = "kmem_cache", 669 .name = "kmem_cache",
648#if DEBUG 670#if DEBUG
649 .obj_size = sizeof(struct kmem_cache), 671 .obj_size = sizeof(struct kmem_cache),
@@ -655,8 +677,8 @@ static DEFINE_MUTEX(cache_chain_mutex);
655static struct list_head cache_chain; 677static struct list_head cache_chain;
656 678
657/* 679/*
658 * vm_enough_memory() looks at this to determine how many 680 * vm_enough_memory() looks at this to determine how many slab-allocated pages
659 * slab-allocated pages are possibly freeable under pressure 681 * are possibly freeable under pressure
660 * 682 *
661 * SLAB_RECLAIM_ACCOUNT turns this on per-slab 683 * SLAB_RECLAIM_ACCOUNT turns this on per-slab
662 */ 684 */
@@ -675,7 +697,8 @@ static enum {
675 697
676static DEFINE_PER_CPU(struct work_struct, reap_work); 698static DEFINE_PER_CPU(struct work_struct, reap_work);
677 699
678static void free_block(struct kmem_cache *cachep, void **objpp, int len, int node); 700static void free_block(struct kmem_cache *cachep, void **objpp, int len,
701 int node);
679static void enable_cpucache(struct kmem_cache *cachep); 702static void enable_cpucache(struct kmem_cache *cachep);
680static void cache_reap(void *unused); 703static void cache_reap(void *unused);
681static int __node_shrink(struct kmem_cache *cachep, int node); 704static int __node_shrink(struct kmem_cache *cachep, int node);
@@ -685,7 +708,8 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
685 return cachep->array[smp_processor_id()]; 708 return cachep->array[smp_processor_id()];
686} 709}
687 710
688static inline struct kmem_cache *__find_general_cachep(size_t size, gfp_t gfpflags) 711static inline struct kmem_cache *__find_general_cachep(size_t size,
712 gfp_t gfpflags)
689{ 713{
690 struct cache_sizes *csizep = malloc_sizes; 714 struct cache_sizes *csizep = malloc_sizes;
691 715
@@ -720,8 +744,9 @@ static size_t slab_mgmt_size(size_t nr_objs, size_t align)
720 return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align); 744 return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align);
721} 745}
722 746
723/* Calculate the number of objects and left-over bytes for a given 747/*
724 buffer size. */ 748 * Calculate the number of objects and left-over bytes for a given buffer size.
749 */
725static void cache_estimate(unsigned long gfporder, size_t buffer_size, 750static void cache_estimate(unsigned long gfporder, size_t buffer_size,
726 size_t align, int flags, size_t *left_over, 751 size_t align, int flags, size_t *left_over,
727 unsigned int *num) 752 unsigned int *num)
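
To a first approximation, the on-slab case of cache_estimate() must fit the struct slab header and one kmem_bufctl_t per object into the same 2^gfporder pages as the objects themselves; estimate_sketch() is a hypothetical, simplified version that ignores the alignment of the management area:

static void estimate_sketch(unsigned long gfporder, size_t buffer_size,
			    size_t *left_over, unsigned int *num)
{
	size_t slab_size = PAGE_SIZE << gfporder;
	unsigned int nr = (slab_size - sizeof(struct slab)) /
			  (buffer_size + sizeof(kmem_bufctl_t));

	*num = nr;
	*left_over = slab_size - sizeof(struct slab) -
		     nr * (buffer_size + sizeof(kmem_bufctl_t));
}
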
@@ -782,7 +807,8 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size,
782 807
783#define slab_error(cachep, msg) __slab_error(__FUNCTION__, cachep, msg) 808#define slab_error(cachep, msg) __slab_error(__FUNCTION__, cachep, msg)
784 809
785static void __slab_error(const char *function, struct kmem_cache *cachep, char *msg) 810static void __slab_error(const char *function, struct kmem_cache *cachep,
811 char *msg)
786{ 812{
787 printk(KERN_ERR "slab error in %s(): cache `%s': %s\n", 813 printk(KERN_ERR "slab error in %s(): cache `%s': %s\n",
788 function, cachep->name, msg); 814 function, cachep->name, msg);
@@ -804,7 +830,7 @@ static void init_reap_node(int cpu)
804 830
805 node = next_node(cpu_to_node(cpu), node_online_map); 831 node = next_node(cpu_to_node(cpu), node_online_map);
806 if (node == MAX_NUMNODES) 832 if (node == MAX_NUMNODES)
807 node = 0; 833 node = first_node(node_online_map);
808 834
809 __get_cpu_var(reap_node) = node; 835 __get_cpu_var(reap_node) = node;
810} 836}
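
The init_reap_node() hunk is a small correctness fix: when next_node() walks off the end of the map, the reap node must wrap to the first *online* node rather than to node 0, which may not be online. The same round-robin step as a stand-alone sketch (next_online_node_wrap is a hypothetical name):

static int next_online_node_wrap(int node)
{
	node = next_node(node, node_online_map);
	if (node == MAX_NUMNODES)		/* walked past the last set bit */
		node = first_node(node_online_map);
	return node;
}
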
@@ -906,10 +932,8 @@ static void free_alien_cache(struct array_cache **ac_ptr)
906 932
907 if (!ac_ptr) 933 if (!ac_ptr)
908 return; 934 return;
909
910 for_each_node(i) 935 for_each_node(i)
911 kfree(ac_ptr[i]); 936 kfree(ac_ptr[i]);
912
913 kfree(ac_ptr); 937 kfree(ac_ptr);
914} 938}
915 939
@@ -943,7 +967,8 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3)
943 } 967 }
944} 968}
945 969
946static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien) 970static void drain_alien_cache(struct kmem_cache *cachep,
971 struct array_cache **alien)
947{ 972{
948 int i = 0; 973 int i = 0;
949 struct array_cache *ac; 974 struct array_cache *ac;
@@ -986,20 +1011,22 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
986 switch (action) { 1011 switch (action) {
987 case CPU_UP_PREPARE: 1012 case CPU_UP_PREPARE:
988 mutex_lock(&cache_chain_mutex); 1013 mutex_lock(&cache_chain_mutex);
989 /* we need to do this right in the beginning since 1014 /*
1015 * We need to do this right in the beginning since
990 * alloc_arraycache's are going to use this list. 1016 * alloc_arraycache's are going to use this list.
991 * kmalloc_node allows us to add the slab to the right 1017 * kmalloc_node allows us to add the slab to the right
992 * kmem_list3 and not this cpu's kmem_list3 1018 * kmem_list3 and not this cpu's kmem_list3
993 */ 1019 */
994 1020
995 list_for_each_entry(cachep, &cache_chain, next) { 1021 list_for_each_entry(cachep, &cache_chain, next) {
996 /* setup the size64 kmemlist for cpu before we can 1022 /*
1023 * Set up the size64 kmemlist for cpu before we can
997 * begin anything. Make sure some other cpu on this 1024 * begin anything. Make sure some other cpu on this
998 * node has not already allocated this 1025 * node has not already allocated this
999 */ 1026 */
1000 if (!cachep->nodelists[node]) { 1027 if (!cachep->nodelists[node]) {
1001 if (!(l3 = kmalloc_node(memsize, 1028 l3 = kmalloc_node(memsize, GFP_KERNEL, node);
1002 GFP_KERNEL, node))) 1029 if (!l3)
1003 goto bad; 1030 goto bad;
1004 kmem_list3_init(l3); 1031 kmem_list3_init(l3);
1005 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + 1032 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
@@ -1015,13 +1042,15 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
1015 1042
1016 spin_lock_irq(&cachep->nodelists[node]->list_lock); 1043 spin_lock_irq(&cachep->nodelists[node]->list_lock);
1017 cachep->nodelists[node]->free_limit = 1044 cachep->nodelists[node]->free_limit =
1018 (1 + nr_cpus_node(node)) * 1045 (1 + nr_cpus_node(node)) *
1019 cachep->batchcount + cachep->num; 1046 cachep->batchcount + cachep->num;
1020 spin_unlock_irq(&cachep->nodelists[node]->list_lock); 1047 spin_unlock_irq(&cachep->nodelists[node]->list_lock);
1021 } 1048 }
1022 1049
1023 /* Now we can go ahead with allocating the shared array's 1050 /*
1024 & array cache's */ 1051 * Now we can go ahead with allocating the shared arrays and
1052 * array caches
1053 */
1025 list_for_each_entry(cachep, &cache_chain, next) { 1054 list_for_each_entry(cachep, &cache_chain, next) {
1026 struct array_cache *nc; 1055 struct array_cache *nc;
1027 struct array_cache *shared; 1056 struct array_cache *shared;
@@ -1041,7 +1070,6 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
1041 if (!alien) 1070 if (!alien)
1042 goto bad; 1071 goto bad;
1043 cachep->array[cpu] = nc; 1072 cachep->array[cpu] = nc;
1044
1045 l3 = cachep->nodelists[node]; 1073 l3 = cachep->nodelists[node];
1046 BUG_ON(!l3); 1074 BUG_ON(!l3);
1047 1075
@@ -1061,7 +1089,6 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
1061 } 1089 }
1062#endif 1090#endif
1063 spin_unlock_irq(&l3->list_lock); 1091 spin_unlock_irq(&l3->list_lock);
1064
1065 kfree(shared); 1092 kfree(shared);
1066 free_alien_cache(alien); 1093 free_alien_cache(alien);
1067 } 1094 }
@@ -1083,7 +1110,6 @@ static int __devinit cpuup_callback(struct notifier_block *nfb,
1083 /* fall thru */ 1110 /* fall thru */
1084 case CPU_UP_CANCELED: 1111 case CPU_UP_CANCELED:
1085 mutex_lock(&cache_chain_mutex); 1112 mutex_lock(&cache_chain_mutex);
1086
1087 list_for_each_entry(cachep, &cache_chain, next) { 1113 list_for_each_entry(cachep, &cache_chain, next) {
1088 struct array_cache *nc; 1114 struct array_cache *nc;
1089 struct array_cache *shared; 1115 struct array_cache *shared;
@@ -1150,7 +1176,7 @@ free_array_cache:
1150#endif 1176#endif
1151 } 1177 }
1152 return NOTIFY_OK; 1178 return NOTIFY_OK;
1153 bad: 1179bad:
1154 mutex_unlock(&cache_chain_mutex); 1180 mutex_unlock(&cache_chain_mutex);
1155 return NOTIFY_BAD; 1181 return NOTIFY_BAD;
1156} 1182}
@@ -1160,7 +1186,8 @@ static struct notifier_block cpucache_notifier = { &cpuup_callback, NULL, 0 };
1160/* 1186/*
1161 * swap the static kmem_list3 with kmalloced memory 1187 * swap the static kmem_list3 with kmalloced memory
1162 */ 1188 */
1163static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, int nodeid) 1189static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list,
1190 int nodeid)
1164{ 1191{
1165 struct kmem_list3 *ptr; 1192 struct kmem_list3 *ptr;
1166 1193
@@ -1175,8 +1202,9 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, int no
1175 local_irq_enable(); 1202 local_irq_enable();
1176} 1203}
1177 1204
1178/* Initialisation. 1205/*
1179 * Called after the gfp() functions have been enabled, and before smp_init(). 1206 * Initialisation. Called after the page allocator have been initialised and
1207 * before smp_init().
1180 */ 1208 */
1181void __init kmem_cache_init(void) 1209void __init kmem_cache_init(void)
1182{ 1210{
@@ -1201,9 +1229,9 @@ void __init kmem_cache_init(void)
1201 1229
1202 /* Bootstrap is tricky, because several objects are allocated 1230 /* Bootstrap is tricky, because several objects are allocated
1203 * from caches that do not exist yet: 1231 * from caches that do not exist yet:
1204 * 1) initialize the cache_cache cache: it contains the struct kmem_cache 1232 * 1) initialize the cache_cache cache: it contains the struct
1205 * structures of all caches, except cache_cache itself: cache_cache 1233 * kmem_cache structures of all caches, except cache_cache itself:
1206 * is statically allocated. 1234 * cache_cache is statically allocated.
1207 * Initially an __init data area is used for the head array and the 1235 * Initially an __init data area is used for the head array and the
1208 * kmem_list3 structures, it's replaced with a kmalloc allocated 1236 * kmem_list3 structures, it's replaced with a kmalloc allocated
1209 * array at the end of the bootstrap. 1237 * array at the end of the bootstrap.
@@ -1226,7 +1254,8 @@ void __init kmem_cache_init(void)
1226 cache_cache.array[smp_processor_id()] = &initarray_cache.cache; 1254 cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
1227 cache_cache.nodelists[numa_node_id()] = &initkmem_list3[CACHE_CACHE]; 1255 cache_cache.nodelists[numa_node_id()] = &initkmem_list3[CACHE_CACHE];
1228 1256
1229 cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, cache_line_size()); 1257 cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
1258 cache_line_size());
1230 1259
1231 for (order = 0; order < MAX_ORDER; order++) { 1260 for (order = 0; order < MAX_ORDER; order++) {
1232 cache_estimate(order, cache_cache.buffer_size, 1261 cache_estimate(order, cache_cache.buffer_size,
@@ -1245,24 +1274,26 @@ void __init kmem_cache_init(void)
1245 sizes = malloc_sizes; 1274 sizes = malloc_sizes;
1246 names = cache_names; 1275 names = cache_names;
1247 1276
1248 /* Initialize the caches that provide memory for the array cache 1277 /*
1249 * and the kmem_list3 structures first. 1278 * Initialize the caches that provide memory for the array cache and the
1250 * Without this, further allocations will bug 1279 * kmem_list3 structures first. Without this, further allocations will
1280 * bug.
1251 */ 1281 */
1252 1282
1253 sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name, 1283 sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name,
1254 sizes[INDEX_AC].cs_size, 1284 sizes[INDEX_AC].cs_size,
1255 ARCH_KMALLOC_MINALIGN, 1285 ARCH_KMALLOC_MINALIGN,
1256 (ARCH_KMALLOC_FLAGS | 1286 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1257 SLAB_PANIC), NULL, NULL); 1287 NULL, NULL);
1258 1288
1259 if (INDEX_AC != INDEX_L3) 1289 if (INDEX_AC != INDEX_L3) {
1260 sizes[INDEX_L3].cs_cachep = 1290 sizes[INDEX_L3].cs_cachep =
1261 kmem_cache_create(names[INDEX_L3].name, 1291 kmem_cache_create(names[INDEX_L3].name,
1262 sizes[INDEX_L3].cs_size, 1292 sizes[INDEX_L3].cs_size,
1263 ARCH_KMALLOC_MINALIGN, 1293 ARCH_KMALLOC_MINALIGN,
1264 (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, 1294 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1265 NULL); 1295 NULL, NULL);
1296 }
1266 1297
1267 while (sizes->cs_size != ULONG_MAX) { 1298 while (sizes->cs_size != ULONG_MAX) {
1268 /* 1299 /*
@@ -1272,13 +1303,13 @@ void __init kmem_cache_init(void)
1272 * Note for systems short on memory removing the alignment will 1303 * Note for systems short on memory removing the alignment will
1273 * allow tighter packing of the smaller caches. 1304 * allow tighter packing of the smaller caches.
1274 */ 1305 */
1275 if (!sizes->cs_cachep) 1306 if (!sizes->cs_cachep) {
1276 sizes->cs_cachep = kmem_cache_create(names->name, 1307 sizes->cs_cachep = kmem_cache_create(names->name,
1277 sizes->cs_size, 1308 sizes->cs_size,
1278 ARCH_KMALLOC_MINALIGN, 1309 ARCH_KMALLOC_MINALIGN,
1279 (ARCH_KMALLOC_FLAGS 1310 ARCH_KMALLOC_FLAGS|SLAB_PANIC,
1280 | SLAB_PANIC), 1311 NULL, NULL);
1281 NULL, NULL); 1312 }
1282 1313
1283 /* Inc off-slab bufctl limit until the ceiling is hit. */ 1314 /* Inc off-slab bufctl limit until the ceiling is hit. */
1284 if (!(OFF_SLAB(sizes->cs_cachep))) { 1315 if (!(OFF_SLAB(sizes->cs_cachep))) {
@@ -1287,13 +1318,11 @@ void __init kmem_cache_init(void)
1287 } 1318 }
1288 1319
1289 sizes->cs_dmacachep = kmem_cache_create(names->name_dma, 1320 sizes->cs_dmacachep = kmem_cache_create(names->name_dma,
1290 sizes->cs_size, 1321 sizes->cs_size,
1291 ARCH_KMALLOC_MINALIGN, 1322 ARCH_KMALLOC_MINALIGN,
1292 (ARCH_KMALLOC_FLAGS | 1323 ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA|
1293 SLAB_CACHE_DMA | 1324 SLAB_PANIC,
1294 SLAB_PANIC), NULL, 1325 NULL, NULL);
1295 NULL);
1296
1297 sizes++; 1326 sizes++;
1298 names++; 1327 names++;
1299 } 1328 }
@@ -1345,20 +1374,22 @@ void __init kmem_cache_init(void)
1345 struct kmem_cache *cachep; 1374 struct kmem_cache *cachep;
1346 mutex_lock(&cache_chain_mutex); 1375 mutex_lock(&cache_chain_mutex);
1347 list_for_each_entry(cachep, &cache_chain, next) 1376 list_for_each_entry(cachep, &cache_chain, next)
1348 enable_cpucache(cachep); 1377 enable_cpucache(cachep);
1349 mutex_unlock(&cache_chain_mutex); 1378 mutex_unlock(&cache_chain_mutex);
1350 } 1379 }
1351 1380
1352 /* Done! */ 1381 /* Done! */
1353 g_cpucache_up = FULL; 1382 g_cpucache_up = FULL;
1354 1383
1355 /* Register a cpu startup notifier callback 1384 /*
1356 * that initializes cpu_cache_get for all new cpus 1385 * Register a cpu startup notifier callback that initializes
1386 * cpu_cache_get for all new cpus
1357 */ 1387 */
1358 register_cpu_notifier(&cpucache_notifier); 1388 register_cpu_notifier(&cpucache_notifier);
1359 1389
1360 /* The reap timers are started later, with a module init call: 1390 /*
1361 * That part of the kernel is not yet operational. 1391 * The reap timers are started later, with a module init call: That part
1392 * of the kernel is not yet operational.
1362 */ 1393 */
1363} 1394}
1364 1395
@@ -1366,16 +1397,13 @@ static int __init cpucache_init(void)
1366{ 1397{
1367 int cpu; 1398 int cpu;
1368 1399
1369 /* 1400 /*
1370 * Register the timers that return unneeded 1401 * Register the timers that return unneeded pages to the page allocator
1371 * pages to gfp.
1372 */ 1402 */
1373 for_each_online_cpu(cpu) 1403 for_each_online_cpu(cpu)
1374 start_cpu_timer(cpu); 1404 start_cpu_timer(cpu);
1375
1376 return 0; 1405 return 0;
1377} 1406}
1378
1379__initcall(cpucache_init); 1407__initcall(cpucache_init);
1380 1408
1381/* 1409/*
@@ -1402,7 +1430,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
1402 atomic_add(i, &slab_reclaim_pages); 1430 atomic_add(i, &slab_reclaim_pages);
1403 add_page_state(nr_slab, i); 1431 add_page_state(nr_slab, i);
1404 while (i--) { 1432 while (i--) {
1405 SetPageSlab(page); 1433 __SetPageSlab(page);
1406 page++; 1434 page++;
1407 } 1435 }
1408 return addr; 1436 return addr;
@@ -1418,8 +1446,8 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
1418 const unsigned long nr_freed = i; 1446 const unsigned long nr_freed = i;
1419 1447
1420 while (i--) { 1448 while (i--) {
1421 if (!TestClearPageSlab(page)) 1449 BUG_ON(!PageSlab(page));
1422 BUG(); 1450 __ClearPageSlab(page);
1423 page++; 1451 page++;
1424 } 1452 }
1425 sub_page_state(nr_slab, nr_freed); 1453 sub_page_state(nr_slab, nr_freed);
@@ -1489,9 +1517,8 @@ static void dump_line(char *data, int offset, int limit)
1489{ 1517{
1490 int i; 1518 int i;
1491 printk(KERN_ERR "%03x:", offset); 1519 printk(KERN_ERR "%03x:", offset);
1492 for (i = 0; i < limit; i++) { 1520 for (i = 0; i < limit; i++)
1493 printk(" %02x", (unsigned char)data[offset + i]); 1521 printk(" %02x", (unsigned char)data[offset + i]);
1494 }
1495 printk("\n"); 1522 printk("\n");
1496} 1523}
1497#endif 1524#endif
@@ -1505,15 +1532,15 @@ static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines)
1505 1532
1506 if (cachep->flags & SLAB_RED_ZONE) { 1533 if (cachep->flags & SLAB_RED_ZONE) {
1507 printk(KERN_ERR "Redzone: 0x%lx/0x%lx.\n", 1534 printk(KERN_ERR "Redzone: 0x%lx/0x%lx.\n",
1508 *dbg_redzone1(cachep, objp), 1535 *dbg_redzone1(cachep, objp),
1509 *dbg_redzone2(cachep, objp)); 1536 *dbg_redzone2(cachep, objp));
1510 } 1537 }
1511 1538
1512 if (cachep->flags & SLAB_STORE_USER) { 1539 if (cachep->flags & SLAB_STORE_USER) {
1513 printk(KERN_ERR "Last user: [<%p>]", 1540 printk(KERN_ERR "Last user: [<%p>]",
1514 *dbg_userword(cachep, objp)); 1541 *dbg_userword(cachep, objp));
1515 print_symbol("(%s)", 1542 print_symbol("(%s)",
1516 (unsigned long)*dbg_userword(cachep, objp)); 1543 (unsigned long)*dbg_userword(cachep, objp));
1517 printk("\n"); 1544 printk("\n");
1518 } 1545 }
1519 realobj = (char *)objp + obj_offset(cachep); 1546 realobj = (char *)objp + obj_offset(cachep);
@@ -1546,8 +1573,8 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
1546 /* Print header */ 1573 /* Print header */
1547 if (lines == 0) { 1574 if (lines == 0) {
1548 printk(KERN_ERR 1575 printk(KERN_ERR
1549 "Slab corruption: start=%p, len=%d\n", 1576 "Slab corruption: start=%p, len=%d\n",
1550 realobj, size); 1577 realobj, size);
1551 print_objinfo(cachep, objp, 0); 1578 print_objinfo(cachep, objp, 0);
1552 } 1579 }
1553 /* Hexdump the affected line */ 1580 /* Hexdump the affected line */
@@ -1568,18 +1595,18 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
1568 * exist: 1595 * exist:
1569 */ 1596 */
1570 struct slab *slabp = virt_to_slab(objp); 1597 struct slab *slabp = virt_to_slab(objp);
1571 int objnr; 1598 unsigned int objnr;
1572 1599
1573 objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size; 1600 objnr = obj_to_index(cachep, slabp, objp);
1574 if (objnr) { 1601 if (objnr) {
1575 objp = slabp->s_mem + (objnr - 1) * cachep->buffer_size; 1602 objp = index_to_obj(cachep, slabp, objnr - 1);
1576 realobj = (char *)objp + obj_offset(cachep); 1603 realobj = (char *)objp + obj_offset(cachep);
1577 printk(KERN_ERR "Prev obj: start=%p, len=%d\n", 1604 printk(KERN_ERR "Prev obj: start=%p, len=%d\n",
1578 realobj, size); 1605 realobj, size);
1579 print_objinfo(cachep, objp, 2); 1606 print_objinfo(cachep, objp, 2);
1580 } 1607 }
1581 if (objnr + 1 < cachep->num) { 1608 if (objnr + 1 < cachep->num) {
1582 objp = slabp->s_mem + (objnr + 1) * cachep->buffer_size; 1609 objp = index_to_obj(cachep, slabp, objnr + 1);
1583 realobj = (char *)objp + obj_offset(cachep); 1610 realobj = (char *)objp + obj_offset(cachep);
1584 printk(KERN_ERR "Next obj: start=%p, len=%d\n", 1611 printk(KERN_ERR "Next obj: start=%p, len=%d\n",
1585 realobj, size); 1612 realobj, size);
@@ -1591,22 +1618,25 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp)
1591 1618
1592#if DEBUG 1619#if DEBUG
1593/** 1620/**
1594 * slab_destroy_objs - call the registered destructor for each object in 1621 * slab_destroy_objs - destroy a slab and its objects
1595 * a slab that is to be destroyed. 1622 * @cachep: cache pointer being destroyed
1623 * @slabp: slab pointer being destroyed
1624 *
1625 * Call the registered destructor for each object in a slab that is being
1626 * destroyed.
1596 */ 1627 */
1597static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) 1628static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp)
1598{ 1629{
1599 int i; 1630 int i;
1600 for (i = 0; i < cachep->num; i++) { 1631 for (i = 0; i < cachep->num; i++) {
1601 void *objp = slabp->s_mem + cachep->buffer_size * i; 1632 void *objp = index_to_obj(cachep, slabp, i);
1602 1633
1603 if (cachep->flags & SLAB_POISON) { 1634 if (cachep->flags & SLAB_POISON) {
1604#ifdef CONFIG_DEBUG_PAGEALLOC 1635#ifdef CONFIG_DEBUG_PAGEALLOC
1605 if ((cachep->buffer_size % PAGE_SIZE) == 0 1636 if (cachep->buffer_size % PAGE_SIZE == 0 &&
1606 && OFF_SLAB(cachep)) 1637 OFF_SLAB(cachep))
1607 kernel_map_pages(virt_to_page(objp), 1638 kernel_map_pages(virt_to_page(objp),
1608 cachep->buffer_size / PAGE_SIZE, 1639 cachep->buffer_size / PAGE_SIZE, 1);
1609 1);
1610 else 1640 else
1611 check_poison_obj(cachep, objp); 1641 check_poison_obj(cachep, objp);
1612#else 1642#else
@@ -1631,7 +1661,7 @@ static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp)
1631 if (cachep->dtor) { 1661 if (cachep->dtor) {
1632 int i; 1662 int i;
1633 for (i = 0; i < cachep->num; i++) { 1663 for (i = 0; i < cachep->num; i++) {
1634 void *objp = slabp->s_mem + cachep->buffer_size * i; 1664 void *objp = index_to_obj(cachep, slabp, i);
1635 (cachep->dtor) (objp, cachep, 0); 1665 (cachep->dtor) (objp, cachep, 0);
1636 } 1666 }
1637 } 1667 }
@@ -1639,9 +1669,13 @@ static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp)
1639#endif 1669#endif
1640 1670
1641/** 1671/**
1672 * slab_destroy - destroy and release all objects in a slab
1673 * @cachep: cache pointer being destroyed
1674 * @slabp: slab pointer being destroyed
1675 *
1642 * Destroy all the objs in a slab, and release the mem back to the system. 1676 * Destroy all the objs in a slab, and release the mem back to the system.
1643 * Before calling the slab must have been unlinked from the cache. 1677 * Before calling the slab must have been unlinked from the cache. The
1644 * The cache-lock is not held/needed. 1678 * cache-lock is not held/needed.
1645 */ 1679 */
1646static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) 1680static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
1647{ 1681{
@@ -1662,8 +1696,10 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp)
1662 } 1696 }
1663} 1697}
1664 1698
1665/* For setting up all the kmem_list3s for cache whose buffer_size is same 1699/*
1666 as size of kmem_list3. */ 1700 * For setting up all the kmem_list3s for cache whose buffer_size is same as
1701 * size of kmem_list3.
1702 */
1667static void set_up_list3s(struct kmem_cache *cachep, int index) 1703static void set_up_list3s(struct kmem_cache *cachep, int index)
1668{ 1704{
1669 int node; 1705 int node;
@@ -1689,13 +1725,13 @@ static void set_up_list3s(struct kmem_cache *cachep, int index)
1689 * high order pages for slabs. When the gfp() functions are more friendly 1725 * high order pages for slabs. When the gfp() functions are more friendly
1690 * towards high-order requests, this should be changed. 1726 * towards high-order requests, this should be changed.
1691 */ 1727 */
1692static inline size_t calculate_slab_order(struct kmem_cache *cachep, 1728static size_t calculate_slab_order(struct kmem_cache *cachep,
1693 size_t size, size_t align, unsigned long flags) 1729 size_t size, size_t align, unsigned long flags)
1694{ 1730{
1695 size_t left_over = 0; 1731 size_t left_over = 0;
1696 int gfporder; 1732 int gfporder;
1697 1733
1698 for (gfporder = 0 ; gfporder <= MAX_GFP_ORDER; gfporder++) { 1734 for (gfporder = 0; gfporder <= MAX_GFP_ORDER; gfporder++) {
1699 unsigned int num; 1735 unsigned int num;
1700 size_t remainder; 1736 size_t remainder;
1701 1737
@@ -1730,12 +1766,66 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep,
1730 /* 1766 /*
1731 * Acceptable internal fragmentation? 1767 * Acceptable internal fragmentation?
1732 */ 1768 */
1733 if ((left_over * 8) <= (PAGE_SIZE << gfporder)) 1769 if (left_over * 8 <= (PAGE_SIZE << gfporder))
1734 break; 1770 break;
1735 } 1771 }
1736 return left_over; 1772 return left_over;
1737} 1773}
1738 1774
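
The cleaned-up fragmentation test above accepts a slab order as soon as the unused tail is at most 1/8th of the slab, e.g. up to 512 wasted bytes in a 4096-byte order-0 slab; the equivalent predicate as a tiny illustrative helper:

static int fragmentation_acceptable(size_t left_over, int gfporder)
{
	return left_over * 8 <= (PAGE_SIZE << gfporder);
}
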
1775static void setup_cpu_cache(struct kmem_cache *cachep)
1776{
1777 if (g_cpucache_up == FULL) {
1778 enable_cpucache(cachep);
1779 return;
1780 }
1781 if (g_cpucache_up == NONE) {
1782 /*
1783 * Note: the first kmem_cache_create must create the cache
1784 * that's used by kmalloc(24), otherwise the creation of
1785 * further caches will BUG().
1786 */
1787 cachep->array[smp_processor_id()] = &initarray_generic.cache;
1788
1789 /*
1790 * If the cache that's used by kmalloc(sizeof(kmem_list3)) is
1791 * the first cache, then we need to set up all its list3s,
1792 * otherwise the creation of further caches will BUG().
1793 */
1794 set_up_list3s(cachep, SIZE_AC);
1795 if (INDEX_AC == INDEX_L3)
1796 g_cpucache_up = PARTIAL_L3;
1797 else
1798 g_cpucache_up = PARTIAL_AC;
1799 } else {
1800 cachep->array[smp_processor_id()] =
1801 kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
1802
1803 if (g_cpucache_up == PARTIAL_AC) {
1804 set_up_list3s(cachep, SIZE_L3);
1805 g_cpucache_up = PARTIAL_L3;
1806 } else {
1807 int node;
1808 for_each_online_node(node) {
1809 cachep->nodelists[node] =
1810 kmalloc_node(sizeof(struct kmem_list3),
1811 GFP_KERNEL, node);
1812 BUG_ON(!cachep->nodelists[node]);
1813 kmem_list3_init(cachep->nodelists[node]);
1814 }
1815 }
1816 }
1817 cachep->nodelists[numa_node_id()]->next_reap =
1818 jiffies + REAPTIMEOUT_LIST3 +
1819 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
1820
1821 cpu_cache_get(cachep)->avail = 0;
1822 cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
1823 cpu_cache_get(cachep)->batchcount = 1;
1824 cpu_cache_get(cachep)->touched = 0;
1825 cachep->batchcount = 1;
1826 cachep->limit = BOOT_CPUCACHE_ENTRIES;
1827}
1828
1739/** 1829/**
1740 * kmem_cache_create - Create a cache. 1830 * kmem_cache_create - Create a cache.
1741 * @name: A string which is used in /proc/slabinfo to identify this cache. 1831 * @name: A string which is used in /proc/slabinfo to identify this cache.
@@ -1751,9 +1841,8 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep,
1751 * and the @dtor is run before the pages are handed back. 1841 * and the @dtor is run before the pages are handed back.
1752 * 1842 *
1753 * @name must be valid until the cache is destroyed. This implies that 1843 * @name must be valid until the cache is destroyed. This implies that
1754 * the module calling this has to destroy the cache before getting 1844 * the module calling this has to destroy the cache before getting unloaded.
1755 * unloaded. 1845 *
1756 *
1757 * The flags are 1846 * The flags are
1758 * 1847 *
1759 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) 1848 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
@@ -1762,16 +1851,14 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep,
1762 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check 1851 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
1763 * for buffer overruns. 1852 * for buffer overruns.
1764 * 1853 *
1765 * %SLAB_NO_REAP - Don't automatically reap this cache when we're under
1766 * memory pressure.
1767 *
1768 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware 1854 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
1769 * cacheline. This can be beneficial if you're counting cycles as closely 1855 * cacheline. This can be beneficial if you're counting cycles as closely
1770 * as davem. 1856 * as davem.
1771 */ 1857 */
1772struct kmem_cache * 1858struct kmem_cache *
1773kmem_cache_create (const char *name, size_t size, size_t align, 1859kmem_cache_create (const char *name, size_t size, size_t align,
1774 unsigned long flags, void (*ctor)(void*, struct kmem_cache *, unsigned long), 1860 unsigned long flags,
1861 void (*ctor)(void*, struct kmem_cache *, unsigned long),
1775 void (*dtor)(void*, struct kmem_cache *, unsigned long)) 1862 void (*dtor)(void*, struct kmem_cache *, unsigned long))
1776{ 1863{
1777 size_t left_over, slab_size, ralign; 1864 size_t left_over, slab_size, ralign;
@@ -1781,12 +1868,10 @@ kmem_cache_create (const char *name, size_t size, size_t align,
1781 /* 1868 /*
1782 * Sanity checks... these are all serious usage bugs. 1869 * Sanity checks... these are all serious usage bugs.
1783 */ 1870 */
1784 if ((!name) || 1871 if (!name || in_interrupt() || (size < BYTES_PER_WORD) ||
1785 in_interrupt() ||
1786 (size < BYTES_PER_WORD) ||
1787 (size > (1 << MAX_OBJ_ORDER) * PAGE_SIZE) || (dtor && !ctor)) { 1872 (size > (1 << MAX_OBJ_ORDER) * PAGE_SIZE) || (dtor && !ctor)) {
1788 printk(KERN_ERR "%s: Early error in slab %s\n", 1873 printk(KERN_ERR "%s: Early error in slab %s\n", __FUNCTION__,
1789 __FUNCTION__, name); 1874 name);
1790 BUG(); 1875 BUG();
1791 } 1876 }
1792 1877
@@ -1840,8 +1925,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
1840 * above the next power of two: caches with object sizes just above a 1925 * above the next power of two: caches with object sizes just above a
1841 * power of two have a significant amount of internal fragmentation. 1926 * power of two have a significant amount of internal fragmentation.
1842 */ 1927 */
1843 if ((size < 4096 1928 if (size < 4096 || fls(size - 1) == fls(size-1 + 3 * BYTES_PER_WORD))
1844 || fls(size - 1) == fls(size - 1 + 3 * BYTES_PER_WORD)))
1845 flags |= SLAB_RED_ZONE | SLAB_STORE_USER; 1929 flags |= SLAB_RED_ZONE | SLAB_STORE_USER;
1846 if (!(flags & SLAB_DESTROY_BY_RCU)) 1930 if (!(flags & SLAB_DESTROY_BY_RCU))
1847 flags |= SLAB_POISON; 1931 flags |= SLAB_POISON;
@@ -1853,13 +1937,14 @@ kmem_cache_create (const char *name, size_t size, size_t align,
1853 BUG_ON(dtor); 1937 BUG_ON(dtor);
1854 1938
1855 /* 1939 /*
1856 * Always checks flags, a caller might be expecting debug 1940 * Always checks flags, a caller might be expecting debug support which
1857 * support which isn't available. 1941 * isn't available.
1858 */ 1942 */
1859 if (flags & ~CREATE_MASK) 1943 if (flags & ~CREATE_MASK)
1860 BUG(); 1944 BUG();
1861 1945
1862 /* Check that size is in terms of words. This is needed to avoid 1946 /*
1947 * Check that size is in terms of words. This is needed to avoid
1863 * unaligned accesses for some archs when redzoning is used, and makes 1948 * unaligned accesses for some archs when redzoning is used, and makes
1864 * sure any on-slab bufctl's are also correctly aligned. 1949 * sure any on-slab bufctl's are also correctly aligned.
1865 */ 1950 */
@@ -1868,12 +1953,14 @@ kmem_cache_create (const char *name, size_t size, size_t align,
1868 size &= ~(BYTES_PER_WORD - 1); 1953 size &= ~(BYTES_PER_WORD - 1);
1869 } 1954 }
1870 1955
1871 /* calculate out the final buffer alignment: */ 1956 /* calculate the final buffer alignment: */
1957
1872 /* 1) arch recommendation: can be overridden for debug */ 1958 /* 1) arch recommendation: can be overridden for debug */
1873 if (flags & SLAB_HWCACHE_ALIGN) { 1959 if (flags & SLAB_HWCACHE_ALIGN) {
1874 /* Default alignment: as specified by the arch code. 1960 /*
1875 * Except if an object is really small, then squeeze multiple 1961 * Default alignment: as specified by the arch code. Except if
1876 * objects into one cacheline. 1962 * an object is really small, then squeeze multiple objects into
1963 * one cacheline.
1877 */ 1964 */
1878 ralign = cache_line_size(); 1965 ralign = cache_line_size();
1879 while (size <= ralign / 2) 1966 while (size <= ralign / 2)
@@ -1893,7 +1980,8 @@ kmem_cache_create (const char *name, size_t size, size_t align,
1893 if (ralign > BYTES_PER_WORD) 1980 if (ralign > BYTES_PER_WORD)
1894 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); 1981 flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER);
1895 } 1982 }
1896 /* 4) Store it. Note that the debug code below can reduce 1983 /*
1984 * 4) Store it. Note that the debug code below can reduce
1897 * the alignment to BYTES_PER_WORD. 1985 * the alignment to BYTES_PER_WORD.
1898 */ 1986 */
1899 align = ralign; 1987 align = ralign;
@@ -1978,7 +2066,6 @@ kmem_cache_create (const char *name, size_t size, size_t align,
1978 cachep->gfpflags = 0; 2066 cachep->gfpflags = 0;
1979 if (flags & SLAB_CACHE_DMA) 2067 if (flags & SLAB_CACHE_DMA)
1980 cachep->gfpflags |= GFP_DMA; 2068 cachep->gfpflags |= GFP_DMA;
1981 spin_lock_init(&cachep->spinlock);
1982 cachep->buffer_size = size; 2069 cachep->buffer_size = size;
1983 2070
1984 if (flags & CFLGS_OFF_SLAB) 2071 if (flags & CFLGS_OFF_SLAB)
@@ -1988,64 +2075,11 @@ kmem_cache_create (const char *name, size_t size, size_t align,
1988 cachep->name = name; 2075 cachep->name = name;
1989 2076
1990 2077
1991 if (g_cpucache_up == FULL) { 2078 setup_cpu_cache(cachep);
1992 enable_cpucache(cachep);
1993 } else {
1994 if (g_cpucache_up == NONE) {
1995 /* Note: the first kmem_cache_create must create
1996 * the cache that's used by kmalloc(24), otherwise
1997 * the creation of further caches will BUG().
1998 */
1999 cachep->array[smp_processor_id()] =
2000 &initarray_generic.cache;
2001
2002 /* If the cache that's used by
2003 * kmalloc(sizeof(kmem_list3)) is the first cache,
2004 * then we need to set up all its list3s, otherwise
2005 * the creation of further caches will BUG().
2006 */
2007 set_up_list3s(cachep, SIZE_AC);
2008 if (INDEX_AC == INDEX_L3)
2009 g_cpucache_up = PARTIAL_L3;
2010 else
2011 g_cpucache_up = PARTIAL_AC;
2012 } else {
2013 cachep->array[smp_processor_id()] =
2014 kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
2015
2016 if (g_cpucache_up == PARTIAL_AC) {
2017 set_up_list3s(cachep, SIZE_L3);
2018 g_cpucache_up = PARTIAL_L3;
2019 } else {
2020 int node;
2021 for_each_online_node(node) {
2022
2023 cachep->nodelists[node] =
2024 kmalloc_node(sizeof
2025 (struct kmem_list3),
2026 GFP_KERNEL, node);
2027 BUG_ON(!cachep->nodelists[node]);
2028 kmem_list3_init(cachep->
2029 nodelists[node]);
2030 }
2031 }
2032 }
2033 cachep->nodelists[numa_node_id()]->next_reap =
2034 jiffies + REAPTIMEOUT_LIST3 +
2035 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
2036
2037 BUG_ON(!cpu_cache_get(cachep));
2038 cpu_cache_get(cachep)->avail = 0;
2039 cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES;
2040 cpu_cache_get(cachep)->batchcount = 1;
2041 cpu_cache_get(cachep)->touched = 0;
2042 cachep->batchcount = 1;
2043 cachep->limit = BOOT_CPUCACHE_ENTRIES;
2044 }
2045 2079
2046 /* cache setup completed, link it into the list */ 2080 /* cache setup completed, link it into the list */
2047 list_add(&cachep->next, &cache_chain); 2081 list_add(&cachep->next, &cache_chain);
2048 oops: 2082oops:
2049 if (!cachep && (flags & SLAB_PANIC)) 2083 if (!cachep && (flags & SLAB_PANIC))
2050 panic("kmem_cache_create(): failed to create slab `%s'\n", 2084 panic("kmem_cache_create(): failed to create slab `%s'\n",
2051 name); 2085 name);
@@ -2089,30 +2123,13 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node)
2089#define check_spinlock_acquired_node(x, y) do { } while(0) 2123#define check_spinlock_acquired_node(x, y) do { } while(0)
2090#endif 2124#endif
2091 2125
2092/* 2126static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
2093 * Waits for all CPUs to execute func(). 2127 struct array_cache *ac,
2094 */ 2128 int force, int node);
2095static void smp_call_function_all_cpus(void (*func)(void *arg), void *arg)
2096{
2097 check_irq_on();
2098 preempt_disable();
2099
2100 local_irq_disable();
2101 func(arg);
2102 local_irq_enable();
2103
2104 if (smp_call_function(func, arg, 1, 1))
2105 BUG();
2106
2107 preempt_enable();
2108}
2109
2110static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac,
2111 int force, int node);
2112 2129
2113static void do_drain(void *arg) 2130static void do_drain(void *arg)
2114{ 2131{
2115 struct kmem_cache *cachep = (struct kmem_cache *) arg; 2132 struct kmem_cache *cachep = arg;
2116 struct array_cache *ac; 2133 struct array_cache *ac;
2117 int node = numa_node_id(); 2134 int node = numa_node_id();
2118 2135
@@ -2129,14 +2146,12 @@ static void drain_cpu_caches(struct kmem_cache *cachep)
2129 struct kmem_list3 *l3; 2146 struct kmem_list3 *l3;
2130 int node; 2147 int node;
2131 2148
2132 smp_call_function_all_cpus(do_drain, cachep); 2149 on_each_cpu(do_drain, cachep, 1, 1);
2133 check_irq_on(); 2150 check_irq_on();
2134 for_each_online_node(node) { 2151 for_each_online_node(node) {
2135 l3 = cachep->nodelists[node]; 2152 l3 = cachep->nodelists[node];
2136 if (l3) { 2153 if (l3) {
2137 spin_lock_irq(&l3->list_lock); 2154 drain_array(cachep, l3, l3->shared, 1, node);
2138 drain_array_locked(cachep, l3->shared, 1, node);
2139 spin_unlock_irq(&l3->list_lock);
2140 if (l3->alien) 2155 if (l3->alien)
2141 drain_alien_cache(cachep, l3->alien); 2156 drain_alien_cache(cachep, l3->alien);
2142 } 2157 }
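
Dropping the private smp_call_function_all_cpus() helper works because the generic on_each_cpu() already provides the same behaviour: cross-call every other online CPU and run the function locally with interrupts disabled. Roughly (a sketch of the equivalence, not the kernel's implementation):

static void on_each_cpu_sketch(void (*func)(void *), void *arg)
{
	preempt_disable();
	smp_call_function(func, arg, 1, 1);	/* all other online CPUs, wait */
	local_irq_disable();
	func(arg);				/* e.g. do_drain(cachep) locally */
	local_irq_enable();
	preempt_enable();
}
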
@@ -2260,16 +2275,15 @@ int kmem_cache_destroy(struct kmem_cache *cachep)
2260 2275
2261 /* NUMA: free the list3 structures */ 2276 /* NUMA: free the list3 structures */
2262 for_each_online_node(i) { 2277 for_each_online_node(i) {
2263 if ((l3 = cachep->nodelists[i])) { 2278 l3 = cachep->nodelists[i];
2279 if (l3) {
2264 kfree(l3->shared); 2280 kfree(l3->shared);
2265 free_alien_cache(l3->alien); 2281 free_alien_cache(l3->alien);
2266 kfree(l3); 2282 kfree(l3);
2267 } 2283 }
2268 } 2284 }
2269 kmem_cache_free(&cache_cache, cachep); 2285 kmem_cache_free(&cache_cache, cachep);
2270
2271 unlock_cpu_hotplug(); 2286 unlock_cpu_hotplug();
2272
2273 return 0; 2287 return 0;
2274} 2288}
2275EXPORT_SYMBOL(kmem_cache_destroy); 2289EXPORT_SYMBOL(kmem_cache_destroy);
@@ -2292,7 +2306,6 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp,
2292 slabp->inuse = 0; 2306 slabp->inuse = 0;
2293 slabp->colouroff = colour_off; 2307 slabp->colouroff = colour_off;
2294 slabp->s_mem = objp + colour_off; 2308 slabp->s_mem = objp + colour_off;
2295
2296 return slabp; 2309 return slabp;
2297} 2310}
2298 2311
@@ -2307,7 +2320,7 @@ static void cache_init_objs(struct kmem_cache *cachep,
2307 int i; 2320 int i;
2308 2321
2309 for (i = 0; i < cachep->num; i++) { 2322 for (i = 0; i < cachep->num; i++) {
2310 void *objp = slabp->s_mem + cachep->buffer_size * i; 2323 void *objp = index_to_obj(cachep, slabp, i);
2311#if DEBUG 2324#if DEBUG
2312 /* need to poison the objs? */ 2325 /* need to poison the objs? */
2313 if (cachep->flags & SLAB_POISON) 2326 if (cachep->flags & SLAB_POISON)
@@ -2320,9 +2333,9 @@ static void cache_init_objs(struct kmem_cache *cachep,
2320 *dbg_redzone2(cachep, objp) = RED_INACTIVE; 2333 *dbg_redzone2(cachep, objp) = RED_INACTIVE;
2321 } 2334 }
2322 /* 2335 /*
2323 * Constructors are not allowed to allocate memory from 2336 * Constructors are not allowed to allocate memory from the same
2324 * the same cache which they are a constructor for. 2337 * cache which they are a constructor for. Otherwise, deadlock.
2325 * Otherwise, deadlock. They must also be threaded. 2338 * They must also be threaded.
2326 */ 2339 */
2327 if (cachep->ctor && !(cachep->flags & SLAB_POISON)) 2340 if (cachep->ctor && !(cachep->flags & SLAB_POISON))
2328 cachep->ctor(objp + obj_offset(cachep), cachep, 2341 cachep->ctor(objp + obj_offset(cachep), cachep,
@@ -2336,8 +2349,8 @@ static void cache_init_objs(struct kmem_cache *cachep,
2336 slab_error(cachep, "constructor overwrote the" 2349 slab_error(cachep, "constructor overwrote the"
2337 " start of an object"); 2350 " start of an object");
2338 } 2351 }
2339 if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep) 2352 if ((cachep->buffer_size % PAGE_SIZE) == 0 &&
2340 && cachep->flags & SLAB_POISON) 2353 OFF_SLAB(cachep) && cachep->flags & SLAB_POISON)
2341 kernel_map_pages(virt_to_page(objp), 2354 kernel_map_pages(virt_to_page(objp),
2342 cachep->buffer_size / PAGE_SIZE, 0); 2355 cachep->buffer_size / PAGE_SIZE, 0);
2343#else 2356#else
@@ -2352,18 +2365,16 @@ static void cache_init_objs(struct kmem_cache *cachep,
2352 2365
2353static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) 2366static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags)
2354{ 2367{
2355 if (flags & SLAB_DMA) { 2368 if (flags & SLAB_DMA)
2356 if (!(cachep->gfpflags & GFP_DMA)) 2369 BUG_ON(!(cachep->gfpflags & GFP_DMA));
2357 BUG(); 2370 else
2358 } else { 2371 BUG_ON(cachep->gfpflags & GFP_DMA);
2359 if (cachep->gfpflags & GFP_DMA)
2360 BUG();
2361 }
2362} 2372}
2363 2373
2364static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, int nodeid) 2374static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp,
2375 int nodeid)
2365{ 2376{
2366 void *objp = slabp->s_mem + (slabp->free * cachep->buffer_size); 2377 void *objp = index_to_obj(cachep, slabp, slabp->free);
2367 kmem_bufctl_t next; 2378 kmem_bufctl_t next;
2368 2379
2369 slabp->inuse++; 2380 slabp->inuse++;
@@ -2377,10 +2388,10 @@ static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, int nod
2377 return objp; 2388 return objp;
2378} 2389}
2379 2390
2380static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, void *objp, 2391static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp,
2381 int nodeid) 2392 void *objp, int nodeid)
2382{ 2393{
2383 unsigned int objnr = (unsigned)(objp-slabp->s_mem) / cachep->buffer_size; 2394 unsigned int objnr = obj_to_index(cachep, slabp, objp);
2384 2395
2385#if DEBUG 2396#if DEBUG
2386 /* Verify that the slab belongs to the intended node */ 2397 /* Verify that the slab belongs to the intended node */
@@ -2388,7 +2399,7 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, void *ob
2388 2399
2389 if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) { 2400 if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) {
2390 printk(KERN_ERR "slab: double free detected in cache " 2401 printk(KERN_ERR "slab: double free detected in cache "
2391 "'%s', objp %p\n", cachep->name, objp); 2402 "'%s', objp %p\n", cachep->name, objp);
2392 BUG(); 2403 BUG();
2393 } 2404 }
2394#endif 2405#endif
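slab_get_obj()/slab_put_obj() manage the slab's free list through the per-slab kmem_bufctl_t index array: slabp->free is the index of the next free object, each array slot names the following free object, and the debug build marks handed-out slots so that a second free of the same object trips the BUG() above. A minimal user-space model of an index-linked free list with double-free detection, using toy names and an ALLOCATED sentinel that merely plays the role the kernel's marking plays:

#include <stdio.h>

#define NUM_OBJS   8
#define LIST_END   0xFFFFu	/* plays the role of BUFCTL_END */
#define ALLOCATED  0xFFFEu	/* marker for handed-out slots (debug aid) */

static unsigned short bufctl[NUM_OBJS];	/* next-free index per object */
static unsigned short free_head;	/* index of the first free object */

static void toy_slab_init(void)
{
	unsigned int i;

	for (i = 0; i < NUM_OBJS - 1; i++)
		bufctl[i] = (unsigned short)(i + 1);
	bufctl[NUM_OBJS - 1] = LIST_END;
	free_head = 0;
}

/* Pop the head of the free list and mark the slot as handed out. */
static int toy_get_obj(void)
{
	unsigned short idx;

	if (free_head == LIST_END)
		return -1;		/* slab is full */
	idx = free_head;
	free_head = bufctl[idx];
	bufctl[idx] = ALLOCATED;
	return idx;
}

/* Push an object back; a slot not marked ALLOCATED means double free. */
static void toy_put_obj(unsigned short idx)
{
	if (bufctl[idx] != ALLOCATED) {
		fprintf(stderr, "double free of object %u detected\n",
			(unsigned int)idx);
		return;
	}
	bufctl[idx] = free_head;
	free_head = idx;
}

int main(void)
{
	int a, b;

	toy_slab_init();
	a = toy_get_obj();
	b = toy_get_obj();
	toy_put_obj((unsigned short)a);
	toy_put_obj((unsigned short)a);	/* reports a double free */
	(void)b;
	return 0;
}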
@@ -2397,14 +2408,18 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, void *ob
2397 slabp->inuse--; 2408 slabp->inuse--;
2398} 2409}
2399 2410
2400static void set_slab_attr(struct kmem_cache *cachep, struct slab *slabp, void *objp) 2411static void set_slab_attr(struct kmem_cache *cachep, struct slab *slabp,
2412 void *objp)
2401{ 2413{
2402 int i; 2414 int i;
2403 struct page *page; 2415 struct page *page;
2404 2416
2405 /* Nasty!!!!!! I hope this is OK. */ 2417 /* Nasty!!!!!! I hope this is OK. */
2406 i = 1 << cachep->gfporder;
2407 page = virt_to_page(objp); 2418 page = virt_to_page(objp);
2419
2420 i = 1;
2421 if (likely(!PageCompound(page)))
2422 i <<= cachep->gfporder;
2408 do { 2423 do {
2409 page_set_cache(page, cachep); 2424 page_set_cache(page, cachep);
2410 page_set_slab(page, slabp); 2425 page_set_slab(page, slabp);
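set_slab_attr() records, for the pages backing a freshly grown slab, which cache and which slab descriptor own them; the free path below (cache_free_debugcheck() with page_get_cache()/page_get_slab()) relies on that to walk from an object pointer to its page and then to the owning cache. A hedged, simplified model of the per-page back-pointer idea, with an array of toy page descriptors standing in for struct page rather than the kernel's actual mechanism:

#include <assert.h>
#include <stdint.h>

#define TOY_PAGE_SHIFT 12
#define TOY_NR_PAGES   16

struct toy_cache { const char *name; };
struct toy_slab  { int unused; };

/* One descriptor per page, holding back-pointers to cache and slab. */
static struct toy_page {
	struct toy_cache *cache;
	struct toy_slab *slab;
} toy_pages[TOY_NR_PAGES];

static char toy_mem[TOY_NR_PAGES << TOY_PAGE_SHIFT];	/* fake "physical" memory */

static struct toy_page *toy_virt_to_page(void *addr)
{
	uintptr_t off = (uintptr_t)addr - (uintptr_t)toy_mem;

	return &toy_pages[off >> TOY_PAGE_SHIFT];
}

/* Tag every page of an order-`order` allocation with its owners. */
static void toy_set_slab_attr(struct toy_cache *c, struct toy_slab *s,
			      void *first_page, unsigned int order)
{
	struct toy_page *page = toy_virt_to_page(first_page);
	unsigned int i;

	for (i = 0; i < (1u << order); i++, page++) {
		page->cache = c;
		page->slab = s;
	}
}

int main(void)
{
	struct toy_cache c = { .name = "toy" };
	struct toy_slab s;
	void *obj;

	toy_set_slab_attr(&c, &s, toy_mem, 2);		/* 4 pages */
	obj = toy_mem + 3 * (1 << TOY_PAGE_SHIFT) + 100;
	assert(toy_virt_to_page(obj)->cache == &c);	/* free-path lookup */
	return 0;
}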
@@ -2425,8 +2440,9 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
2425 unsigned long ctor_flags; 2440 unsigned long ctor_flags;
2426 struct kmem_list3 *l3; 2441 struct kmem_list3 *l3;
2427 2442
2428 /* Be lazy and only check for valid flags here, 2443 /*
2429 * keeping it out of the critical path in kmem_cache_alloc(). 2444 * Be lazy and only check for valid flags here, keeping it out of the
2445 * critical path in kmem_cache_alloc().
2430 */ 2446 */
2431 if (flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW)) 2447 if (flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW))
2432 BUG(); 2448 BUG();
@@ -2467,14 +2483,17 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
2467 */ 2483 */
2468 kmem_flagcheck(cachep, flags); 2484 kmem_flagcheck(cachep, flags);
2469 2485
2470 /* Get mem for the objs. 2486 /*
2471 * Attempt to allocate a physical page from 'nodeid', 2487 * Get mem for the objs. Attempt to allocate a physical page from
2488 * 'nodeid'.
2472 */ 2489 */
2473 if (!(objp = kmem_getpages(cachep, flags, nodeid))) 2490 objp = kmem_getpages(cachep, flags, nodeid);
2491 if (!objp)
2474 goto failed; 2492 goto failed;
2475 2493
2476 /* Get slab management. */ 2494 /* Get slab management. */
2477 if (!(slabp = alloc_slabmgmt(cachep, objp, offset, local_flags))) 2495 slabp = alloc_slabmgmt(cachep, objp, offset, local_flags);
2496 if (!slabp)
2478 goto opps1; 2497 goto opps1;
2479 2498
2480 slabp->nodeid = nodeid; 2499 slabp->nodeid = nodeid;
@@ -2493,9 +2512,9 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid)
2493 l3->free_objects += cachep->num; 2512 l3->free_objects += cachep->num;
2494 spin_unlock(&l3->list_lock); 2513 spin_unlock(&l3->list_lock);
2495 return 1; 2514 return 1;
2496 opps1: 2515opps1:
2497 kmem_freepages(cachep, objp); 2516 kmem_freepages(cachep, objp);
2498 failed: 2517failed:
2499 if (local_flags & __GFP_WAIT) 2518 if (local_flags & __GFP_WAIT)
2500 local_irq_disable(); 2519 local_irq_disable();
2501 return 0; 2520 return 0;
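Several call sites in this hunk trade the "if (!(x = f())) goto err;" idiom for a plain assignment followed by a separate NULL test, the style the rest of the cleanup adopts. In isolation the transformation looks like the following sketch; make_buffer() is a hypothetical helper used only for illustration:

#include <stdlib.h>

/* Hypothetical helper, standing in for kmem_getpages()/alloc_slabmgmt(). */
static void *make_buffer(size_t len) { return malloc(len); }

static int old_style(size_t len)
{
	void *buf;

	if (!(buf = make_buffer(len)))	/* assignment hidden in the test */
		return -1;
	free(buf);
	return 0;
}

static int new_style(size_t len)
{
	void *buf;

	buf = make_buffer(len);		/* assign first ... */
	if (!buf)			/* ... then test, as the cleanup does */
		return -1;
	free(buf);
	return 0;
}

int main(void)
{
	return old_style(16) | new_style(16);
}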
@@ -2538,8 +2557,8 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
2538 page = virt_to_page(objp); 2557 page = virt_to_page(objp);
2539 2558
2540 if (page_get_cache(page) != cachep) { 2559 if (page_get_cache(page) != cachep) {
2541 printk(KERN_ERR 2560 printk(KERN_ERR "mismatch in kmem_cache_free: expected "
2542 "mismatch in kmem_cache_free: expected cache %p, got %p\n", 2561 "cache %p, got %p\n",
2543 page_get_cache(page), cachep); 2562 page_get_cache(page), cachep);
2544 printk(KERN_ERR "%p is %s.\n", cachep, cachep->name); 2563 printk(KERN_ERR "%p is %s.\n", cachep, cachep->name);
2545 printk(KERN_ERR "%p is %s.\n", page_get_cache(page), 2564 printk(KERN_ERR "%p is %s.\n", page_get_cache(page),
@@ -2549,13 +2568,12 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
2549 slabp = page_get_slab(page); 2568 slabp = page_get_slab(page);
2550 2569
2551 if (cachep->flags & SLAB_RED_ZONE) { 2570 if (cachep->flags & SLAB_RED_ZONE) {
2552 if (*dbg_redzone1(cachep, objp) != RED_ACTIVE 2571 if (*dbg_redzone1(cachep, objp) != RED_ACTIVE ||
2553 || *dbg_redzone2(cachep, objp) != RED_ACTIVE) { 2572 *dbg_redzone2(cachep, objp) != RED_ACTIVE) {
2554 slab_error(cachep, 2573 slab_error(cachep, "double free, or memory outside"
2555 "double free, or memory outside" 2574 " object was overwritten");
2556 " object was overwritten"); 2575 printk(KERN_ERR "%p: redzone 1:0x%lx, "
2557 printk(KERN_ERR 2576 "redzone 2:0x%lx.\n",
2558 "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n",
2559 objp, *dbg_redzone1(cachep, objp), 2577 objp, *dbg_redzone1(cachep, objp),
2560 *dbg_redzone2(cachep, objp)); 2578 *dbg_redzone2(cachep, objp));
2561 } 2579 }
@@ -2565,15 +2583,16 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
2565 if (cachep->flags & SLAB_STORE_USER) 2583 if (cachep->flags & SLAB_STORE_USER)
2566 *dbg_userword(cachep, objp) = caller; 2584 *dbg_userword(cachep, objp) = caller;
2567 2585
2568 objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size; 2586 objnr = obj_to_index(cachep, slabp, objp);
2569 2587
2570 BUG_ON(objnr >= cachep->num); 2588 BUG_ON(objnr >= cachep->num);
2571 BUG_ON(objp != slabp->s_mem + objnr * cachep->buffer_size); 2589 BUG_ON(objp != index_to_obj(cachep, slabp, objnr));
2572 2590
2573 if (cachep->flags & SLAB_DEBUG_INITIAL) { 2591 if (cachep->flags & SLAB_DEBUG_INITIAL) {
2574 /* Need to call the slab's constructor so the 2592 /*
2575 * caller can perform a verify of its state (debugging). 2593 * Need to call the slab's constructor so the caller can
2576 * Called without the cache-lock held. 2594 * perform a verify of its state (debugging). Called without
2595 * the cache-lock held.
2577 */ 2596 */
2578 cachep->ctor(objp + obj_offset(cachep), 2597 cachep->ctor(objp + obj_offset(cachep),
2579 cachep, SLAB_CTOR_CONSTRUCTOR | SLAB_CTOR_VERIFY); 2598 cachep, SLAB_CTOR_CONSTRUCTOR | SLAB_CTOR_VERIFY);
@@ -2586,7 +2605,7 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
2586 } 2605 }
2587 if (cachep->flags & SLAB_POISON) { 2606 if (cachep->flags & SLAB_POISON) {
2588#ifdef CONFIG_DEBUG_PAGEALLOC 2607#ifdef CONFIG_DEBUG_PAGEALLOC
2589 if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) { 2608 if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) {
2590 store_stackinfo(cachep, objp, (unsigned long)caller); 2609 store_stackinfo(cachep, objp, (unsigned long)caller);
2591 kernel_map_pages(virt_to_page(objp), 2610 kernel_map_pages(virt_to_page(objp),
2592 cachep->buffer_size / PAGE_SIZE, 0); 2611 cachep->buffer_size / PAGE_SIZE, 0);
@@ -2612,14 +2631,14 @@ static void check_slabp(struct kmem_cache *cachep, struct slab *slabp)
2612 goto bad; 2631 goto bad;
2613 } 2632 }
2614 if (entries != cachep->num - slabp->inuse) { 2633 if (entries != cachep->num - slabp->inuse) {
2615 bad: 2634bad:
2616 printk(KERN_ERR 2635 printk(KERN_ERR "slab: Internal list corruption detected in "
2617 "slab: Internal list corruption detected in cache '%s'(%d), slabp %p(%d). Hexdump:\n", 2636 "cache '%s'(%d), slabp %p(%d). Hexdump:\n",
2618 cachep->name, cachep->num, slabp, slabp->inuse); 2637 cachep->name, cachep->num, slabp, slabp->inuse);
2619 for (i = 0; 2638 for (i = 0;
2620 i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t); 2639 i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t);
2621 i++) { 2640 i++) {
2622 if ((i % 16) == 0) 2641 if (i % 16 == 0)
2623 printk("\n%03x:", i); 2642 printk("\n%03x:", i);
2624 printk(" %02x", ((unsigned char *)slabp)[i]); 2643 printk(" %02x", ((unsigned char *)slabp)[i]);
2625 } 2644 }
@@ -2641,12 +2660,13 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
2641 2660
2642 check_irq_off(); 2661 check_irq_off();
2643 ac = cpu_cache_get(cachep); 2662 ac = cpu_cache_get(cachep);
2644 retry: 2663retry:
2645 batchcount = ac->batchcount; 2664 batchcount = ac->batchcount;
2646 if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { 2665 if (!ac->touched && batchcount > BATCHREFILL_LIMIT) {
2647 /* if there was little recent activity on this 2666 /*
2648 * cache, then perform only a partial refill. 2667 * If there was little recent activity on this cache, then
2649 * Otherwise we could generate refill bouncing. 2668 * perform only a partial refill. Otherwise we could generate
2669 * refill bouncing.
2650 */ 2670 */
2651 batchcount = BATCHREFILL_LIMIT; 2671 batchcount = BATCHREFILL_LIMIT;
2652 } 2672 }
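The refill path first decides how many objects to pull from the node's lists: a cache that has not been touched since the last reap gets only a partial refill, capped at BATCHREFILL_LIMIT, instead of a full batchcount, so idle caches do not bounce large batches back and forth. Stripped of the surrounding locking, the decision is just the following; the constant's value is assumed here for illustration:

#include <stdio.h>

/* BATCHREFILL_LIMIT is assumed to be 16 here (illustrative value). */
enum { BATCHREFILL_LIMIT = 16 };

static unsigned int refill_count(int touched, unsigned int batchcount)
{
	if (!touched && batchcount > BATCHREFILL_LIMIT)
		return BATCHREFILL_LIMIT;	/* idle cache: partial refill */
	return batchcount;			/* busy cache: full batch */
}

int main(void)
{
	printf("idle: %u, busy: %u\n",
	       refill_count(0, 120), refill_count(1, 120));
	return 0;
}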
@@ -2702,29 +2722,29 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags)
2702 list_add(&slabp->list, &l3->slabs_partial); 2722 list_add(&slabp->list, &l3->slabs_partial);
2703 } 2723 }
2704 2724
2705 must_grow: 2725must_grow:
2706 l3->free_objects -= ac->avail; 2726 l3->free_objects -= ac->avail;
2707 alloc_done: 2727alloc_done:
2708 spin_unlock(&l3->list_lock); 2728 spin_unlock(&l3->list_lock);
2709 2729
2710 if (unlikely(!ac->avail)) { 2730 if (unlikely(!ac->avail)) {
2711 int x; 2731 int x;
2712 x = cache_grow(cachep, flags, numa_node_id()); 2732 x = cache_grow(cachep, flags, numa_node_id());
2713 2733
2714 // cache_grow can reenable interrupts, then ac could change. 2734 /* cache_grow can reenable interrupts, then ac could change. */
2715 ac = cpu_cache_get(cachep); 2735 ac = cpu_cache_get(cachep);
2716 if (!x && ac->avail == 0) // no objects in sight? abort 2736 if (!x && ac->avail == 0) /* no objects in sight? abort */
2717 return NULL; 2737 return NULL;
2718 2738
2719 if (!ac->avail) // objects refilled by interrupt? 2739 if (!ac->avail) /* objects refilled by interrupt? */
2720 goto retry; 2740 goto retry;
2721 } 2741 }
2722 ac->touched = 1; 2742 ac->touched = 1;
2723 return ac->entry[--ac->avail]; 2743 return ac->entry[--ac->avail];
2724} 2744}
2725 2745
2726static inline void 2746static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep,
2727cache_alloc_debugcheck_before(struct kmem_cache *cachep, gfp_t flags) 2747 gfp_t flags)
2728{ 2748{
2729 might_sleep_if(flags & __GFP_WAIT); 2749 might_sleep_if(flags & __GFP_WAIT);
2730#if DEBUG 2750#if DEBUG
@@ -2733,8 +2753,8 @@ cache_alloc_debugcheck_before(struct kmem_cache *cachep, gfp_t flags)
2733} 2753}
2734 2754
2735#if DEBUG 2755#if DEBUG
2736static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, gfp_t flags, 2756static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
2737 void *objp, void *caller) 2757 gfp_t flags, void *objp, void *caller)
2738{ 2758{
2739 if (!objp) 2759 if (!objp)
2740 return objp; 2760 return objp;
@@ -2754,15 +2774,14 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, gfp_t flags
2754 *dbg_userword(cachep, objp) = caller; 2774 *dbg_userword(cachep, objp) = caller;
2755 2775
2756 if (cachep->flags & SLAB_RED_ZONE) { 2776 if (cachep->flags & SLAB_RED_ZONE) {
2757 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE 2777 if (*dbg_redzone1(cachep, objp) != RED_INACTIVE ||
2758 || *dbg_redzone2(cachep, objp) != RED_INACTIVE) { 2778 *dbg_redzone2(cachep, objp) != RED_INACTIVE) {
2759 slab_error(cachep, 2779 slab_error(cachep, "double free, or memory outside"
2760 "double free, or memory outside" 2780 " object was overwritten");
2761 " object was overwritten");
2762 printk(KERN_ERR 2781 printk(KERN_ERR
2763 "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n", 2782 "%p: redzone 1:0x%lx, redzone 2:0x%lx\n",
2764 objp, *dbg_redzone1(cachep, objp), 2783 objp, *dbg_redzone1(cachep, objp),
2765 *dbg_redzone2(cachep, objp)); 2784 *dbg_redzone2(cachep, objp));
2766 } 2785 }
2767 *dbg_redzone1(cachep, objp) = RED_ACTIVE; 2786 *dbg_redzone1(cachep, objp) = RED_ACTIVE;
2768 *dbg_redzone2(cachep, objp) = RED_ACTIVE; 2787 *dbg_redzone2(cachep, objp) = RED_ACTIVE;
@@ -2809,8 +2828,8 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
2809 return objp; 2828 return objp;
2810} 2829}
2811 2830
2812static __always_inline void * 2831static __always_inline void *__cache_alloc(struct kmem_cache *cachep,
2813__cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) 2832 gfp_t flags, void *caller)
2814{ 2833{
2815 unsigned long save_flags; 2834 unsigned long save_flags;
2816 void *objp; 2835 void *objp;
@@ -2830,7 +2849,8 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller)
2830/* 2849/*
2831 * A interface to enable slab creation on nodeid 2850 * A interface to enable slab creation on nodeid
2832 */ 2851 */
2833static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) 2852static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
2853 int nodeid)
2834{ 2854{
2835 struct list_head *entry; 2855 struct list_head *entry;
2836 struct slab *slabp; 2856 struct slab *slabp;
@@ -2841,7 +2861,7 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node
2841 l3 = cachep->nodelists[nodeid]; 2861 l3 = cachep->nodelists[nodeid];
2842 BUG_ON(!l3); 2862 BUG_ON(!l3);
2843 2863
2844 retry: 2864retry:
2845 check_irq_off(); 2865 check_irq_off();
2846 spin_lock(&l3->list_lock); 2866 spin_lock(&l3->list_lock);
2847 entry = l3->slabs_partial.next; 2867 entry = l3->slabs_partial.next;
@@ -2868,16 +2888,15 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node
2868 /* move slabp to correct slabp list: */ 2888 /* move slabp to correct slabp list: */
2869 list_del(&slabp->list); 2889 list_del(&slabp->list);
2870 2890
2871 if (slabp->free == BUFCTL_END) { 2891 if (slabp->free == BUFCTL_END)
2872 list_add(&slabp->list, &l3->slabs_full); 2892 list_add(&slabp->list, &l3->slabs_full);
2873 } else { 2893 else
2874 list_add(&slabp->list, &l3->slabs_partial); 2894 list_add(&slabp->list, &l3->slabs_partial);
2875 }
2876 2895
2877 spin_unlock(&l3->list_lock); 2896 spin_unlock(&l3->list_lock);
2878 goto done; 2897 goto done;
2879 2898
2880 must_grow: 2899must_grow:
2881 spin_unlock(&l3->list_lock); 2900 spin_unlock(&l3->list_lock);
2882 x = cache_grow(cachep, flags, nodeid); 2901 x = cache_grow(cachep, flags, nodeid);
2883 2902
@@ -2885,7 +2904,7 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node
2885 return NULL; 2904 return NULL;
2886 2905
2887 goto retry; 2906 goto retry;
2888 done: 2907done:
2889 return obj; 2908 return obj;
2890} 2909}
2891#endif 2910#endif
@@ -2958,7 +2977,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
2958 } 2977 }
2959 2978
2960 free_block(cachep, ac->entry, batchcount, node); 2979 free_block(cachep, ac->entry, batchcount, node);
2961 free_done: 2980free_done:
2962#if STATS 2981#if STATS
2963 { 2982 {
2964 int i = 0; 2983 int i = 0;
@@ -2979,16 +2998,12 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
2979#endif 2998#endif
2980 spin_unlock(&l3->list_lock); 2999 spin_unlock(&l3->list_lock);
2981 ac->avail -= batchcount; 3000 ac->avail -= batchcount;
2982 memmove(ac->entry, &(ac->entry[batchcount]), 3001 memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
2983 sizeof(void *) * ac->avail);
2984} 3002}
2985 3003
2986/* 3004/*
2987 * __cache_free 3005 * Release an obj back to its cache. If the obj has a constructed state, it must
2988 * Release an obj back to its cache. If the obj has a constructed 3006 * be in this state _before_ it is released. Called with disabled ints.
2989 * state, it must be in this state _before_ it is released.
2990 *
2991 * Called with disabled ints.
2992 */ 3007 */
2993static inline void __cache_free(struct kmem_cache *cachep, void *objp) 3008static inline void __cache_free(struct kmem_cache *cachep, void *objp)
2994{ 3009{
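__cache_free() and cache_flusharray() above work against the per-CPU head array: a free pushes the object pointer onto ac->entry[], and once ac->avail hits the limit a batch of the oldest entries is returned to the slab lists and the survivors are slid down with memmove(). A simplified single-threaded model of that array, with toy names and no locking or per-node lists:

#include <assert.h>
#include <string.h>

#define TOY_LIMIT       8
#define TOY_BATCHCOUNT  4

struct toy_array_cache {
	unsigned int avail;
	void *entry[TOY_LIMIT];
};

static unsigned int flushed;	/* objects "returned to the slab lists" */

/* Stand-in for free_block(): hand a batch of objects back to the slabs. */
static void toy_free_block(void **entries, unsigned int nr)
{
	(void)entries;
	flushed += nr;
}

/* Fast path is a pure LIFO push; overflow flushes one batch first. */
static void toy_cache_free(struct toy_array_cache *ac, void *objp)
{
	if (ac->avail < TOY_LIMIT) {
		ac->entry[ac->avail++] = objp;
		return;
	}
	/* Flush the oldest BATCHCOUNT entries ... */
	toy_free_block(ac->entry, TOY_BATCHCOUNT);
	ac->avail -= TOY_BATCHCOUNT;
	/* ... and slide the rest down, as the memmove() above does. */
	memmove(ac->entry, &ac->entry[TOY_BATCHCOUNT],
		sizeof(void *) * ac->avail);
	ac->entry[ac->avail++] = objp;
}

int main(void)
{
	struct toy_array_cache ac = { .avail = 0 };
	int objs[12];
	int i;

	for (i = 0; i < 12; i++)
		toy_cache_free(&ac, &objs[i]);

	assert(flushed == 4);	/* one batch went back to the slabs */
	assert(ac.avail == 8);	/* 12 frees minus the 4 flushed */
	return 0;
}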
@@ -3007,9 +3022,9 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp)
3007 if (unlikely(slabp->nodeid != numa_node_id())) { 3022 if (unlikely(slabp->nodeid != numa_node_id())) {
3008 struct array_cache *alien = NULL; 3023 struct array_cache *alien = NULL;
3009 int nodeid = slabp->nodeid; 3024 int nodeid = slabp->nodeid;
3010 struct kmem_list3 *l3 = 3025 struct kmem_list3 *l3;
3011 cachep->nodelists[numa_node_id()];
3012 3026
3027 l3 = cachep->nodelists[numa_node_id()];
3013 STATS_INC_NODEFREES(cachep); 3028 STATS_INC_NODEFREES(cachep);
3014 if (l3->alien && l3->alien[nodeid]) { 3029 if (l3->alien && l3->alien[nodeid]) {
3015 alien = l3->alien[nodeid]; 3030 alien = l3->alien[nodeid];
@@ -3093,7 +3108,7 @@ int fastcall kmem_ptr_validate(struct kmem_cache *cachep, void *ptr)
3093 if (unlikely(page_get_cache(page) != cachep)) 3108 if (unlikely(page_get_cache(page) != cachep))
3094 goto out; 3109 goto out;
3095 return 1; 3110 return 1;
3096 out: 3111out:
3097 return 0; 3112 return 0;
3098} 3113}
3099 3114
@@ -3119,7 +3134,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
3119 local_irq_save(save_flags); 3134 local_irq_save(save_flags);
3120 3135
3121 if (nodeid == -1 || nodeid == numa_node_id() || 3136 if (nodeid == -1 || nodeid == numa_node_id() ||
3122 !cachep->nodelists[nodeid]) 3137 !cachep->nodelists[nodeid])
3123 ptr = ____cache_alloc(cachep, flags); 3138 ptr = ____cache_alloc(cachep, flags);
3124 else 3139 else
3125 ptr = __cache_alloc_node(cachep, flags, nodeid); 3140 ptr = __cache_alloc_node(cachep, flags, nodeid);
@@ -3148,6 +3163,7 @@ EXPORT_SYMBOL(kmalloc_node);
3148 * kmalloc - allocate memory 3163 * kmalloc - allocate memory
3149 * @size: how many bytes of memory are required. 3164 * @size: how many bytes of memory are required.
3150 * @flags: the type of memory to allocate. 3165 * @flags: the type of memory to allocate.
3166 * @caller: function caller for debug tracking of the caller
3151 * 3167 *
3152 * kmalloc is the normal method of allocating memory 3168 * kmalloc is the normal method of allocating memory
3153 * in the kernel. 3169 * in the kernel.
@@ -3236,7 +3252,7 @@ void *__alloc_percpu(size_t size)
3236 /* Catch derefs w/o wrappers */ 3252 /* Catch derefs w/o wrappers */
3237 return (void *)(~(unsigned long)pdata); 3253 return (void *)(~(unsigned long)pdata);
3238 3254
3239 unwind_oom: 3255unwind_oom:
3240 while (--i >= 0) { 3256 while (--i >= 0) {
3241 if (!cpu_possible(i)) 3257 if (!cpu_possible(i))
3242 continue; 3258 continue;
@@ -3339,18 +3355,20 @@ static int alloc_kmemlist(struct kmem_cache *cachep)
3339 struct array_cache *nc = NULL, *new; 3355 struct array_cache *nc = NULL, *new;
3340 struct array_cache **new_alien = NULL; 3356 struct array_cache **new_alien = NULL;
3341#ifdef CONFIG_NUMA 3357#ifdef CONFIG_NUMA
3342 if (!(new_alien = alloc_alien_cache(node, cachep->limit))) 3358 new_alien = alloc_alien_cache(node, cachep->limit);
3359 if (!new_alien)
3343 goto fail; 3360 goto fail;
3344#endif 3361#endif
3345 if (!(new = alloc_arraycache(node, (cachep->shared * 3362 new = alloc_arraycache(node, cachep->shared*cachep->batchcount,
3346 cachep->batchcount), 3363 0xbaadf00d);
3347 0xbaadf00d))) 3364 if (!new)
3348 goto fail; 3365 goto fail;
3349 if ((l3 = cachep->nodelists[node])) { 3366 l3 = cachep->nodelists[node];
3350 3367 if (l3) {
3351 spin_lock_irq(&l3->list_lock); 3368 spin_lock_irq(&l3->list_lock);
3352 3369
3353 if ((nc = cachep->nodelists[node]->shared)) 3370 nc = cachep->nodelists[node]->shared;
3371 if (nc)
3354 free_block(cachep, nc->entry, nc->avail, node); 3372 free_block(cachep, nc->entry, nc->avail, node);
3355 3373
3356 l3->shared = new; 3374 l3->shared = new;
@@ -3359,27 +3377,27 @@ static int alloc_kmemlist(struct kmem_cache *cachep)
3359 new_alien = NULL; 3377 new_alien = NULL;
3360 } 3378 }
3361 l3->free_limit = (1 + nr_cpus_node(node)) * 3379 l3->free_limit = (1 + nr_cpus_node(node)) *
3362 cachep->batchcount + cachep->num; 3380 cachep->batchcount + cachep->num;
3363 spin_unlock_irq(&l3->list_lock); 3381 spin_unlock_irq(&l3->list_lock);
3364 kfree(nc); 3382 kfree(nc);
3365 free_alien_cache(new_alien); 3383 free_alien_cache(new_alien);
3366 continue; 3384 continue;
3367 } 3385 }
3368 if (!(l3 = kmalloc_node(sizeof(struct kmem_list3), 3386 l3 = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, node);
3369 GFP_KERNEL, node))) 3387 if (!l3)
3370 goto fail; 3388 goto fail;
3371 3389
3372 kmem_list3_init(l3); 3390 kmem_list3_init(l3);
3373 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + 3391 l3->next_reap = jiffies + REAPTIMEOUT_LIST3 +
3374 ((unsigned long)cachep) % REAPTIMEOUT_LIST3; 3392 ((unsigned long)cachep) % REAPTIMEOUT_LIST3;
3375 l3->shared = new; 3393 l3->shared = new;
3376 l3->alien = new_alien; 3394 l3->alien = new_alien;
3377 l3->free_limit = (1 + nr_cpus_node(node)) * 3395 l3->free_limit = (1 + nr_cpus_node(node)) *
3378 cachep->batchcount + cachep->num; 3396 cachep->batchcount + cachep->num;
3379 cachep->nodelists[node] = l3; 3397 cachep->nodelists[node] = l3;
3380 } 3398 }
3381 return err; 3399 return err;
3382 fail: 3400fail:
3383 err = -ENOMEM; 3401 err = -ENOMEM;
3384 return err; 3402 return err;
3385} 3403}
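The free_limit set in both branches above is, roughly, how many free objects a node may keep before completely free slabs start being handed back. With illustrative numbers only, say 4 CPUs on the node, a batchcount of 16 and 30 objects per slab, that works out to (1 + 4) * 16 + 30 = 110 idle objects allowed on the node's lists.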
@@ -3391,7 +3409,7 @@ struct ccupdate_struct {
3391 3409
3392static void do_ccupdate_local(void *info) 3410static void do_ccupdate_local(void *info)
3393{ 3411{
3394 struct ccupdate_struct *new = (struct ccupdate_struct *)info; 3412 struct ccupdate_struct *new = info;
3395 struct array_cache *old; 3413 struct array_cache *old;
3396 3414
3397 check_irq_off(); 3415 check_irq_off();
@@ -3401,16 +3419,17 @@ static void do_ccupdate_local(void *info)
3401 new->new[smp_processor_id()] = old; 3419 new->new[smp_processor_id()] = old;
3402} 3420}
3403 3421
3404static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount, 3422/* Always called with the cache_chain_mutex held */
3405 int shared) 3423static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
3424 int batchcount, int shared)
3406{ 3425{
3407 struct ccupdate_struct new; 3426 struct ccupdate_struct new;
3408 int i, err; 3427 int i, err;
3409 3428
3410 memset(&new.new, 0, sizeof(new.new)); 3429 memset(&new.new, 0, sizeof(new.new));
3411 for_each_online_cpu(i) { 3430 for_each_online_cpu(i) {
3412 new.new[i] = 3431 new.new[i] = alloc_arraycache(cpu_to_node(i), limit,
3413 alloc_arraycache(cpu_to_node(i), limit, batchcount); 3432 batchcount);
3414 if (!new.new[i]) { 3433 if (!new.new[i]) {
3415 for (i--; i >= 0; i--) 3434 for (i--; i >= 0; i--)
3416 kfree(new.new[i]); 3435 kfree(new.new[i]);
@@ -3419,14 +3438,12 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount
3419 } 3438 }
3420 new.cachep = cachep; 3439 new.cachep = cachep;
3421 3440
3422 smp_call_function_all_cpus(do_ccupdate_local, (void *)&new); 3441 on_each_cpu(do_ccupdate_local, (void *)&new, 1, 1);
3423 3442
3424 check_irq_on(); 3443 check_irq_on();
3425 spin_lock(&cachep->spinlock);
3426 cachep->batchcount = batchcount; 3444 cachep->batchcount = batchcount;
3427 cachep->limit = limit; 3445 cachep->limit = limit;
3428 cachep->shared = shared; 3446 cachep->shared = shared;
3429 spin_unlock(&cachep->spinlock);
3430 3447
3431 for_each_online_cpu(i) { 3448 for_each_online_cpu(i) {
3432 struct array_cache *ccold = new.new[i]; 3449 struct array_cache *ccold = new.new[i];
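do_tune_cpucache() retunes a live cache by allocating a fresh array_cache for every online CPU, letting each CPU swap in its own replacement via do_ccupdate_local() (now driven by on_each_cpu() instead of smp_call_function_all_cpus()), and then draining and freeing the old arrays. A sequential user-space sketch of that allocate/swap/free pattern, with toy names and no real IPIs or interrupt handling:

#include <stdio.h>
#include <stdlib.h>

#define TOY_NR_CPUS 4

struct toy_array_cache { unsigned int limit, batchcount, avail; };

/* The "live" per-CPU pointers the allocator fast path would use. */
static struct toy_array_cache *cpu_cache[TOY_NR_CPUS];

static struct toy_array_cache *toy_alloc_arraycache(unsigned int limit,
						    unsigned int batchcount)
{
	struct toy_array_cache *ac = calloc(1, sizeof(*ac));

	if (ac) {
		ac->limit = limit;
		ac->batchcount = batchcount;
	}
	return ac;
}

/* Retune: build all replacements first, swap, then drop the old ones. */
static int toy_tune_cpucache(unsigned int limit, unsigned int batchcount)
{
	struct toy_array_cache *new[TOY_NR_CPUS] = { 0 };
	int i;

	for (i = 0; i < TOY_NR_CPUS; i++) {
		new[i] = toy_alloc_arraycache(limit, batchcount);
		if (!new[i]) {
			while (--i >= 0)
				free(new[i]);
			return -1;	/* nothing has been swapped yet */
		}
	}

	/* In the kernel each CPU exchanges its own pointer with interrupts
	 * off; run sequentially here, the swap is just an exchange per CPU. */
	for (i = 0; i < TOY_NR_CPUS; i++) {
		struct toy_array_cache *old = cpu_cache[i];

		cpu_cache[i] = new[i];
		new[i] = old;
	}

	/* The old arrays (now sitting in new[]) would be drained, then freed. */
	for (i = 0; i < TOY_NR_CPUS; i++)
		free(new[i]);
	return 0;
}

int main(void)
{
	toy_tune_cpucache(120, 60);
	toy_tune_cpucache(54, 27);
	printf("cpu0 limit is now %u\n", cpu_cache[0]->limit);
	return 0;
}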
@@ -3447,15 +3464,17 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount
3447 return 0; 3464 return 0;
3448} 3465}
3449 3466
3467/* Called with cache_chain_mutex held always */
3450static void enable_cpucache(struct kmem_cache *cachep) 3468static void enable_cpucache(struct kmem_cache *cachep)
3451{ 3469{
3452 int err; 3470 int err;
3453 int limit, shared; 3471 int limit, shared;
3454 3472
3455 /* The head array serves three purposes: 3473 /*
3474 * The head array serves three purposes:
3456 * - create a LIFO ordering, i.e. return objects that are cache-warm 3475 * - create a LIFO ordering, i.e. return objects that are cache-warm
3457 * - reduce the number of spinlock operations. 3476 * - reduce the number of spinlock operations.
3458 * - reduce the number of linked list operations on the slab and 3477 * - reduce the number of linked list operations on the slab and
3459 * bufctl chains: array operations are cheaper. 3478 * bufctl chains: array operations are cheaper.
3460 * The numbers are guessed, we should auto-tune as described by 3479 * The numbers are guessed, we should auto-tune as described by
3461 * Bonwick. 3480 * Bonwick.
@@ -3471,7 +3490,8 @@ static void enable_cpucache(struct kmem_cache *cachep)
3471 else 3490 else
3472 limit = 120; 3491 limit = 120;
3473 3492
3474 /* Cpu bound tasks (e.g. network routing) can exhibit cpu bound 3493 /*
3494 * CPU bound tasks (e.g. network routing) can exhibit cpu bound
3475 * allocation behaviour: Most allocs on one cpu, most free operations 3495 * allocation behaviour: Most allocs on one cpu, most free operations
3476 * on another cpu. For these cases, an efficient object passing between 3496 * on another cpu. For these cases, an efficient object passing between
3477 * cpus is necessary. This is provided by a shared array. The array 3497 * cpus is necessary. This is provided by a shared array. The array
@@ -3486,9 +3506,9 @@ static void enable_cpucache(struct kmem_cache *cachep)
3486#endif 3506#endif
3487 3507
3488#if DEBUG 3508#if DEBUG
3489 /* With debugging enabled, large batchcount lead to excessively 3509 /*
3490 * long periods with disabled local interrupts. Limit the 3510 * With debugging enabled, large batchcount lead to excessively long
3491 * batchcount 3511 * periods with disabled local interrupts. Limit the batchcount
3492 */ 3512 */
3493 if (limit > 32) 3513 if (limit > 32)
3494 limit = 32; 3514 limit = 32;
@@ -3499,23 +3519,32 @@ static void enable_cpucache(struct kmem_cache *cachep)
3499 cachep->name, -err); 3519 cachep->name, -err);
3500} 3520}
3501 3521
3502static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac, 3522/*
3503 int force, int node) 3523 * Drain an array if it contains any elements taking the l3 lock only if
3524 * necessary. Note that the l3 listlock also protects the array_cache
3525 * if drain_array() is used on the shared array.
3526 */
3527void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3,
3528 struct array_cache *ac, int force, int node)
3504{ 3529{
3505 int tofree; 3530 int tofree;
3506 3531
3507 check_spinlock_acquired_node(cachep, node); 3532 if (!ac || !ac->avail)
3533 return;
3508 if (ac->touched && !force) { 3534 if (ac->touched && !force) {
3509 ac->touched = 0; 3535 ac->touched = 0;
3510 } else if (ac->avail) { 3536 } else {
3511 tofree = force ? ac->avail : (ac->limit + 4) / 5; 3537 spin_lock_irq(&l3->list_lock);
3512 if (tofree > ac->avail) { 3538 if (ac->avail) {
3513 tofree = (ac->avail + 1) / 2; 3539 tofree = force ? ac->avail : (ac->limit + 4) / 5;
3540 if (tofree > ac->avail)
3541 tofree = (ac->avail + 1) / 2;
3542 free_block(cachep, ac->entry, tofree, node);
3543 ac->avail -= tofree;
3544 memmove(ac->entry, &(ac->entry[tofree]),
3545 sizeof(void *) * ac->avail);
3514 } 3546 }
3515 free_block(cachep, ac->entry, tofree, node); 3547 spin_unlock_irq(&l3->list_lock);
3516 ac->avail -= tofree;
3517 memmove(ac->entry, &(ac->entry[tofree]),
3518 sizeof(void *) * ac->avail);
3519 } 3548 }
3520} 3549}
3521 3550
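The reworked drain_array() (and cache_reap() below) follow the same shape: peek at ac->avail without the list lock, and only if there appears to be work take the lock and re-check under it. The unlocked peek can race, but the worst case is a skipped or an extra pass, which is harmless for a periodic janitor. A minimal pthread sketch of that check / lock / re-check pattern, with toy names standing in for the array_cache and free_block() machinery:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned int avail;	/* objects sitting in the array */

/* Periodic drain: cheap unlocked peek first, real work under the lock. */
static void toy_drain(unsigned int batch)
{
	if (!avail)		/* racy peek: may miss or over-trigger */
		return;

	pthread_mutex_lock(&list_lock);
	if (avail) {		/* re-check now that the lock is held */
		unsigned int tofree = avail < batch ? avail : batch;

		avail -= tofree;	/* stand-in for free_block() + memmove() */
		printf("drained %u, %u left\n", tofree, avail);
	}
	pthread_mutex_unlock(&list_lock);
}

int main(void)
{
	avail = 10;
	toy_drain(4);	/* drains 4 */
	toy_drain(4);	/* drains 4 */
	toy_drain(4);	/* drains 2 */
	toy_drain(4);	/* unlocked peek sees nothing; lock never taken */
	return 0;
}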
@@ -3528,13 +3557,14 @@ static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac
3528 * - clear the per-cpu caches for this CPU. 3557 * - clear the per-cpu caches for this CPU.
3529 * - return freeable pages to the main free memory pool. 3558 * - return freeable pages to the main free memory pool.
3530 * 3559 *
3531 * If we cannot acquire the cache chain mutex then just give up - we'll 3560 * If we cannot acquire the cache chain mutex then just give up - we'll try
3532 * try again on the next iteration. 3561 * again on the next iteration.
3533 */ 3562 */
3534static void cache_reap(void *unused) 3563static void cache_reap(void *unused)
3535{ 3564{
3536 struct list_head *walk; 3565 struct list_head *walk;
3537 struct kmem_list3 *l3; 3566 struct kmem_list3 *l3;
3567 int node = numa_node_id();
3538 3568
3539 if (!mutex_trylock(&cache_chain_mutex)) { 3569 if (!mutex_trylock(&cache_chain_mutex)) {
3540 /* Give up. Setup the next iteration. */ 3570 /* Give up. Setup the next iteration. */
@@ -3550,65 +3580,72 @@ static void cache_reap(void *unused)
3550 struct slab *slabp; 3580 struct slab *slabp;
3551 3581
3552 searchp = list_entry(walk, struct kmem_cache, next); 3582 searchp = list_entry(walk, struct kmem_cache, next);
3553
3554 if (searchp->flags & SLAB_NO_REAP)
3555 goto next;
3556
3557 check_irq_on(); 3583 check_irq_on();
3558 3584
3559 l3 = searchp->nodelists[numa_node_id()]; 3585 /*
3586 * We only take the l3 lock if absolutely necessary and we
3587 * have established with reasonable certainty that
3588 * we can do some work if the lock was obtained.
3589 */
3590 l3 = searchp->nodelists[node];
3591
3560 reap_alien(searchp, l3); 3592 reap_alien(searchp, l3);
3561 spin_lock_irq(&l3->list_lock);
3562 3593
3563 drain_array_locked(searchp, cpu_cache_get(searchp), 0, 3594 drain_array(searchp, l3, cpu_cache_get(searchp), 0, node);
3564 numa_node_id());
3565 3595
3596 /*
3597 * These are racy checks but it does not matter
3598 * if we skip one check or scan twice.
3599 */
3566 if (time_after(l3->next_reap, jiffies)) 3600 if (time_after(l3->next_reap, jiffies))
3567 goto next_unlock; 3601 goto next;
3568 3602
3569 l3->next_reap = jiffies + REAPTIMEOUT_LIST3; 3603 l3->next_reap = jiffies + REAPTIMEOUT_LIST3;
3570 3604
3571 if (l3->shared) 3605 drain_array(searchp, l3, l3->shared, 0, node);
3572 drain_array_locked(searchp, l3->shared, 0,
3573 numa_node_id());
3574 3606
3575 if (l3->free_touched) { 3607 if (l3->free_touched) {
3576 l3->free_touched = 0; 3608 l3->free_touched = 0;
3577 goto next_unlock; 3609 goto next;
3578 } 3610 }
3579 3611
3580 tofree = 3612 tofree = (l3->free_limit + 5 * searchp->num - 1) /
3581 (l3->free_limit + 5 * searchp->num - 3613 (5 * searchp->num);
3582 1) / (5 * searchp->num);
3583 do { 3614 do {
3615 /*
3616 * Do not lock if there are no free blocks.
3617 */
3618 if (list_empty(&l3->slabs_free))
3619 break;
3620
3621 spin_lock_irq(&l3->list_lock);
3584 p = l3->slabs_free.next; 3622 p = l3->slabs_free.next;
3585 if (p == &(l3->slabs_free)) 3623 if (p == &(l3->slabs_free)) {
3624 spin_unlock_irq(&l3->list_lock);
3586 break; 3625 break;
3626 }
3587 3627
3588 slabp = list_entry(p, struct slab, list); 3628 slabp = list_entry(p, struct slab, list);
3589 BUG_ON(slabp->inuse); 3629 BUG_ON(slabp->inuse);
3590 list_del(&slabp->list); 3630 list_del(&slabp->list);
3591 STATS_INC_REAPED(searchp); 3631 STATS_INC_REAPED(searchp);
3592 3632
3593 /* Safe to drop the lock. The slab is no longer 3633 /*
3594 * linked to the cache. 3634 * Safe to drop the lock. The slab is no longer linked
3595 * searchp cannot disappear, we hold 3635 * to the cache. searchp cannot disappear, we hold
3596 * cache_chain_lock 3636 * cache_chain_lock
3597 */ 3637 */
3598 l3->free_objects -= searchp->num; 3638 l3->free_objects -= searchp->num;
3599 spin_unlock_irq(&l3->list_lock); 3639 spin_unlock_irq(&l3->list_lock);
3600 slab_destroy(searchp, slabp); 3640 slab_destroy(searchp, slabp);
3601 spin_lock_irq(&l3->list_lock);
3602 } while (--tofree > 0); 3641 } while (--tofree > 0);
3603 next_unlock: 3642next:
3604 spin_unlock_irq(&l3->list_lock);
3605 next:
3606 cond_resched(); 3643 cond_resched();
3607 } 3644 }
3608 check_irq_on(); 3645 check_irq_on();
3609 mutex_unlock(&cache_chain_mutex); 3646 mutex_unlock(&cache_chain_mutex);
3610 next_reap_node(); 3647 next_reap_node();
3611 /* Setup the next iteration */ 3648 /* Set up the next iteration */
3612 schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC); 3649 schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC);
3613} 3650}
3614 3651
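The reap loop itself now takes the list lock per slab instead of holding it across the whole pass: if the free list looks empty (an unlocked check, again tolerable because reaping is periodic) it stops; otherwise it locks, detaches one completely free slab, unlocks, and destroys the slab outside the lock. A condensed user-space model of that detach-then-destroy-outside-the-lock loop, with a plain linked list and toy names in place of the slab lists:

#include <pthread.h>
#include <stdlib.h>

struct toy_slab {
	struct toy_slab *next;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct toy_slab *slabs_free;	/* singly linked list of free slabs */

static void toy_slab_destroy(struct toy_slab *slabp)
{
	free(slabp);			/* "expensive" work done unlocked */
}

static void toy_reap(int tofree)
{
	do {
		struct toy_slab *slabp;

		if (!slabs_free)	/* unlocked peek: nothing to reap */
			break;

		pthread_mutex_lock(&list_lock);
		slabp = slabs_free;
		if (!slabp) {		/* emptied since the peek */
			pthread_mutex_unlock(&list_lock);
			break;
		}
		slabs_free = slabp->next;	/* detach under the lock ... */
		pthread_mutex_unlock(&list_lock);

		toy_slab_destroy(slabp);	/* ... destroy without it */
	} while (--tofree > 0);
}

int main(void)
{
	int i;

	for (i = 0; i < 5; i++) {
		struct toy_slab *s = malloc(sizeof(*s));

		s->next = slabs_free;
		slabs_free = s;
	}
	toy_reap(3);	/* frees three slabs */
	toy_reap(10);	/* frees the remaining two, then stops */
	return 0;
}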
@@ -3658,8 +3695,8 @@ static void *s_next(struct seq_file *m, void *p, loff_t *pos)
3658{ 3695{
3659 struct kmem_cache *cachep = p; 3696 struct kmem_cache *cachep = p;
3660 ++*pos; 3697 ++*pos;
3661 return cachep->next.next == &cache_chain ? NULL 3698 return cachep->next.next == &cache_chain ?
3662 : list_entry(cachep->next.next, struct kmem_cache, next); 3699 NULL : list_entry(cachep->next.next, struct kmem_cache, next);
3663} 3700}
3664 3701
3665static void s_stop(struct seq_file *m, void *p) 3702static void s_stop(struct seq_file *m, void *p)
@@ -3681,7 +3718,6 @@ static int s_show(struct seq_file *m, void *p)
3681 int node; 3718 int node;
3682 struct kmem_list3 *l3; 3719 struct kmem_list3 *l3;
3683 3720
3684 spin_lock(&cachep->spinlock);
3685 active_objs = 0; 3721 active_objs = 0;
3686 num_slabs = 0; 3722 num_slabs = 0;
3687 for_each_online_node(node) { 3723 for_each_online_node(node) {
@@ -3748,7 +3784,9 @@ static int s_show(struct seq_file *m, void *p)
3748 unsigned long node_frees = cachep->node_frees; 3784 unsigned long node_frees = cachep->node_frees;
3749 3785
3750 seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \ 3786 seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \
3751 %4lu %4lu %4lu %4lu", allocs, high, grown, reaped, errors, max_freeable, node_allocs, node_frees); 3787 %4lu %4lu %4lu %4lu", allocs, high, grown,
3788 reaped, errors, max_freeable, node_allocs,
3789 node_frees);
3752 } 3790 }
3753 /* cpu stats */ 3791 /* cpu stats */
3754 { 3792 {
@@ -3762,7 +3800,6 @@ static int s_show(struct seq_file *m, void *p)
3762 } 3800 }
3763#endif 3801#endif
3764 seq_putc(m, '\n'); 3802 seq_putc(m, '\n');
3765 spin_unlock(&cachep->spinlock);
3766 return 0; 3803 return 0;
3767} 3804}
3768 3805
@@ -3820,13 +3857,12 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer,
3820 mutex_lock(&cache_chain_mutex); 3857 mutex_lock(&cache_chain_mutex);
3821 res = -EINVAL; 3858 res = -EINVAL;
3822 list_for_each(p, &cache_chain) { 3859 list_for_each(p, &cache_chain) {
3823 struct kmem_cache *cachep = list_entry(p, struct kmem_cache, 3860 struct kmem_cache *cachep;
3824 next);
3825 3861
3862 cachep = list_entry(p, struct kmem_cache, next);
3826 if (!strcmp(cachep->name, kbuf)) { 3863 if (!strcmp(cachep->name, kbuf)) {
3827 if (limit < 1 || 3864 if (limit < 1 || batchcount < 1 ||
3828 batchcount < 1 || 3865 batchcount > limit || shared < 0) {
3829 batchcount > limit || shared < 0) {
3830 res = 0; 3866 res = 0;
3831 } else { 3867 } else {
3832 res = do_tune_cpucache(cachep, limit, 3868 res = do_tune_cpucache(cachep, limit,