diff options
Diffstat (limited to 'mm/slab.c')
-rw-r--r-- | mm/slab.c | 1233 |
1 files changed, 760 insertions, 473 deletions
@@ -50,7 +50,7 @@ | |||
50 | * The head array is strictly LIFO and should improve the cache hit rates. | 50 | * The head array is strictly LIFO and should improve the cache hit rates. |
51 | * On SMP, it additionally reduces the spinlock operations. | 51 | * On SMP, it additionally reduces the spinlock operations. |
52 | * | 52 | * |
53 | * The c_cpuarray may not be read with enabled local interrupts - | 53 | * The c_cpuarray may not be read with enabled local interrupts - |
54 | * it's changed with a smp_call_function(). | 54 | * it's changed with a smp_call_function(). |
55 | * | 55 | * |
56 | * SMP synchronization: | 56 | * SMP synchronization: |
@@ -94,6 +94,7 @@ | |||
94 | #include <linux/interrupt.h> | 94 | #include <linux/interrupt.h> |
95 | #include <linux/init.h> | 95 | #include <linux/init.h> |
96 | #include <linux/compiler.h> | 96 | #include <linux/compiler.h> |
97 | #include <linux/cpuset.h> | ||
97 | #include <linux/seq_file.h> | 98 | #include <linux/seq_file.h> |
98 | #include <linux/notifier.h> | 99 | #include <linux/notifier.h> |
99 | #include <linux/kallsyms.h> | 100 | #include <linux/kallsyms.h> |
@@ -170,15 +171,15 @@ | |||
170 | #if DEBUG | 171 | #if DEBUG |
171 | # define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \ | 172 | # define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \ |
172 | SLAB_POISON | SLAB_HWCACHE_ALIGN | \ | 173 | SLAB_POISON | SLAB_HWCACHE_ALIGN | \ |
173 | SLAB_NO_REAP | SLAB_CACHE_DMA | \ | 174 | SLAB_CACHE_DMA | \ |
174 | SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \ | 175 | SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \ |
175 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ | 176 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ |
176 | SLAB_DESTROY_BY_RCU) | 177 | SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD) |
177 | #else | 178 | #else |
178 | # define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \ | 179 | # define CREATE_MASK (SLAB_HWCACHE_ALIGN | \ |
179 | SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \ | 180 | SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \ |
180 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ | 181 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ |
181 | SLAB_DESTROY_BY_RCU) | 182 | SLAB_DESTROY_BY_RCU | SLAB_MEM_SPREAD) |
182 | #endif | 183 | #endif |
183 | 184 | ||
184 | /* | 185 | /* |
@@ -203,7 +204,8 @@ | |||
203 | typedef unsigned int kmem_bufctl_t; | 204 | typedef unsigned int kmem_bufctl_t; |
204 | #define BUFCTL_END (((kmem_bufctl_t)(~0U))-0) | 205 | #define BUFCTL_END (((kmem_bufctl_t)(~0U))-0) |
205 | #define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1) | 206 | #define BUFCTL_FREE (((kmem_bufctl_t)(~0U))-1) |
206 | #define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-2) | 207 | #define BUFCTL_ACTIVE (((kmem_bufctl_t)(~0U))-2) |
208 | #define SLAB_LIMIT (((kmem_bufctl_t)(~0U))-3) | ||
207 | 209 | ||
208 | /* Max number of objs-per-slab for caches which use off-slab slabs. | 210 | /* Max number of objs-per-slab for caches which use off-slab slabs. |
209 | * Needed to avoid a possible looping condition in cache_grow(). | 211 | * Needed to avoid a possible looping condition in cache_grow(). |
@@ -266,16 +268,17 @@ struct array_cache { | |||
266 | unsigned int batchcount; | 268 | unsigned int batchcount; |
267 | unsigned int touched; | 269 | unsigned int touched; |
268 | spinlock_t lock; | 270 | spinlock_t lock; |
269 | void *entry[0]; /* | 271 | void *entry[0]; /* |
270 | * Must have this definition in here for the proper | 272 | * Must have this definition in here for the proper |
271 | * alignment of array_cache. Also simplifies accessing | 273 | * alignment of array_cache. Also simplifies accessing |
272 | * the entries. | 274 | * the entries. |
273 | * [0] is for gcc 2.95. It should really be []. | 275 | * [0] is for gcc 2.95. It should really be []. |
274 | */ | 276 | */ |
275 | }; | 277 | }; |
276 | 278 | ||
277 | /* bootstrap: The caches do not work without cpuarrays anymore, | 279 | /* |
278 | * but the cpuarrays are allocated from the generic caches... | 280 | * bootstrap: The caches do not work without cpuarrays anymore, but the |
281 | * cpuarrays are allocated from the generic caches... | ||
279 | */ | 282 | */ |
280 | #define BOOT_CPUCACHE_ENTRIES 1 | 283 | #define BOOT_CPUCACHE_ENTRIES 1 |
281 | struct arraycache_init { | 284 | struct arraycache_init { |
@@ -291,13 +294,13 @@ struct kmem_list3 { | |||
291 | struct list_head slabs_full; | 294 | struct list_head slabs_full; |
292 | struct list_head slabs_free; | 295 | struct list_head slabs_free; |
293 | unsigned long free_objects; | 296 | unsigned long free_objects; |
294 | unsigned long next_reap; | ||
295 | int free_touched; | ||
296 | unsigned int free_limit; | 297 | unsigned int free_limit; |
297 | unsigned int colour_next; /* Per-node cache coloring */ | 298 | unsigned int colour_next; /* Per-node cache coloring */ |
298 | spinlock_t list_lock; | 299 | spinlock_t list_lock; |
299 | struct array_cache *shared; /* shared per node */ | 300 | struct array_cache *shared; /* shared per node */ |
300 | struct array_cache **alien; /* on other nodes */ | 301 | struct array_cache **alien; /* on other nodes */ |
302 | unsigned long next_reap; /* updated without locking */ | ||
303 | int free_touched; /* updated without locking */ | ||
301 | }; | 304 | }; |
302 | 305 | ||
303 | /* | 306 | /* |
@@ -310,10 +313,8 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS]; | |||
310 | #define SIZE_L3 (1 + MAX_NUMNODES) | 313 | #define SIZE_L3 (1 + MAX_NUMNODES) |
311 | 314 | ||
312 | /* | 315 | /* |
313 | * This function must be completely optimized away if | 316 | * This function must be completely optimized away if a constant is passed to |
314 | * a constant is passed to it. Mostly the same as | 317 | * it. Mostly the same as what is in linux/slab.h except it returns an index. |
315 | * what is in linux/slab.h except it returns an | ||
316 | * index. | ||
317 | */ | 318 | */ |
318 | static __always_inline int index_of(const size_t size) | 319 | static __always_inline int index_of(const size_t size) |
319 | { | 320 | { |
@@ -351,14 +352,14 @@ static void kmem_list3_init(struct kmem_list3 *parent) | |||
351 | parent->free_touched = 0; | 352 | parent->free_touched = 0; |
352 | } | 353 | } |
353 | 354 | ||
354 | #define MAKE_LIST(cachep, listp, slab, nodeid) \ | 355 | #define MAKE_LIST(cachep, listp, slab, nodeid) \ |
355 | do { \ | 356 | do { \ |
356 | INIT_LIST_HEAD(listp); \ | 357 | INIT_LIST_HEAD(listp); \ |
357 | list_splice(&(cachep->nodelists[nodeid]->slab), listp); \ | 358 | list_splice(&(cachep->nodelists[nodeid]->slab), listp); \ |
358 | } while (0) | 359 | } while (0) |
359 | 360 | ||
360 | #define MAKE_ALL_LISTS(cachep, ptr, nodeid) \ | 361 | #define MAKE_ALL_LISTS(cachep, ptr, nodeid) \ |
361 | do { \ | 362 | do { \ |
362 | MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid); \ | 363 | MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid); \ |
363 | MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \ | 364 | MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \ |
364 | MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \ | 365 | MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \ |
@@ -373,28 +374,30 @@ static void kmem_list3_init(struct kmem_list3 *parent) | |||
373 | struct kmem_cache { | 374 | struct kmem_cache { |
374 | /* 1) per-cpu data, touched during every alloc/free */ | 375 | /* 1) per-cpu data, touched during every alloc/free */ |
375 | struct array_cache *array[NR_CPUS]; | 376 | struct array_cache *array[NR_CPUS]; |
377 | /* 2) Cache tunables. Protected by cache_chain_mutex */ | ||
376 | unsigned int batchcount; | 378 | unsigned int batchcount; |
377 | unsigned int limit; | 379 | unsigned int limit; |
378 | unsigned int shared; | 380 | unsigned int shared; |
381 | |||
379 | unsigned int buffer_size; | 382 | unsigned int buffer_size; |
380 | /* 2) touched by every alloc & free from the backend */ | 383 | /* 3) touched by every alloc & free from the backend */ |
381 | struct kmem_list3 *nodelists[MAX_NUMNODES]; | 384 | struct kmem_list3 *nodelists[MAX_NUMNODES]; |
382 | unsigned int flags; /* constant flags */ | ||
383 | unsigned int num; /* # of objs per slab */ | ||
384 | spinlock_t spinlock; | ||
385 | 385 | ||
386 | /* 3) cache_grow/shrink */ | 386 | unsigned int flags; /* constant flags */ |
387 | unsigned int num; /* # of objs per slab */ | ||
388 | |||
389 | /* 4) cache_grow/shrink */ | ||
387 | /* order of pgs per slab (2^n) */ | 390 | /* order of pgs per slab (2^n) */ |
388 | unsigned int gfporder; | 391 | unsigned int gfporder; |
389 | 392 | ||
390 | /* force GFP flags, e.g. GFP_DMA */ | 393 | /* force GFP flags, e.g. GFP_DMA */ |
391 | gfp_t gfpflags; | 394 | gfp_t gfpflags; |
392 | 395 | ||
393 | size_t colour; /* cache colouring range */ | 396 | size_t colour; /* cache colouring range */ |
394 | unsigned int colour_off; /* colour offset */ | 397 | unsigned int colour_off; /* colour offset */ |
395 | struct kmem_cache *slabp_cache; | 398 | struct kmem_cache *slabp_cache; |
396 | unsigned int slab_size; | 399 | unsigned int slab_size; |
397 | unsigned int dflags; /* dynamic flags */ | 400 | unsigned int dflags; /* dynamic flags */ |
398 | 401 | ||
399 | /* constructor func */ | 402 | /* constructor func */ |
400 | void (*ctor) (void *, struct kmem_cache *, unsigned long); | 403 | void (*ctor) (void *, struct kmem_cache *, unsigned long); |
@@ -402,11 +405,11 @@ struct kmem_cache { | |||
402 | /* de-constructor func */ | 405 | /* de-constructor func */ |
403 | void (*dtor) (void *, struct kmem_cache *, unsigned long); | 406 | void (*dtor) (void *, struct kmem_cache *, unsigned long); |
404 | 407 | ||
405 | /* 4) cache creation/removal */ | 408 | /* 5) cache creation/removal */ |
406 | const char *name; | 409 | const char *name; |
407 | struct list_head next; | 410 | struct list_head next; |
408 | 411 | ||
409 | /* 5) statistics */ | 412 | /* 6) statistics */ |
410 | #if STATS | 413 | #if STATS |
411 | unsigned long num_active; | 414 | unsigned long num_active; |
412 | unsigned long num_allocations; | 415 | unsigned long num_allocations; |
@@ -438,8 +441,9 @@ struct kmem_cache { | |||
438 | #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) | 441 | #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) |
439 | 442 | ||
440 | #define BATCHREFILL_LIMIT 16 | 443 | #define BATCHREFILL_LIMIT 16 |
441 | /* Optimization question: fewer reaps means less | 444 | /* |
442 | * probability for unnecessary cpucache drain/refill cycles. | 445 | * Optimization question: fewer reaps means less probability for unnecessary |
446 | * cpucache drain/refill cycles. | ||
443 | * | 447 | * |
444 | * OTOH the cpuarrays can contain lots of objects, | 448 | * OTOH the cpuarrays can contain lots of objects, |
445 | * which could lock up otherwise freeable slabs. | 449 | * which could lock up otherwise freeable slabs. |
@@ -453,17 +457,19 @@ struct kmem_cache { | |||
453 | #define STATS_INC_ALLOCED(x) ((x)->num_allocations++) | 457 | #define STATS_INC_ALLOCED(x) ((x)->num_allocations++) |
454 | #define STATS_INC_GROWN(x) ((x)->grown++) | 458 | #define STATS_INC_GROWN(x) ((x)->grown++) |
455 | #define STATS_INC_REAPED(x) ((x)->reaped++) | 459 | #define STATS_INC_REAPED(x) ((x)->reaped++) |
456 | #define STATS_SET_HIGH(x) do { if ((x)->num_active > (x)->high_mark) \ | 460 | #define STATS_SET_HIGH(x) \ |
457 | (x)->high_mark = (x)->num_active; \ | 461 | do { \ |
458 | } while (0) | 462 | if ((x)->num_active > (x)->high_mark) \ |
463 | (x)->high_mark = (x)->num_active; \ | ||
464 | } while (0) | ||
459 | #define STATS_INC_ERR(x) ((x)->errors++) | 465 | #define STATS_INC_ERR(x) ((x)->errors++) |
460 | #define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++) | 466 | #define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++) |
461 | #define STATS_INC_NODEFREES(x) ((x)->node_frees++) | 467 | #define STATS_INC_NODEFREES(x) ((x)->node_frees++) |
462 | #define STATS_SET_FREEABLE(x, i) \ | 468 | #define STATS_SET_FREEABLE(x, i) \ |
463 | do { if ((x)->max_freeable < i) \ | 469 | do { \ |
464 | (x)->max_freeable = i; \ | 470 | if ((x)->max_freeable < i) \ |
465 | } while (0) | 471 | (x)->max_freeable = i; \ |
466 | 472 | } while (0) | |
467 | #define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit) | 473 | #define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit) |
468 | #define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss) | 474 | #define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss) |
469 | #define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit) | 475 | #define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit) |
@@ -478,9 +484,7 @@ struct kmem_cache { | |||
478 | #define STATS_INC_ERR(x) do { } while (0) | 484 | #define STATS_INC_ERR(x) do { } while (0) |
479 | #define STATS_INC_NODEALLOCS(x) do { } while (0) | 485 | #define STATS_INC_NODEALLOCS(x) do { } while (0) |
480 | #define STATS_INC_NODEFREES(x) do { } while (0) | 486 | #define STATS_INC_NODEFREES(x) do { } while (0) |
481 | #define STATS_SET_FREEABLE(x, i) \ | 487 | #define STATS_SET_FREEABLE(x, i) do { } while (0) |
482 | do { } while (0) | ||
483 | |||
484 | #define STATS_INC_ALLOCHIT(x) do { } while (0) | 488 | #define STATS_INC_ALLOCHIT(x) do { } while (0) |
485 | #define STATS_INC_ALLOCMISS(x) do { } while (0) | 489 | #define STATS_INC_ALLOCMISS(x) do { } while (0) |
486 | #define STATS_INC_FREEHIT(x) do { } while (0) | 490 | #define STATS_INC_FREEHIT(x) do { } while (0) |
@@ -488,7 +492,8 @@ struct kmem_cache { | |||
488 | #endif | 492 | #endif |
489 | 493 | ||
490 | #if DEBUG | 494 | #if DEBUG |
491 | /* Magic nums for obj red zoning. | 495 | /* |
496 | * Magic nums for obj red zoning. | ||
492 | * Placed in the first word before and the first word after an obj. | 497 | * Placed in the first word before and the first word after an obj. |
493 | */ | 498 | */ |
494 | #define RED_INACTIVE 0x5A2CF071UL /* when obj is inactive */ | 499 | #define RED_INACTIVE 0x5A2CF071UL /* when obj is inactive */ |
@@ -499,7 +504,8 @@ struct kmem_cache { | |||
499 | #define POISON_FREE 0x6b /* for use-after-free poisoning */ | 504 | #define POISON_FREE 0x6b /* for use-after-free poisoning */ |
500 | #define POISON_END 0xa5 /* end-byte of poisoning */ | 505 | #define POISON_END 0xa5 /* end-byte of poisoning */ |
501 | 506 | ||
502 | /* memory layout of objects: | 507 | /* |
508 | * memory layout of objects: | ||
503 | * 0 : objp | 509 | * 0 : objp |
504 | * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that | 510 | * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that |
505 | * the end of an object is aligned with the end of the real | 511 | * the end of an object is aligned with the end of the real |
@@ -508,7 +514,8 @@ struct kmem_cache { | |||
508 | * redzone word. | 514 | * redzone word. |
509 | * cachep->obj_offset: The real object. | 515 | * cachep->obj_offset: The real object. |
510 | * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long] | 516 | * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long] |
511 | * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address [BYTES_PER_WORD long] | 517 | * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address |
518 | * [BYTES_PER_WORD long] | ||
512 | */ | 519 | */ |
513 | static int obj_offset(struct kmem_cache *cachep) | 520 | static int obj_offset(struct kmem_cache *cachep) |
514 | { | 521 | { |
@@ -552,8 +559,8 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp) | |||
552 | #endif | 559 | #endif |
553 | 560 | ||
554 | /* | 561 | /* |
555 | * Maximum size of an obj (in 2^order pages) | 562 | * Maximum size of an obj (in 2^order pages) and absolute limit for the gfp |
556 | * and absolute limit for the gfp order. | 563 | * order. |
557 | */ | 564 | */ |
558 | #if defined(CONFIG_LARGE_ALLOCS) | 565 | #if defined(CONFIG_LARGE_ALLOCS) |
559 | #define MAX_OBJ_ORDER 13 /* up to 32Mb */ | 566 | #define MAX_OBJ_ORDER 13 /* up to 32Mb */ |
@@ -573,9 +580,10 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp) | |||
573 | #define BREAK_GFP_ORDER_LO 0 | 580 | #define BREAK_GFP_ORDER_LO 0 |
574 | static int slab_break_gfp_order = BREAK_GFP_ORDER_LO; | 581 | static int slab_break_gfp_order = BREAK_GFP_ORDER_LO; |
575 | 582 | ||
576 | /* Functions for storing/retrieving the cachep and/or slab from the | 583 | /* |
577 | * global 'mem_map'. These are used to find the slab an obj belongs to. | 584 | * Functions for storing/retrieving the cachep and/or slab from the page |
578 | * With kfree(), these are used to find the cache which an obj belongs to. | 585 | * allocator. These are used to find the slab an obj belongs to. With kfree(), |
586 | * these are used to find the cache which an obj belongs to. | ||
579 | */ | 587 | */ |
580 | static inline void page_set_cache(struct page *page, struct kmem_cache *cache) | 588 | static inline void page_set_cache(struct page *page, struct kmem_cache *cache) |
581 | { | 589 | { |
@@ -584,6 +592,8 @@ static inline void page_set_cache(struct page *page, struct kmem_cache *cache) | |||
584 | 592 | ||
585 | static inline struct kmem_cache *page_get_cache(struct page *page) | 593 | static inline struct kmem_cache *page_get_cache(struct page *page) |
586 | { | 594 | { |
595 | if (unlikely(PageCompound(page))) | ||
596 | page = (struct page *)page_private(page); | ||
587 | return (struct kmem_cache *)page->lru.next; | 597 | return (struct kmem_cache *)page->lru.next; |
588 | } | 598 | } |
589 | 599 | ||
@@ -594,6 +604,8 @@ static inline void page_set_slab(struct page *page, struct slab *slab) | |||
594 | 604 | ||
595 | static inline struct slab *page_get_slab(struct page *page) | 605 | static inline struct slab *page_get_slab(struct page *page) |
596 | { | 606 | { |
607 | if (unlikely(PageCompound(page))) | ||
608 | page = (struct page *)page_private(page); | ||
597 | return (struct slab *)page->lru.prev; | 609 | return (struct slab *)page->lru.prev; |
598 | } | 610 | } |
599 | 611 | ||
@@ -609,7 +621,21 @@ static inline struct slab *virt_to_slab(const void *obj) | |||
609 | return page_get_slab(page); | 621 | return page_get_slab(page); |
610 | } | 622 | } |
611 | 623 | ||
612 | /* These are the default caches for kmalloc. Custom caches can have other sizes. */ | 624 | static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab, |
625 | unsigned int idx) | ||
626 | { | ||
627 | return slab->s_mem + cache->buffer_size * idx; | ||
628 | } | ||
629 | |||
630 | static inline unsigned int obj_to_index(struct kmem_cache *cache, | ||
631 | struct slab *slab, void *obj) | ||
632 | { | ||
633 | return (unsigned)(obj - slab->s_mem) / cache->buffer_size; | ||
634 | } | ||
635 | |||
636 | /* | ||
637 | * These are the default caches for kmalloc. Custom caches can have other sizes. | ||
638 | */ | ||
613 | struct cache_sizes malloc_sizes[] = { | 639 | struct cache_sizes malloc_sizes[] = { |
614 | #define CACHE(x) { .cs_size = (x) }, | 640 | #define CACHE(x) { .cs_size = (x) }, |
615 | #include <linux/kmalloc_sizes.h> | 641 | #include <linux/kmalloc_sizes.h> |
@@ -642,8 +668,6 @@ static struct kmem_cache cache_cache = { | |||
642 | .limit = BOOT_CPUCACHE_ENTRIES, | 668 | .limit = BOOT_CPUCACHE_ENTRIES, |
643 | .shared = 1, | 669 | .shared = 1, |
644 | .buffer_size = sizeof(struct kmem_cache), | 670 | .buffer_size = sizeof(struct kmem_cache), |
645 | .flags = SLAB_NO_REAP, | ||
646 | .spinlock = SPIN_LOCK_UNLOCKED, | ||
647 | .name = "kmem_cache", | 671 | .name = "kmem_cache", |
648 | #if DEBUG | 672 | #if DEBUG |
649 | .obj_size = sizeof(struct kmem_cache), | 673 | .obj_size = sizeof(struct kmem_cache), |
@@ -655,8 +679,8 @@ static DEFINE_MUTEX(cache_chain_mutex); | |||
655 | static struct list_head cache_chain; | 679 | static struct list_head cache_chain; |
656 | 680 | ||
657 | /* | 681 | /* |
658 | * vm_enough_memory() looks at this to determine how many | 682 | * vm_enough_memory() looks at this to determine how many slab-allocated pages |
659 | * slab-allocated pages are possibly freeable under pressure | 683 | * are possibly freeable under pressure |
660 | * | 684 | * |
661 | * SLAB_RECLAIM_ACCOUNT turns this on per-slab | 685 | * SLAB_RECLAIM_ACCOUNT turns this on per-slab |
662 | */ | 686 | */ |
@@ -675,7 +699,8 @@ static enum { | |||
675 | 699 | ||
676 | static DEFINE_PER_CPU(struct work_struct, reap_work); | 700 | static DEFINE_PER_CPU(struct work_struct, reap_work); |
677 | 701 | ||
678 | static void free_block(struct kmem_cache *cachep, void **objpp, int len, int node); | 702 | static void free_block(struct kmem_cache *cachep, void **objpp, int len, |
703 | int node); | ||
679 | static void enable_cpucache(struct kmem_cache *cachep); | 704 | static void enable_cpucache(struct kmem_cache *cachep); |
680 | static void cache_reap(void *unused); | 705 | static void cache_reap(void *unused); |
681 | static int __node_shrink(struct kmem_cache *cachep, int node); | 706 | static int __node_shrink(struct kmem_cache *cachep, int node); |
@@ -685,7 +710,8 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) | |||
685 | return cachep->array[smp_processor_id()]; | 710 | return cachep->array[smp_processor_id()]; |
686 | } | 711 | } |
687 | 712 | ||
688 | static inline struct kmem_cache *__find_general_cachep(size_t size, gfp_t gfpflags) | 713 | static inline struct kmem_cache *__find_general_cachep(size_t size, |
714 | gfp_t gfpflags) | ||
689 | { | 715 | { |
690 | struct cache_sizes *csizep = malloc_sizes; | 716 | struct cache_sizes *csizep = malloc_sizes; |
691 | 717 | ||
@@ -720,8 +746,9 @@ static size_t slab_mgmt_size(size_t nr_objs, size_t align) | |||
720 | return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align); | 746 | return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align); |
721 | } | 747 | } |
722 | 748 | ||
723 | /* Calculate the number of objects and left-over bytes for a given | 749 | /* |
724 | buffer size. */ | 750 | * Calculate the number of objects and left-over bytes for a given buffer size. |
751 | */ | ||
725 | static void cache_estimate(unsigned long gfporder, size_t buffer_size, | 752 | static void cache_estimate(unsigned long gfporder, size_t buffer_size, |
726 | size_t align, int flags, size_t *left_over, | 753 | size_t align, int flags, size_t *left_over, |
727 | unsigned int *num) | 754 | unsigned int *num) |
@@ -782,7 +809,8 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, | |||
782 | 809 | ||
783 | #define slab_error(cachep, msg) __slab_error(__FUNCTION__, cachep, msg) | 810 | #define slab_error(cachep, msg) __slab_error(__FUNCTION__, cachep, msg) |
784 | 811 | ||
785 | static void __slab_error(const char *function, struct kmem_cache *cachep, char *msg) | 812 | static void __slab_error(const char *function, struct kmem_cache *cachep, |
813 | char *msg) | ||
786 | { | 814 | { |
787 | printk(KERN_ERR "slab error in %s(): cache `%s': %s\n", | 815 | printk(KERN_ERR "slab error in %s(): cache `%s': %s\n", |
788 | function, cachep->name, msg); | 816 | function, cachep->name, msg); |
@@ -804,7 +832,7 @@ static void init_reap_node(int cpu) | |||
804 | 832 | ||
805 | node = next_node(cpu_to_node(cpu), node_online_map); | 833 | node = next_node(cpu_to_node(cpu), node_online_map); |
806 | if (node == MAX_NUMNODES) | 834 | if (node == MAX_NUMNODES) |
807 | node = 0; | 835 | node = first_node(node_online_map); |
808 | 836 | ||
809 | __get_cpu_var(reap_node) = node; | 837 | __get_cpu_var(reap_node) = node; |
810 | } | 838 | } |
@@ -870,8 +898,33 @@ static struct array_cache *alloc_arraycache(int node, int entries, | |||
870 | return nc; | 898 | return nc; |
871 | } | 899 | } |
872 | 900 | ||
901 | /* | ||
902 | * Transfer objects in one arraycache to another. | ||
903 | * Locking must be handled by the caller. | ||
904 | * | ||
905 | * Return the number of entries transferred. | ||
906 | */ | ||
907 | static int transfer_objects(struct array_cache *to, | ||
908 | struct array_cache *from, unsigned int max) | ||
909 | { | ||
910 | /* Figure out how many entries to transfer */ | ||
911 | int nr = min(min(from->avail, max), to->limit - to->avail); | ||
912 | |||
913 | if (!nr) | ||
914 | return 0; | ||
915 | |||
916 | memcpy(to->entry + to->avail, from->entry + from->avail -nr, | ||
917 | sizeof(void *) *nr); | ||
918 | |||
919 | from->avail -= nr; | ||
920 | to->avail += nr; | ||
921 | to->touched = 1; | ||
922 | return nr; | ||
923 | } | ||
924 | |||
873 | #ifdef CONFIG_NUMA | 925 | #ifdef CONFIG_NUMA |
874 | static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int); | 926 | static void *__cache_alloc_node(struct kmem_cache *, gfp_t, int); |
927 | static void *alternate_node_alloc(struct kmem_cache *, gfp_t); | ||
875 | 928 | ||
876 | static struct array_cache **alloc_alien_cache(int node, int limit) | 929 | static struct array_cache **alloc_alien_cache(int node, int limit) |
877 | { | 930 | { |
@@ -906,10 +959,8 @@ static void free_alien_cache(struct array_cache **ac_ptr) | |||
906 | 959 | ||
907 | if (!ac_ptr) | 960 | if (!ac_ptr) |
908 | return; | 961 | return; |
909 | |||
910 | for_each_node(i) | 962 | for_each_node(i) |
911 | kfree(ac_ptr[i]); | 963 | kfree(ac_ptr[i]); |
912 | |||
913 | kfree(ac_ptr); | 964 | kfree(ac_ptr); |
914 | } | 965 | } |
915 | 966 | ||
@@ -920,6 +971,13 @@ static void __drain_alien_cache(struct kmem_cache *cachep, | |||
920 | 971 | ||
921 | if (ac->avail) { | 972 | if (ac->avail) { |
922 | spin_lock(&rl3->list_lock); | 973 | spin_lock(&rl3->list_lock); |
974 | /* | ||
975 | * Stuff objects into the remote node's shared array first. | ||
976 | * That way we could avoid the overhead of putting the objects | ||
977 | * into the free lists and getting them back later. | ||
978 | */ | ||
979 | transfer_objects(rl3->shared, ac, ac->limit); | ||
980 | |||
923 | free_block(cachep, ac->entry, ac->avail, node); | 981 | free_block(cachep, ac->entry, ac->avail, node); |
924 | ac->avail = 0; | 982 | ac->avail = 0; |
925 | spin_unlock(&rl3->list_lock); | 983 | spin_unlock(&rl3->list_lock); |
@@ -935,15 +993,16 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3) | |||
935 | 993 | ||
936 | if (l3->alien) { | 994 | if (l3->alien) { |
937 | struct array_cache *ac = l3->alien[node]; | 995 | struct array_cache *ac = l3->alien[node]; |
938 | if (ac && ac->avail) { | 996 | |
939 | spin_lock_irq(&ac->lock); | 997 | if (ac && ac->avail && spin_trylock_irq(&ac->lock)) { |
940 | __drain_alien_cache(cachep, ac, node); | 998 | __drain_alien_cache(cachep, ac, node); |
941 | spin_unlock_irq(&ac->lock); | 999 | spin_unlock_irq(&ac->lock); |
942 | } | 1000 | } |
943 | } | 1001 | } |
944 | } | 1002 | } |
945 | 1003 | ||
946 | static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien) | 1004 | static void drain_alien_cache(struct kmem_cache *cachep, |
1005 | struct array_cache **alien) | ||
947 | { | 1006 | { |
948 | int i = 0; | 1007 | int i = 0; |
949 | struct array_cache *ac; | 1008 | struct array_cache *ac; |
@@ -986,20 +1045,22 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, | |||
986 | switch (action) { | 1045 | switch (action) { |
987 | case CPU_UP_PREPARE: | 1046 | case CPU_UP_PREPARE: |
988 | mutex_lock(&cache_chain_mutex); | 1047 | mutex_lock(&cache_chain_mutex); |
989 | /* we need to do this right in the beginning since | 1048 | /* |
1049 | * We need to do this right in the beginning since | ||
990 | * alloc_arraycache's are going to use this list. | 1050 | * alloc_arraycache's are going to use this list. |
991 | * kmalloc_node allows us to add the slab to the right | 1051 | * kmalloc_node allows us to add the slab to the right |
992 | * kmem_list3 and not this cpu's kmem_list3 | 1052 | * kmem_list3 and not this cpu's kmem_list3 |
993 | */ | 1053 | */ |
994 | 1054 | ||
995 | list_for_each_entry(cachep, &cache_chain, next) { | 1055 | list_for_each_entry(cachep, &cache_chain, next) { |
996 | /* setup the size64 kmemlist for cpu before we can | 1056 | /* |
1057 | * Set up the size64 kmemlist for cpu before we can | ||
997 | * begin anything. Make sure some other cpu on this | 1058 | * begin anything. Make sure some other cpu on this |
998 | * node has not already allocated this | 1059 | * node has not already allocated this |
999 | */ | 1060 | */ |
1000 | if (!cachep->nodelists[node]) { | 1061 | if (!cachep->nodelists[node]) { |
1001 | if (!(l3 = kmalloc_node(memsize, | 1062 | l3 = kmalloc_node(memsize, GFP_KERNEL, node); |
1002 | GFP_KERNEL, node))) | 1063 | if (!l3) |
1003 | goto bad; | 1064 | goto bad; |
1004 | kmem_list3_init(l3); | 1065 | kmem_list3_init(l3); |
1005 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + | 1066 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + |
@@ -1015,13 +1076,15 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, | |||
1015 | 1076 | ||
1016 | spin_lock_irq(&cachep->nodelists[node]->list_lock); | 1077 | spin_lock_irq(&cachep->nodelists[node]->list_lock); |
1017 | cachep->nodelists[node]->free_limit = | 1078 | cachep->nodelists[node]->free_limit = |
1018 | (1 + nr_cpus_node(node)) * | 1079 | (1 + nr_cpus_node(node)) * |
1019 | cachep->batchcount + cachep->num; | 1080 | cachep->batchcount + cachep->num; |
1020 | spin_unlock_irq(&cachep->nodelists[node]->list_lock); | 1081 | spin_unlock_irq(&cachep->nodelists[node]->list_lock); |
1021 | } | 1082 | } |
1022 | 1083 | ||
1023 | /* Now we can go ahead with allocating the shared array's | 1084 | /* |
1024 | & array cache's */ | 1085 | * Now we can go ahead with allocating the shared arrays and |
1086 | * array caches | ||
1087 | */ | ||
1025 | list_for_each_entry(cachep, &cache_chain, next) { | 1088 | list_for_each_entry(cachep, &cache_chain, next) { |
1026 | struct array_cache *nc; | 1089 | struct array_cache *nc; |
1027 | struct array_cache *shared; | 1090 | struct array_cache *shared; |
@@ -1041,7 +1104,6 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, | |||
1041 | if (!alien) | 1104 | if (!alien) |
1042 | goto bad; | 1105 | goto bad; |
1043 | cachep->array[cpu] = nc; | 1106 | cachep->array[cpu] = nc; |
1044 | |||
1045 | l3 = cachep->nodelists[node]; | 1107 | l3 = cachep->nodelists[node]; |
1046 | BUG_ON(!l3); | 1108 | BUG_ON(!l3); |
1047 | 1109 | ||
@@ -1061,7 +1123,6 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, | |||
1061 | } | 1123 | } |
1062 | #endif | 1124 | #endif |
1063 | spin_unlock_irq(&l3->list_lock); | 1125 | spin_unlock_irq(&l3->list_lock); |
1064 | |||
1065 | kfree(shared); | 1126 | kfree(shared); |
1066 | free_alien_cache(alien); | 1127 | free_alien_cache(alien); |
1067 | } | 1128 | } |
@@ -1083,7 +1144,6 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, | |||
1083 | /* fall thru */ | 1144 | /* fall thru */ |
1084 | case CPU_UP_CANCELED: | 1145 | case CPU_UP_CANCELED: |
1085 | mutex_lock(&cache_chain_mutex); | 1146 | mutex_lock(&cache_chain_mutex); |
1086 | |||
1087 | list_for_each_entry(cachep, &cache_chain, next) { | 1147 | list_for_each_entry(cachep, &cache_chain, next) { |
1088 | struct array_cache *nc; | 1148 | struct array_cache *nc; |
1089 | struct array_cache *shared; | 1149 | struct array_cache *shared; |
@@ -1150,7 +1210,7 @@ free_array_cache: | |||
1150 | #endif | 1210 | #endif |
1151 | } | 1211 | } |
1152 | return NOTIFY_OK; | 1212 | return NOTIFY_OK; |
1153 | bad: | 1213 | bad: |
1154 | mutex_unlock(&cache_chain_mutex); | 1214 | mutex_unlock(&cache_chain_mutex); |
1155 | return NOTIFY_BAD; | 1215 | return NOTIFY_BAD; |
1156 | } | 1216 | } |
@@ -1160,7 +1220,8 @@ static struct notifier_block cpucache_notifier = { &cpuup_callback, NULL, 0 }; | |||
1160 | /* | 1220 | /* |
1161 | * swap the static kmem_list3 with kmalloced memory | 1221 | * swap the static kmem_list3 with kmalloced memory |
1162 | */ | 1222 | */ |
1163 | static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, int nodeid) | 1223 | static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, |
1224 | int nodeid) | ||
1164 | { | 1225 | { |
1165 | struct kmem_list3 *ptr; | 1226 | struct kmem_list3 *ptr; |
1166 | 1227 | ||
@@ -1175,8 +1236,9 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, int no | |||
1175 | local_irq_enable(); | 1236 | local_irq_enable(); |
1176 | } | 1237 | } |
1177 | 1238 | ||
1178 | /* Initialisation. | 1239 | /* |
1179 | * Called after the gfp() functions have been enabled, and before smp_init(). | 1240 | * Initialisation. Called after the page allocator have been initialised and |
1241 | * before smp_init(). | ||
1180 | */ | 1242 | */ |
1181 | void __init kmem_cache_init(void) | 1243 | void __init kmem_cache_init(void) |
1182 | { | 1244 | { |
@@ -1201,9 +1263,9 @@ void __init kmem_cache_init(void) | |||
1201 | 1263 | ||
1202 | /* Bootstrap is tricky, because several objects are allocated | 1264 | /* Bootstrap is tricky, because several objects are allocated |
1203 | * from caches that do not exist yet: | 1265 | * from caches that do not exist yet: |
1204 | * 1) initialize the cache_cache cache: it contains the struct kmem_cache | 1266 | * 1) initialize the cache_cache cache: it contains the struct |
1205 | * structures of all caches, except cache_cache itself: cache_cache | 1267 | * kmem_cache structures of all caches, except cache_cache itself: |
1206 | * is statically allocated. | 1268 | * cache_cache is statically allocated. |
1207 | * Initially an __init data area is used for the head array and the | 1269 | * Initially an __init data area is used for the head array and the |
1208 | * kmem_list3 structures, it's replaced with a kmalloc allocated | 1270 | * kmem_list3 structures, it's replaced with a kmalloc allocated |
1209 | * array at the end of the bootstrap. | 1271 | * array at the end of the bootstrap. |
@@ -1226,7 +1288,8 @@ void __init kmem_cache_init(void) | |||
1226 | cache_cache.array[smp_processor_id()] = &initarray_cache.cache; | 1288 | cache_cache.array[smp_processor_id()] = &initarray_cache.cache; |
1227 | cache_cache.nodelists[numa_node_id()] = &initkmem_list3[CACHE_CACHE]; | 1289 | cache_cache.nodelists[numa_node_id()] = &initkmem_list3[CACHE_CACHE]; |
1228 | 1290 | ||
1229 | cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, cache_line_size()); | 1291 | cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, |
1292 | cache_line_size()); | ||
1230 | 1293 | ||
1231 | for (order = 0; order < MAX_ORDER; order++) { | 1294 | for (order = 0; order < MAX_ORDER; order++) { |
1232 | cache_estimate(order, cache_cache.buffer_size, | 1295 | cache_estimate(order, cache_cache.buffer_size, |
@@ -1245,24 +1308,26 @@ void __init kmem_cache_init(void) | |||
1245 | sizes = malloc_sizes; | 1308 | sizes = malloc_sizes; |
1246 | names = cache_names; | 1309 | names = cache_names; |
1247 | 1310 | ||
1248 | /* Initialize the caches that provide memory for the array cache | 1311 | /* |
1249 | * and the kmem_list3 structures first. | 1312 | * Initialize the caches that provide memory for the array cache and the |
1250 | * Without this, further allocations will bug | 1313 | * kmem_list3 structures first. Without this, further allocations will |
1314 | * bug. | ||
1251 | */ | 1315 | */ |
1252 | 1316 | ||
1253 | sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name, | 1317 | sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name, |
1254 | sizes[INDEX_AC].cs_size, | 1318 | sizes[INDEX_AC].cs_size, |
1255 | ARCH_KMALLOC_MINALIGN, | 1319 | ARCH_KMALLOC_MINALIGN, |
1256 | (ARCH_KMALLOC_FLAGS | | 1320 | ARCH_KMALLOC_FLAGS|SLAB_PANIC, |
1257 | SLAB_PANIC), NULL, NULL); | 1321 | NULL, NULL); |
1258 | 1322 | ||
1259 | if (INDEX_AC != INDEX_L3) | 1323 | if (INDEX_AC != INDEX_L3) { |
1260 | sizes[INDEX_L3].cs_cachep = | 1324 | sizes[INDEX_L3].cs_cachep = |
1261 | kmem_cache_create(names[INDEX_L3].name, | 1325 | kmem_cache_create(names[INDEX_L3].name, |
1262 | sizes[INDEX_L3].cs_size, | 1326 | sizes[INDEX_L3].cs_size, |
1263 | ARCH_KMALLOC_MINALIGN, | 1327 | ARCH_KMALLOC_MINALIGN, |
1264 | (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, | 1328 | ARCH_KMALLOC_FLAGS|SLAB_PANIC, |
1265 | NULL); | 1329 | NULL, NULL); |
1330 | } | ||
1266 | 1331 | ||
1267 | while (sizes->cs_size != ULONG_MAX) { | 1332 | while (sizes->cs_size != ULONG_MAX) { |
1268 | /* | 1333 | /* |
@@ -1272,13 +1337,13 @@ void __init kmem_cache_init(void) | |||
1272 | * Note for systems short on memory removing the alignment will | 1337 | * Note for systems short on memory removing the alignment will |
1273 | * allow tighter packing of the smaller caches. | 1338 | * allow tighter packing of the smaller caches. |
1274 | */ | 1339 | */ |
1275 | if (!sizes->cs_cachep) | 1340 | if (!sizes->cs_cachep) { |
1276 | sizes->cs_cachep = kmem_cache_create(names->name, | 1341 | sizes->cs_cachep = kmem_cache_create(names->name, |
1277 | sizes->cs_size, | 1342 | sizes->cs_size, |
1278 | ARCH_KMALLOC_MINALIGN, | 1343 | ARCH_KMALLOC_MINALIGN, |
1279 | (ARCH_KMALLOC_FLAGS | 1344 | ARCH_KMALLOC_FLAGS|SLAB_PANIC, |
1280 | | SLAB_PANIC), | 1345 | NULL, NULL); |
1281 | NULL, NULL); | 1346 | } |
1282 | 1347 | ||
1283 | /* Inc off-slab bufctl limit until the ceiling is hit. */ | 1348 | /* Inc off-slab bufctl limit until the ceiling is hit. */ |
1284 | if (!(OFF_SLAB(sizes->cs_cachep))) { | 1349 | if (!(OFF_SLAB(sizes->cs_cachep))) { |
@@ -1287,13 +1352,11 @@ void __init kmem_cache_init(void) | |||
1287 | } | 1352 | } |
1288 | 1353 | ||
1289 | sizes->cs_dmacachep = kmem_cache_create(names->name_dma, | 1354 | sizes->cs_dmacachep = kmem_cache_create(names->name_dma, |
1290 | sizes->cs_size, | 1355 | sizes->cs_size, |
1291 | ARCH_KMALLOC_MINALIGN, | 1356 | ARCH_KMALLOC_MINALIGN, |
1292 | (ARCH_KMALLOC_FLAGS | | 1357 | ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA| |
1293 | SLAB_CACHE_DMA | | 1358 | SLAB_PANIC, |
1294 | SLAB_PANIC), NULL, | 1359 | NULL, NULL); |
1295 | NULL); | ||
1296 | |||
1297 | sizes++; | 1360 | sizes++; |
1298 | names++; | 1361 | names++; |
1299 | } | 1362 | } |
@@ -1345,20 +1408,22 @@ void __init kmem_cache_init(void) | |||
1345 | struct kmem_cache *cachep; | 1408 | struct kmem_cache *cachep; |
1346 | mutex_lock(&cache_chain_mutex); | 1409 | mutex_lock(&cache_chain_mutex); |
1347 | list_for_each_entry(cachep, &cache_chain, next) | 1410 | list_for_each_entry(cachep, &cache_chain, next) |
1348 | enable_cpucache(cachep); | 1411 | enable_cpucache(cachep); |
1349 | mutex_unlock(&cache_chain_mutex); | 1412 | mutex_unlock(&cache_chain_mutex); |
1350 | } | 1413 | } |
1351 | 1414 | ||
1352 | /* Done! */ | 1415 | /* Done! */ |
1353 | g_cpucache_up = FULL; | 1416 | g_cpucache_up = FULL; |
1354 | 1417 | ||
1355 | /* Register a cpu startup notifier callback | 1418 | /* |
1356 | * that initializes cpu_cache_get for all new cpus | 1419 | * Register a cpu startup notifier callback that initializes |
1420 | * cpu_cache_get for all new cpus | ||
1357 | */ | 1421 | */ |
1358 | register_cpu_notifier(&cpucache_notifier); | 1422 | register_cpu_notifier(&cpucache_notifier); |
1359 | 1423 | ||
1360 | /* The reap timers are started later, with a module init call: | 1424 | /* |
1361 | * That part of the kernel is not yet operational. | 1425 | * The reap timers are started later, with a module init call: That part |
1426 | * of the kernel is not yet operational. | ||
1362 | */ | 1427 | */ |
1363 | } | 1428 | } |
1364 | 1429 | ||
@@ -1366,16 +1431,13 @@ static int __init cpucache_init(void) | |||
1366 | { | 1431 | { |
1367 | int cpu; | 1432 | int cpu; |
1368 | 1433 | ||
1369 | /* | 1434 | /* |
1370 | * Register the timers that return unneeded | 1435 | * Register the timers that return unneeded pages to the page allocator |
1371 | * pages to gfp. | ||
1372 | */ | 1436 | */ |
1373 | for_each_online_cpu(cpu) | 1437 | for_each_online_cpu(cpu) |
1374 | start_cpu_timer(cpu); | 1438 | start_cpu_timer(cpu); |
1375 | |||
1376 | return 0; | 1439 | return 0; |
1377 | } | 1440 | } |
1378 | |||
1379 | __initcall(cpucache_init); | 1441 | __initcall(cpucache_init); |
1380 | 1442 | ||
1381 | /* | 1443 | /* |
@@ -1402,7 +1464,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
1402 | atomic_add(i, &slab_reclaim_pages); | 1464 | atomic_add(i, &slab_reclaim_pages); |
1403 | add_page_state(nr_slab, i); | 1465 | add_page_state(nr_slab, i); |
1404 | while (i--) { | 1466 | while (i--) { |
1405 | SetPageSlab(page); | 1467 | __SetPageSlab(page); |
1406 | page++; | 1468 | page++; |
1407 | } | 1469 | } |
1408 | return addr; | 1470 | return addr; |
@@ -1418,8 +1480,8 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) | |||
1418 | const unsigned long nr_freed = i; | 1480 | const unsigned long nr_freed = i; |
1419 | 1481 | ||
1420 | while (i--) { | 1482 | while (i--) { |
1421 | if (!TestClearPageSlab(page)) | 1483 | BUG_ON(!PageSlab(page)); |
1422 | BUG(); | 1484 | __ClearPageSlab(page); |
1423 | page++; | 1485 | page++; |
1424 | } | 1486 | } |
1425 | sub_page_state(nr_slab, nr_freed); | 1487 | sub_page_state(nr_slab, nr_freed); |
@@ -1489,9 +1551,8 @@ static void dump_line(char *data, int offset, int limit) | |||
1489 | { | 1551 | { |
1490 | int i; | 1552 | int i; |
1491 | printk(KERN_ERR "%03x:", offset); | 1553 | printk(KERN_ERR "%03x:", offset); |
1492 | for (i = 0; i < limit; i++) { | 1554 | for (i = 0; i < limit; i++) |
1493 | printk(" %02x", (unsigned char)data[offset + i]); | 1555 | printk(" %02x", (unsigned char)data[offset + i]); |
1494 | } | ||
1495 | printk("\n"); | 1556 | printk("\n"); |
1496 | } | 1557 | } |
1497 | #endif | 1558 | #endif |
@@ -1505,15 +1566,15 @@ static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines) | |||
1505 | 1566 | ||
1506 | if (cachep->flags & SLAB_RED_ZONE) { | 1567 | if (cachep->flags & SLAB_RED_ZONE) { |
1507 | printk(KERN_ERR "Redzone: 0x%lx/0x%lx.\n", | 1568 | printk(KERN_ERR "Redzone: 0x%lx/0x%lx.\n", |
1508 | *dbg_redzone1(cachep, objp), | 1569 | *dbg_redzone1(cachep, objp), |
1509 | *dbg_redzone2(cachep, objp)); | 1570 | *dbg_redzone2(cachep, objp)); |
1510 | } | 1571 | } |
1511 | 1572 | ||
1512 | if (cachep->flags & SLAB_STORE_USER) { | 1573 | if (cachep->flags & SLAB_STORE_USER) { |
1513 | printk(KERN_ERR "Last user: [<%p>]", | 1574 | printk(KERN_ERR "Last user: [<%p>]", |
1514 | *dbg_userword(cachep, objp)); | 1575 | *dbg_userword(cachep, objp)); |
1515 | print_symbol("(%s)", | 1576 | print_symbol("(%s)", |
1516 | (unsigned long)*dbg_userword(cachep, objp)); | 1577 | (unsigned long)*dbg_userword(cachep, objp)); |
1517 | printk("\n"); | 1578 | printk("\n"); |
1518 | } | 1579 | } |
1519 | realobj = (char *)objp + obj_offset(cachep); | 1580 | realobj = (char *)objp + obj_offset(cachep); |
@@ -1546,8 +1607,8 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) | |||
1546 | /* Print header */ | 1607 | /* Print header */ |
1547 | if (lines == 0) { | 1608 | if (lines == 0) { |
1548 | printk(KERN_ERR | 1609 | printk(KERN_ERR |
1549 | "Slab corruption: start=%p, len=%d\n", | 1610 | "Slab corruption: start=%p, len=%d\n", |
1550 | realobj, size); | 1611 | realobj, size); |
1551 | print_objinfo(cachep, objp, 0); | 1612 | print_objinfo(cachep, objp, 0); |
1552 | } | 1613 | } |
1553 | /* Hexdump the affected line */ | 1614 | /* Hexdump the affected line */ |
@@ -1568,18 +1629,18 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) | |||
1568 | * exist: | 1629 | * exist: |
1569 | */ | 1630 | */ |
1570 | struct slab *slabp = virt_to_slab(objp); | 1631 | struct slab *slabp = virt_to_slab(objp); |
1571 | int objnr; | 1632 | unsigned int objnr; |
1572 | 1633 | ||
1573 | objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size; | 1634 | objnr = obj_to_index(cachep, slabp, objp); |
1574 | if (objnr) { | 1635 | if (objnr) { |
1575 | objp = slabp->s_mem + (objnr - 1) * cachep->buffer_size; | 1636 | objp = index_to_obj(cachep, slabp, objnr - 1); |
1576 | realobj = (char *)objp + obj_offset(cachep); | 1637 | realobj = (char *)objp + obj_offset(cachep); |
1577 | printk(KERN_ERR "Prev obj: start=%p, len=%d\n", | 1638 | printk(KERN_ERR "Prev obj: start=%p, len=%d\n", |
1578 | realobj, size); | 1639 | realobj, size); |
1579 | print_objinfo(cachep, objp, 2); | 1640 | print_objinfo(cachep, objp, 2); |
1580 | } | 1641 | } |
1581 | if (objnr + 1 < cachep->num) { | 1642 | if (objnr + 1 < cachep->num) { |
1582 | objp = slabp->s_mem + (objnr + 1) * cachep->buffer_size; | 1643 | objp = index_to_obj(cachep, slabp, objnr + 1); |
1583 | realobj = (char *)objp + obj_offset(cachep); | 1644 | realobj = (char *)objp + obj_offset(cachep); |
1584 | printk(KERN_ERR "Next obj: start=%p, len=%d\n", | 1645 | printk(KERN_ERR "Next obj: start=%p, len=%d\n", |
1585 | realobj, size); | 1646 | realobj, size); |
@@ -1591,22 +1652,25 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) | |||
1591 | 1652 | ||
1592 | #if DEBUG | 1653 | #if DEBUG |
1593 | /** | 1654 | /** |
1594 | * slab_destroy_objs - call the registered destructor for each object in | 1655 | * slab_destroy_objs - destroy a slab and its objects |
1595 | * a slab that is to be destroyed. | 1656 | * @cachep: cache pointer being destroyed |
1657 | * @slabp: slab pointer being destroyed | ||
1658 | * | ||
1659 | * Call the registered destructor for each object in a slab that is being | ||
1660 | * destroyed. | ||
1596 | */ | 1661 | */ |
1597 | static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) | 1662 | static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) |
1598 | { | 1663 | { |
1599 | int i; | 1664 | int i; |
1600 | for (i = 0; i < cachep->num; i++) { | 1665 | for (i = 0; i < cachep->num; i++) { |
1601 | void *objp = slabp->s_mem + cachep->buffer_size * i; | 1666 | void *objp = index_to_obj(cachep, slabp, i); |
1602 | 1667 | ||
1603 | if (cachep->flags & SLAB_POISON) { | 1668 | if (cachep->flags & SLAB_POISON) { |
1604 | #ifdef CONFIG_DEBUG_PAGEALLOC | 1669 | #ifdef CONFIG_DEBUG_PAGEALLOC |
1605 | if ((cachep->buffer_size % PAGE_SIZE) == 0 | 1670 | if (cachep->buffer_size % PAGE_SIZE == 0 && |
1606 | && OFF_SLAB(cachep)) | 1671 | OFF_SLAB(cachep)) |
1607 | kernel_map_pages(virt_to_page(objp), | 1672 | kernel_map_pages(virt_to_page(objp), |
1608 | cachep->buffer_size / PAGE_SIZE, | 1673 | cachep->buffer_size / PAGE_SIZE, 1); |
1609 | 1); | ||
1610 | else | 1674 | else |
1611 | check_poison_obj(cachep, objp); | 1675 | check_poison_obj(cachep, objp); |
1612 | #else | 1676 | #else |
@@ -1631,7 +1695,7 @@ static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) | |||
1631 | if (cachep->dtor) { | 1695 | if (cachep->dtor) { |
1632 | int i; | 1696 | int i; |
1633 | for (i = 0; i < cachep->num; i++) { | 1697 | for (i = 0; i < cachep->num; i++) { |
1634 | void *objp = slabp->s_mem + cachep->buffer_size * i; | 1698 | void *objp = index_to_obj(cachep, slabp, i); |
1635 | (cachep->dtor) (objp, cachep, 0); | 1699 | (cachep->dtor) (objp, cachep, 0); |
1636 | } | 1700 | } |
1637 | } | 1701 | } |
@@ -1639,9 +1703,13 @@ static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) | |||
1639 | #endif | 1703 | #endif |
1640 | 1704 | ||
1641 | /** | 1705 | /** |
1706 | * slab_destroy - destroy and release all objects in a slab | ||
1707 | * @cachep: cache pointer being destroyed | ||
1708 | * @slabp: slab pointer being destroyed | ||
1709 | * | ||
1642 | * Destroy all the objs in a slab, and release the mem back to the system. | 1710 | * Destroy all the objs in a slab, and release the mem back to the system. |
1643 | * Before calling the slab must have been unlinked from the cache. | 1711 | * Before calling the slab must have been unlinked from the cache. The |
1644 | * The cache-lock is not held/needed. | 1712 | * cache-lock is not held/needed. |
1645 | */ | 1713 | */ |
1646 | static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) | 1714 | static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) |
1647 | { | 1715 | { |
@@ -1662,8 +1730,10 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) | |||
1662 | } | 1730 | } |
1663 | } | 1731 | } |
1664 | 1732 | ||
1665 | /* For setting up all the kmem_list3s for cache whose buffer_size is same | 1733 | /* |
1666 | as size of kmem_list3. */ | 1734 | * For setting up all the kmem_list3s for cache whose buffer_size is same as |
1735 | * size of kmem_list3. | ||
1736 | */ | ||
1667 | static void set_up_list3s(struct kmem_cache *cachep, int index) | 1737 | static void set_up_list3s(struct kmem_cache *cachep, int index) |
1668 | { | 1738 | { |
1669 | int node; | 1739 | int node; |
@@ -1689,13 +1759,13 @@ static void set_up_list3s(struct kmem_cache *cachep, int index) | |||
1689 | * high order pages for slabs. When the gfp() functions are more friendly | 1759 | * high order pages for slabs. When the gfp() functions are more friendly |
1690 | * towards high-order requests, this should be changed. | 1760 | * towards high-order requests, this should be changed. |
1691 | */ | 1761 | */ |
1692 | static inline size_t calculate_slab_order(struct kmem_cache *cachep, | 1762 | static size_t calculate_slab_order(struct kmem_cache *cachep, |
1693 | size_t size, size_t align, unsigned long flags) | 1763 | size_t size, size_t align, unsigned long flags) |
1694 | { | 1764 | { |
1695 | size_t left_over = 0; | 1765 | size_t left_over = 0; |
1696 | int gfporder; | 1766 | int gfporder; |
1697 | 1767 | ||
1698 | for (gfporder = 0 ; gfporder <= MAX_GFP_ORDER; gfporder++) { | 1768 | for (gfporder = 0; gfporder <= MAX_GFP_ORDER; gfporder++) { |
1699 | unsigned int num; | 1769 | unsigned int num; |
1700 | size_t remainder; | 1770 | size_t remainder; |
1701 | 1771 | ||
@@ -1730,12 +1800,66 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep, | |||
1730 | /* | 1800 | /* |
1731 | * Acceptable internal fragmentation? | 1801 | * Acceptable internal fragmentation? |
1732 | */ | 1802 | */ |
1733 | if ((left_over * 8) <= (PAGE_SIZE << gfporder)) | 1803 | if (left_over * 8 <= (PAGE_SIZE << gfporder)) |
1734 | break; | 1804 | break; |
1735 | } | 1805 | } |
1736 | return left_over; | 1806 | return left_over; |
1737 | } | 1807 | } |
1738 | 1808 | ||
1809 | static void setup_cpu_cache(struct kmem_cache *cachep) | ||
1810 | { | ||
1811 | if (g_cpucache_up == FULL) { | ||
1812 | enable_cpucache(cachep); | ||
1813 | return; | ||
1814 | } | ||
1815 | if (g_cpucache_up == NONE) { | ||
1816 | /* | ||
1817 | * Note: the first kmem_cache_create must create the cache | ||
1818 | * that's used by kmalloc(24), otherwise the creation of | ||
1819 | * further caches will BUG(). | ||
1820 | */ | ||
1821 | cachep->array[smp_processor_id()] = &initarray_generic.cache; | ||
1822 | |||
1823 | /* | ||
1824 | * If the cache that's used by kmalloc(sizeof(kmem_list3)) is | ||
1825 | * the first cache, then we need to set up all its list3s, | ||
1826 | * otherwise the creation of further caches will BUG(). | ||
1827 | */ | ||
1828 | set_up_list3s(cachep, SIZE_AC); | ||
1829 | if (INDEX_AC == INDEX_L3) | ||
1830 | g_cpucache_up = PARTIAL_L3; | ||
1831 | else | ||
1832 | g_cpucache_up = PARTIAL_AC; | ||
1833 | } else { | ||
1834 | cachep->array[smp_processor_id()] = | ||
1835 | kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); | ||
1836 | |||
1837 | if (g_cpucache_up == PARTIAL_AC) { | ||
1838 | set_up_list3s(cachep, SIZE_L3); | ||
1839 | g_cpucache_up = PARTIAL_L3; | ||
1840 | } else { | ||
1841 | int node; | ||
1842 | for_each_online_node(node) { | ||
1843 | cachep->nodelists[node] = | ||
1844 | kmalloc_node(sizeof(struct kmem_list3), | ||
1845 | GFP_KERNEL, node); | ||
1846 | BUG_ON(!cachep->nodelists[node]); | ||
1847 | kmem_list3_init(cachep->nodelists[node]); | ||
1848 | } | ||
1849 | } | ||
1850 | } | ||
1851 | cachep->nodelists[numa_node_id()]->next_reap = | ||
1852 | jiffies + REAPTIMEOUT_LIST3 + | ||
1853 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; | ||
1854 | |||
1855 | cpu_cache_get(cachep)->avail = 0; | ||
1856 | cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES; | ||
1857 | cpu_cache_get(cachep)->batchcount = 1; | ||
1858 | cpu_cache_get(cachep)->touched = 0; | ||
1859 | cachep->batchcount = 1; | ||
1860 | cachep->limit = BOOT_CPUCACHE_ENTRIES; | ||
1861 | } | ||
1862 | |||
1739 | /** | 1863 | /** |
1740 | * kmem_cache_create - Create a cache. | 1864 | * kmem_cache_create - Create a cache. |
1741 | * @name: A string which is used in /proc/slabinfo to identify this cache. | 1865 | * @name: A string which is used in /proc/slabinfo to identify this cache. |
@@ -1751,9 +1875,8 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep, | |||
1751 | * and the @dtor is run before the pages are handed back. | 1875 | * and the @dtor is run before the pages are handed back. |
1752 | * | 1876 | * |
1753 | * @name must be valid until the cache is destroyed. This implies that | 1877 | * @name must be valid until the cache is destroyed. This implies that |
1754 | * the module calling this has to destroy the cache before getting | 1878 | * the module calling this has to destroy the cache before getting unloaded. |
1755 | * unloaded. | 1879 | * |
1756 | * | ||
1757 | * The flags are | 1880 | * The flags are |
1758 | * | 1881 | * |
1759 | * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) | 1882 | * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) |
@@ -1762,16 +1885,14 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep, | |||
1762 | * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check | 1885 | * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check |
1763 | * for buffer overruns. | 1886 | * for buffer overruns. |
1764 | * | 1887 | * |
1765 | * %SLAB_NO_REAP - Don't automatically reap this cache when we're under | ||
1766 | * memory pressure. | ||
1767 | * | ||
1768 | * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware | 1888 | * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware |
1769 | * cacheline. This can be beneficial if you're counting cycles as closely | 1889 | * cacheline. This can be beneficial if you're counting cycles as closely |
1770 | * as davem. | 1890 | * as davem. |
1771 | */ | 1891 | */ |
1772 | struct kmem_cache * | 1892 | struct kmem_cache * |
1773 | kmem_cache_create (const char *name, size_t size, size_t align, | 1893 | kmem_cache_create (const char *name, size_t size, size_t align, |
1774 | unsigned long flags, void (*ctor)(void*, struct kmem_cache *, unsigned long), | 1894 | unsigned long flags, |
1895 | void (*ctor)(void*, struct kmem_cache *, unsigned long), | ||
1775 | void (*dtor)(void*, struct kmem_cache *, unsigned long)) | 1896 | void (*dtor)(void*, struct kmem_cache *, unsigned long)) |
1776 | { | 1897 | { |
1777 | size_t left_over, slab_size, ralign; | 1898 | size_t left_over, slab_size, ralign; |
@@ -1781,12 +1902,10 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
1781 | /* | 1902 | /* |
1782 | * Sanity checks... these are all serious usage bugs. | 1903 | * Sanity checks... these are all serious usage bugs. |
1783 | */ | 1904 | */ |
1784 | if ((!name) || | 1905 | if (!name || in_interrupt() || (size < BYTES_PER_WORD) || |
1785 | in_interrupt() || | ||
1786 | (size < BYTES_PER_WORD) || | ||
1787 | (size > (1 << MAX_OBJ_ORDER) * PAGE_SIZE) || (dtor && !ctor)) { | 1906 | (size > (1 << MAX_OBJ_ORDER) * PAGE_SIZE) || (dtor && !ctor)) { |
1788 | printk(KERN_ERR "%s: Early error in slab %s\n", | 1907 | printk(KERN_ERR "%s: Early error in slab %s\n", __FUNCTION__, |
1789 | __FUNCTION__, name); | 1908 | name); |
1790 | BUG(); | 1909 | BUG(); |
1791 | } | 1910 | } |
1792 | 1911 | ||
@@ -1840,8 +1959,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
1840 | * above the next power of two: caches with object sizes just above a | 1959 | * above the next power of two: caches with object sizes just above a |
1841 | * power of two have a significant amount of internal fragmentation. | 1960 | * power of two have a significant amount of internal fragmentation. |
1842 | */ | 1961 | */ |
1843 | if ((size < 4096 | 1962 | if (size < 4096 || fls(size - 1) == fls(size-1 + 3 * BYTES_PER_WORD)) |
1844 | || fls(size - 1) == fls(size - 1 + 3 * BYTES_PER_WORD))) | ||
1845 | flags |= SLAB_RED_ZONE | SLAB_STORE_USER; | 1963 | flags |= SLAB_RED_ZONE | SLAB_STORE_USER; |
1846 | if (!(flags & SLAB_DESTROY_BY_RCU)) | 1964 | if (!(flags & SLAB_DESTROY_BY_RCU)) |
1847 | flags |= SLAB_POISON; | 1965 | flags |= SLAB_POISON; |
@@ -1853,13 +1971,14 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
1853 | BUG_ON(dtor); | 1971 | BUG_ON(dtor); |
1854 | 1972 | ||
1855 | /* | 1973 | /* |
1856 | * Always checks flags, a caller might be expecting debug | 1974 | * Always checks flags, a caller might be expecting debug support which |
1857 | * support which isn't available. | 1975 | * isn't available. |
1858 | */ | 1976 | */ |
1859 | if (flags & ~CREATE_MASK) | 1977 | if (flags & ~CREATE_MASK) |
1860 | BUG(); | 1978 | BUG(); |
1861 | 1979 | ||
1862 | /* Check that size is in terms of words. This is needed to avoid | 1980 | /* |
1981 | * Check that size is in terms of words. This is needed to avoid | ||
1863 | * unaligned accesses for some archs when redzoning is used, and makes | 1982 | * unaligned accesses for some archs when redzoning is used, and makes |
1864 | * sure any on-slab bufctl's are also correctly aligned. | 1983 | * sure any on-slab bufctl's are also correctly aligned. |
1865 | */ | 1984 | */ |
@@ -1868,12 +1987,14 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
1868 | size &= ~(BYTES_PER_WORD - 1); | 1987 | size &= ~(BYTES_PER_WORD - 1); |
1869 | } | 1988 | } |
1870 | 1989 | ||
1871 | /* calculate out the final buffer alignment: */ | 1990 | /* calculate the final buffer alignment: */ |
1991 | |||
1872 | /* 1) arch recommendation: can be overridden for debug */ | 1992 | /* 1) arch recommendation: can be overridden for debug */ |
1873 | if (flags & SLAB_HWCACHE_ALIGN) { | 1993 | if (flags & SLAB_HWCACHE_ALIGN) { |
1874 | /* Default alignment: as specified by the arch code. | 1994 | /* |
1875 | * Except if an object is really small, then squeeze multiple | 1995 | * Default alignment: as specified by the arch code. Except if |
1876 | * objects into one cacheline. | 1996 | * an object is really small, then squeeze multiple objects into |
1997 | * one cacheline. | ||
1877 | */ | 1998 | */ |
1878 | ralign = cache_line_size(); | 1999 | ralign = cache_line_size(); |
1879 | while (size <= ralign / 2) | 2000 | while (size <= ralign / 2) |
@@ -1893,16 +2014,16 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
1893 | if (ralign > BYTES_PER_WORD) | 2014 | if (ralign > BYTES_PER_WORD) |
1894 | flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); | 2015 | flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); |
1895 | } | 2016 | } |
1896 | /* 4) Store it. Note that the debug code below can reduce | 2017 | /* |
2018 | * 4) Store it. Note that the debug code below can reduce | ||
1897 | * the alignment to BYTES_PER_WORD. | 2019 | * the alignment to BYTES_PER_WORD. |
1898 | */ | 2020 | */ |
1899 | align = ralign; | 2021 | align = ralign; |
1900 | 2022 | ||
1901 | /* Get cache's description obj. */ | 2023 | /* Get cache's description obj. */ |
1902 | cachep = kmem_cache_alloc(&cache_cache, SLAB_KERNEL); | 2024 | cachep = kmem_cache_zalloc(&cache_cache, SLAB_KERNEL); |
1903 | if (!cachep) | 2025 | if (!cachep) |
1904 | goto oops; | 2026 | goto oops; |
1905 | memset(cachep, 0, sizeof(struct kmem_cache)); | ||
1906 | 2027 | ||
1907 | #if DEBUG | 2028 | #if DEBUG |
1908 | cachep->obj_size = size; | 2029 | cachep->obj_size = size; |
@@ -1978,7 +2099,6 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
1978 | cachep->gfpflags = 0; | 2099 | cachep->gfpflags = 0; |
1979 | if (flags & SLAB_CACHE_DMA) | 2100 | if (flags & SLAB_CACHE_DMA) |
1980 | cachep->gfpflags |= GFP_DMA; | 2101 | cachep->gfpflags |= GFP_DMA; |
1981 | spin_lock_init(&cachep->spinlock); | ||
1982 | cachep->buffer_size = size; | 2102 | cachep->buffer_size = size; |
1983 | 2103 | ||
1984 | if (flags & CFLGS_OFF_SLAB) | 2104 | if (flags & CFLGS_OFF_SLAB) |
@@ -1988,64 +2108,11 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
1988 | cachep->name = name; | 2108 | cachep->name = name; |
1989 | 2109 | ||
1990 | 2110 | ||
1991 | if (g_cpucache_up == FULL) { | 2111 | setup_cpu_cache(cachep); |
1992 | enable_cpucache(cachep); | ||
1993 | } else { | ||
1994 | if (g_cpucache_up == NONE) { | ||
1995 | /* Note: the first kmem_cache_create must create | ||
1996 | * the cache that's used by kmalloc(24), otherwise | ||
1997 | * the creation of further caches will BUG(). | ||
1998 | */ | ||
1999 | cachep->array[smp_processor_id()] = | ||
2000 | &initarray_generic.cache; | ||
2001 | |||
2002 | /* If the cache that's used by | ||
2003 | * kmalloc(sizeof(kmem_list3)) is the first cache, | ||
2004 | * then we need to set up all its list3s, otherwise | ||
2005 | * the creation of further caches will BUG(). | ||
2006 | */ | ||
2007 | set_up_list3s(cachep, SIZE_AC); | ||
2008 | if (INDEX_AC == INDEX_L3) | ||
2009 | g_cpucache_up = PARTIAL_L3; | ||
2010 | else | ||
2011 | g_cpucache_up = PARTIAL_AC; | ||
2012 | } else { | ||
2013 | cachep->array[smp_processor_id()] = | ||
2014 | kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); | ||
2015 | |||
2016 | if (g_cpucache_up == PARTIAL_AC) { | ||
2017 | set_up_list3s(cachep, SIZE_L3); | ||
2018 | g_cpucache_up = PARTIAL_L3; | ||
2019 | } else { | ||
2020 | int node; | ||
2021 | for_each_online_node(node) { | ||
2022 | |||
2023 | cachep->nodelists[node] = | ||
2024 | kmalloc_node(sizeof | ||
2025 | (struct kmem_list3), | ||
2026 | GFP_KERNEL, node); | ||
2027 | BUG_ON(!cachep->nodelists[node]); | ||
2028 | kmem_list3_init(cachep-> | ||
2029 | nodelists[node]); | ||
2030 | } | ||
2031 | } | ||
2032 | } | ||
2033 | cachep->nodelists[numa_node_id()]->next_reap = | ||
2034 | jiffies + REAPTIMEOUT_LIST3 + | ||
2035 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; | ||
2036 | |||
2037 | BUG_ON(!cpu_cache_get(cachep)); | ||
2038 | cpu_cache_get(cachep)->avail = 0; | ||
2039 | cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES; | ||
2040 | cpu_cache_get(cachep)->batchcount = 1; | ||
2041 | cpu_cache_get(cachep)->touched = 0; | ||
2042 | cachep->batchcount = 1; | ||
2043 | cachep->limit = BOOT_CPUCACHE_ENTRIES; | ||
2044 | } | ||
2045 | 2112 | ||
2046 | /* cache setup completed, link it into the list */ | 2113 | /* cache setup completed, link it into the list */ |
2047 | list_add(&cachep->next, &cache_chain); | 2114 | list_add(&cachep->next, &cache_chain); |
2048 | oops: | 2115 | oops: |
2049 | if (!cachep && (flags & SLAB_PANIC)) | 2116 | if (!cachep && (flags & SLAB_PANIC)) |
2050 | panic("kmem_cache_create(): failed to create slab `%s'\n", | 2117 | panic("kmem_cache_create(): failed to create slab `%s'\n", |
2051 | name); | 2118 | name); |
@@ -2089,30 +2156,13 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node) | |||
2089 | #define check_spinlock_acquired_node(x, y) do { } while(0) | 2156 | #define check_spinlock_acquired_node(x, y) do { } while(0) |
2090 | #endif | 2157 | #endif |
2091 | 2158 | ||
2092 | /* | 2159 | static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, |
2093 | * Waits for all CPUs to execute func(). | 2160 | struct array_cache *ac, |
2094 | */ | 2161 | int force, int node); |
2095 | static void smp_call_function_all_cpus(void (*func)(void *arg), void *arg) | ||
2096 | { | ||
2097 | check_irq_on(); | ||
2098 | preempt_disable(); | ||
2099 | |||
2100 | local_irq_disable(); | ||
2101 | func(arg); | ||
2102 | local_irq_enable(); | ||
2103 | |||
2104 | if (smp_call_function(func, arg, 1, 1)) | ||
2105 | BUG(); | ||
2106 | |||
2107 | preempt_enable(); | ||
2108 | } | ||
2109 | |||
2110 | static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac, | ||
2111 | int force, int node); | ||
2112 | 2162 | ||
2113 | static void do_drain(void *arg) | 2163 | static void do_drain(void *arg) |
2114 | { | 2164 | { |
2115 | struct kmem_cache *cachep = (struct kmem_cache *) arg; | 2165 | struct kmem_cache *cachep = arg; |
2116 | struct array_cache *ac; | 2166 | struct array_cache *ac; |
2117 | int node = numa_node_id(); | 2167 | int node = numa_node_id(); |
2118 | 2168 | ||
@@ -2129,14 +2179,12 @@ static void drain_cpu_caches(struct kmem_cache *cachep) | |||
2129 | struct kmem_list3 *l3; | 2179 | struct kmem_list3 *l3; |
2130 | int node; | 2180 | int node; |
2131 | 2181 | ||
2132 | smp_call_function_all_cpus(do_drain, cachep); | 2182 | on_each_cpu(do_drain, cachep, 1, 1); |
2133 | check_irq_on(); | 2183 | check_irq_on(); |
2134 | for_each_online_node(node) { | 2184 | for_each_online_node(node) { |
2135 | l3 = cachep->nodelists[node]; | 2185 | l3 = cachep->nodelists[node]; |
2136 | if (l3) { | 2186 | if (l3) { |
2137 | spin_lock_irq(&l3->list_lock); | 2187 | drain_array(cachep, l3, l3->shared, 1, node); |
2138 | drain_array_locked(cachep, l3->shared, 1, node); | ||
2139 | spin_unlock_irq(&l3->list_lock); | ||
2140 | if (l3->alien) | 2188 | if (l3->alien) |
2141 | drain_alien_cache(cachep, l3->alien); | 2189 | drain_alien_cache(cachep, l3->alien); |
2142 | } | 2190 | } |
@@ -2260,16 +2308,15 @@ int kmem_cache_destroy(struct kmem_cache *cachep) | |||
2260 | 2308 | ||
2261 | /* NUMA: free the list3 structures */ | 2309 | /* NUMA: free the list3 structures */ |
2262 | for_each_online_node(i) { | 2310 | for_each_online_node(i) { |
2263 | if ((l3 = cachep->nodelists[i])) { | 2311 | l3 = cachep->nodelists[i]; |
2312 | if (l3) { | ||
2264 | kfree(l3->shared); | 2313 | kfree(l3->shared); |
2265 | free_alien_cache(l3->alien); | 2314 | free_alien_cache(l3->alien); |
2266 | kfree(l3); | 2315 | kfree(l3); |
2267 | } | 2316 | } |
2268 | } | 2317 | } |
2269 | kmem_cache_free(&cache_cache, cachep); | 2318 | kmem_cache_free(&cache_cache, cachep); |
2270 | |||
2271 | unlock_cpu_hotplug(); | 2319 | unlock_cpu_hotplug(); |
2272 | |||
2273 | return 0; | 2320 | return 0; |
2274 | } | 2321 | } |
2275 | EXPORT_SYMBOL(kmem_cache_destroy); | 2322 | EXPORT_SYMBOL(kmem_cache_destroy); |
@@ -2292,7 +2339,6 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, | |||
2292 | slabp->inuse = 0; | 2339 | slabp->inuse = 0; |
2293 | slabp->colouroff = colour_off; | 2340 | slabp->colouroff = colour_off; |
2294 | slabp->s_mem = objp + colour_off; | 2341 | slabp->s_mem = objp + colour_off; |
2295 | |||
2296 | return slabp; | 2342 | return slabp; |
2297 | } | 2343 | } |
2298 | 2344 | ||
@@ -2307,7 +2353,7 @@ static void cache_init_objs(struct kmem_cache *cachep, | |||
2307 | int i; | 2353 | int i; |
2308 | 2354 | ||
2309 | for (i = 0; i < cachep->num; i++) { | 2355 | for (i = 0; i < cachep->num; i++) { |
2310 | void *objp = slabp->s_mem + cachep->buffer_size * i; | 2356 | void *objp = index_to_obj(cachep, slabp, i); |
2311 | #if DEBUG | 2357 | #if DEBUG |
2312 | /* need to poison the objs? */ | 2358 | /* need to poison the objs? */ |
2313 | if (cachep->flags & SLAB_POISON) | 2359 | if (cachep->flags & SLAB_POISON) |
@@ -2320,9 +2366,9 @@ static void cache_init_objs(struct kmem_cache *cachep, | |||
2320 | *dbg_redzone2(cachep, objp) = RED_INACTIVE; | 2366 | *dbg_redzone2(cachep, objp) = RED_INACTIVE; |
2321 | } | 2367 | } |
2322 | /* | 2368 | /* |
2323 | * Constructors are not allowed to allocate memory from | 2369 | * Constructors are not allowed to allocate memory from the same |
2324 | * the same cache which they are a constructor for. | 2370 | * cache which they are a constructor for. Otherwise, deadlock. |
2325 | * Otherwise, deadlock. They must also be threaded. | 2371 | * They must also be threaded. |
2326 | */ | 2372 | */ |
2327 | if (cachep->ctor && !(cachep->flags & SLAB_POISON)) | 2373 | if (cachep->ctor && !(cachep->flags & SLAB_POISON)) |
2328 | cachep->ctor(objp + obj_offset(cachep), cachep, | 2374 | cachep->ctor(objp + obj_offset(cachep), cachep, |
@@ -2336,8 +2382,8 @@ static void cache_init_objs(struct kmem_cache *cachep, | |||
2336 | slab_error(cachep, "constructor overwrote the" | 2382 | slab_error(cachep, "constructor overwrote the" |
2337 | " start of an object"); | 2383 | " start of an object"); |
2338 | } | 2384 | } |
2339 | if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep) | 2385 | if ((cachep->buffer_size % PAGE_SIZE) == 0 && |
2340 | && cachep->flags & SLAB_POISON) | 2386 | OFF_SLAB(cachep) && cachep->flags & SLAB_POISON) |
2341 | kernel_map_pages(virt_to_page(objp), | 2387 | kernel_map_pages(virt_to_page(objp), |
2342 | cachep->buffer_size / PAGE_SIZE, 0); | 2388 | cachep->buffer_size / PAGE_SIZE, 0); |
2343 | #else | 2389 | #else |
@@ -2352,18 +2398,16 @@ static void cache_init_objs(struct kmem_cache *cachep, | |||
2352 | 2398 | ||
2353 | static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) | 2399 | static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) |
2354 | { | 2400 | { |
2355 | if (flags & SLAB_DMA) { | 2401 | if (flags & SLAB_DMA) |
2356 | if (!(cachep->gfpflags & GFP_DMA)) | 2402 | BUG_ON(!(cachep->gfpflags & GFP_DMA)); |
2357 | BUG(); | 2403 | else |
2358 | } else { | 2404 | BUG_ON(cachep->gfpflags & GFP_DMA); |
2359 | if (cachep->gfpflags & GFP_DMA) | ||
2360 | BUG(); | ||
2361 | } | ||
2362 | } | 2405 | } |
2363 | 2406 | ||
2364 | static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, int nodeid) | 2407 | static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, |
2408 | int nodeid) | ||
2365 | { | 2409 | { |
2366 | void *objp = slabp->s_mem + (slabp->free * cachep->buffer_size); | 2410 | void *objp = index_to_obj(cachep, slabp, slabp->free); |
2367 | kmem_bufctl_t next; | 2411 | kmem_bufctl_t next; |
2368 | 2412 | ||
2369 | slabp->inuse++; | 2413 | slabp->inuse++; |
@@ -2377,18 +2421,18 @@ static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, int nod | |||
2377 | return objp; | 2421 | return objp; |
2378 | } | 2422 | } |
2379 | 2423 | ||
2380 | static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, void *objp, | 2424 | static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, |
2381 | int nodeid) | 2425 | void *objp, int nodeid) |
2382 | { | 2426 | { |
2383 | unsigned int objnr = (unsigned)(objp-slabp->s_mem) / cachep->buffer_size; | 2427 | unsigned int objnr = obj_to_index(cachep, slabp, objp); |
2384 | 2428 | ||
2385 | #if DEBUG | 2429 | #if DEBUG |
2386 | /* Verify that the slab belongs to the intended node */ | 2430 | /* Verify that the slab belongs to the intended node */ |
2387 | WARN_ON(slabp->nodeid != nodeid); | 2431 | WARN_ON(slabp->nodeid != nodeid); |
2388 | 2432 | ||
2389 | if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) { | 2433 | if (slab_bufctl(slabp)[objnr] + 1 <= SLAB_LIMIT + 1) { |
2390 | printk(KERN_ERR "slab: double free detected in cache " | 2434 | printk(KERN_ERR "slab: double free detected in cache " |
2391 | "'%s', objp %p\n", cachep->name, objp); | 2435 | "'%s', objp %p\n", cachep->name, objp); |
2392 | BUG(); | 2436 | BUG(); |
2393 | } | 2437 | } |
2394 | #endif | 2438 | #endif |
@@ -2397,14 +2441,18 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, void *ob | |||
2397 | slabp->inuse--; | 2441 | slabp->inuse--; |
2398 | } | 2442 | } |
2399 | 2443 | ||
2400 | static void set_slab_attr(struct kmem_cache *cachep, struct slab *slabp, void *objp) | 2444 | static void set_slab_attr(struct kmem_cache *cachep, struct slab *slabp, |
2445 | void *objp) | ||
2401 | { | 2446 | { |
2402 | int i; | 2447 | int i; |
2403 | struct page *page; | 2448 | struct page *page; |
2404 | 2449 | ||
2405 | /* Nasty!!!!!! I hope this is OK. */ | 2450 | /* Nasty!!!!!! I hope this is OK. */ |
2406 | i = 1 << cachep->gfporder; | ||
2407 | page = virt_to_page(objp); | 2451 | page = virt_to_page(objp); |
2452 | |||
2453 | i = 1; | ||
2454 | if (likely(!PageCompound(page))) | ||
2455 | i <<= cachep->gfporder; | ||
2408 | do { | 2456 | do { |
2409 | page_set_cache(page, cachep); | 2457 | page_set_cache(page, cachep); |
2410 | page_set_slab(page, slabp); | 2458 | page_set_slab(page, slabp); |
@@ -2425,8 +2473,9 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
2425 | unsigned long ctor_flags; | 2473 | unsigned long ctor_flags; |
2426 | struct kmem_list3 *l3; | 2474 | struct kmem_list3 *l3; |
2427 | 2475 | ||
2428 | /* Be lazy and only check for valid flags here, | 2476 | /* |
2429 | * keeping it out of the critical path in kmem_cache_alloc(). | 2477 | * Be lazy and only check for valid flags here, keeping it out of the |
2478 | * critical path in kmem_cache_alloc(). | ||
2430 | */ | 2479 | */ |
2431 | if (flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW)) | 2480 | if (flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW)) |
2432 | BUG(); | 2481 | BUG(); |
@@ -2467,14 +2516,17 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
2467 | */ | 2516 | */ |
2468 | kmem_flagcheck(cachep, flags); | 2517 | kmem_flagcheck(cachep, flags); |
2469 | 2518 | ||
2470 | /* Get mem for the objs. | 2519 | /* |
2471 | * Attempt to allocate a physical page from 'nodeid', | 2520 | * Get mem for the objs. Attempt to allocate a physical page from |
2521 | * 'nodeid'. | ||
2472 | */ | 2522 | */ |
2473 | if (!(objp = kmem_getpages(cachep, flags, nodeid))) | 2523 | objp = kmem_getpages(cachep, flags, nodeid); |
2524 | if (!objp) | ||
2474 | goto failed; | 2525 | goto failed; |
2475 | 2526 | ||
2476 | /* Get slab management. */ | 2527 | /* Get slab management. */ |
2477 | if (!(slabp = alloc_slabmgmt(cachep, objp, offset, local_flags))) | 2528 | slabp = alloc_slabmgmt(cachep, objp, offset, local_flags); |
2529 | if (!slabp) | ||
2478 | goto opps1; | 2530 | goto opps1; |
2479 | 2531 | ||
2480 | slabp->nodeid = nodeid; | 2532 | slabp->nodeid = nodeid; |
@@ -2493,9 +2545,9 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
2493 | l3->free_objects += cachep->num; | 2545 | l3->free_objects += cachep->num; |
2494 | spin_unlock(&l3->list_lock); | 2546 | spin_unlock(&l3->list_lock); |
2495 | return 1; | 2547 | return 1; |
2496 | opps1: | 2548 | opps1: |
2497 | kmem_freepages(cachep, objp); | 2549 | kmem_freepages(cachep, objp); |
2498 | failed: | 2550 | failed: |
2499 | if (local_flags & __GFP_WAIT) | 2551 | if (local_flags & __GFP_WAIT) |
2500 | local_irq_disable(); | 2552 | local_irq_disable(); |
2501 | return 0; | 2553 | return 0; |
@@ -2538,8 +2590,8 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
2538 | page = virt_to_page(objp); | 2590 | page = virt_to_page(objp); |
2539 | 2591 | ||
2540 | if (page_get_cache(page) != cachep) { | 2592 | if (page_get_cache(page) != cachep) { |
2541 | printk(KERN_ERR | 2593 | printk(KERN_ERR "mismatch in kmem_cache_free: expected " |
2542 | "mismatch in kmem_cache_free: expected cache %p, got %p\n", | 2594 | "cache %p, got %p\n", |
2543 | page_get_cache(page), cachep); | 2595 | page_get_cache(page), cachep); |
2544 | printk(KERN_ERR "%p is %s.\n", cachep, cachep->name); | 2596 | printk(KERN_ERR "%p is %s.\n", cachep, cachep->name); |
2545 | printk(KERN_ERR "%p is %s.\n", page_get_cache(page), | 2597 | printk(KERN_ERR "%p is %s.\n", page_get_cache(page), |
@@ -2549,13 +2601,12 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
2549 | slabp = page_get_slab(page); | 2601 | slabp = page_get_slab(page); |
2550 | 2602 | ||
2551 | if (cachep->flags & SLAB_RED_ZONE) { | 2603 | if (cachep->flags & SLAB_RED_ZONE) { |
2552 | if (*dbg_redzone1(cachep, objp) != RED_ACTIVE | 2604 | if (*dbg_redzone1(cachep, objp) != RED_ACTIVE || |
2553 | || *dbg_redzone2(cachep, objp) != RED_ACTIVE) { | 2605 | *dbg_redzone2(cachep, objp) != RED_ACTIVE) { |
2554 | slab_error(cachep, | 2606 | slab_error(cachep, "double free, or memory outside" |
2555 | "double free, or memory outside" | 2607 | " object was overwritten"); |
2556 | " object was overwritten"); | 2608 | printk(KERN_ERR "%p: redzone 1:0x%lx, " |
2557 | printk(KERN_ERR | 2609 | "redzone 2:0x%lx.\n", |
2558 | "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n", | ||
2559 | objp, *dbg_redzone1(cachep, objp), | 2610 | objp, *dbg_redzone1(cachep, objp), |
2560 | *dbg_redzone2(cachep, objp)); | 2611 | *dbg_redzone2(cachep, objp)); |
2561 | } | 2612 | } |
@@ -2565,15 +2616,16 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
2565 | if (cachep->flags & SLAB_STORE_USER) | 2616 | if (cachep->flags & SLAB_STORE_USER) |
2566 | *dbg_userword(cachep, objp) = caller; | 2617 | *dbg_userword(cachep, objp) = caller; |
2567 | 2618 | ||
2568 | objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size; | 2619 | objnr = obj_to_index(cachep, slabp, objp); |
2569 | 2620 | ||
2570 | BUG_ON(objnr >= cachep->num); | 2621 | BUG_ON(objnr >= cachep->num); |
2571 | BUG_ON(objp != slabp->s_mem + objnr * cachep->buffer_size); | 2622 | BUG_ON(objp != index_to_obj(cachep, slabp, objnr)); |
2572 | 2623 | ||
2573 | if (cachep->flags & SLAB_DEBUG_INITIAL) { | 2624 | if (cachep->flags & SLAB_DEBUG_INITIAL) { |
2574 | /* Need to call the slab's constructor so the | 2625 | /* |
2575 | * caller can perform a verify of its state (debugging). | 2626 | * Need to call the slab's constructor so the caller can |
2576 | * Called without the cache-lock held. | 2627 | * perform a verify of its state (debugging). Called without |
2628 | * the cache-lock held. | ||
2577 | */ | 2629 | */ |
2578 | cachep->ctor(objp + obj_offset(cachep), | 2630 | cachep->ctor(objp + obj_offset(cachep), |
2579 | cachep, SLAB_CTOR_CONSTRUCTOR | SLAB_CTOR_VERIFY); | 2631 | cachep, SLAB_CTOR_CONSTRUCTOR | SLAB_CTOR_VERIFY); |
@@ -2584,9 +2636,12 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
2584 | */ | 2636 | */ |
2585 | cachep->dtor(objp + obj_offset(cachep), cachep, 0); | 2637 | cachep->dtor(objp + obj_offset(cachep), cachep, 0); |
2586 | } | 2638 | } |
2639 | #ifdef CONFIG_DEBUG_SLAB_LEAK | ||
2640 | slab_bufctl(slabp)[objnr] = BUFCTL_FREE; | ||
2641 | #endif | ||
2587 | if (cachep->flags & SLAB_POISON) { | 2642 | if (cachep->flags & SLAB_POISON) { |
2588 | #ifdef CONFIG_DEBUG_PAGEALLOC | 2643 | #ifdef CONFIG_DEBUG_PAGEALLOC |
2589 | if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) { | 2644 | if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) { |
2590 | store_stackinfo(cachep, objp, (unsigned long)caller); | 2645 | store_stackinfo(cachep, objp, (unsigned long)caller); |
2591 | kernel_map_pages(virt_to_page(objp), | 2646 | kernel_map_pages(virt_to_page(objp), |
2592 | cachep->buffer_size / PAGE_SIZE, 0); | 2647 | cachep->buffer_size / PAGE_SIZE, 0); |
@@ -2612,14 +2667,14 @@ static void check_slabp(struct kmem_cache *cachep, struct slab *slabp) | |||
2612 | goto bad; | 2667 | goto bad; |
2613 | } | 2668 | } |
2614 | if (entries != cachep->num - slabp->inuse) { | 2669 | if (entries != cachep->num - slabp->inuse) { |
2615 | bad: | 2670 | bad: |
2616 | printk(KERN_ERR | 2671 | printk(KERN_ERR "slab: Internal list corruption detected in " |
2617 | "slab: Internal list corruption detected in cache '%s'(%d), slabp %p(%d). Hexdump:\n", | 2672 | "cache '%s'(%d), slabp %p(%d). Hexdump:\n", |
2618 | cachep->name, cachep->num, slabp, slabp->inuse); | 2673 | cachep->name, cachep->num, slabp, slabp->inuse); |
2619 | for (i = 0; | 2674 | for (i = 0; |
2620 | i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t); | 2675 | i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t); |
2621 | i++) { | 2676 | i++) { |
2622 | if ((i % 16) == 0) | 2677 | if (i % 16 == 0) |
2623 | printk("\n%03x:", i); | 2678 | printk("\n%03x:", i); |
2624 | printk(" %02x", ((unsigned char *)slabp)[i]); | 2679 | printk(" %02x", ((unsigned char *)slabp)[i]); |
2625 | } | 2680 | } |
@@ -2641,12 +2696,13 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) | |||
2641 | 2696 | ||
2642 | check_irq_off(); | 2697 | check_irq_off(); |
2643 | ac = cpu_cache_get(cachep); | 2698 | ac = cpu_cache_get(cachep); |
2644 | retry: | 2699 | retry: |
2645 | batchcount = ac->batchcount; | 2700 | batchcount = ac->batchcount; |
2646 | if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { | 2701 | if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { |
2647 | /* if there was little recent activity on this | 2702 | /* |
2648 | * cache, then perform only a partial refill. | 2703 | * If there was little recent activity on this cache, then |
2649 | * Otherwise we could generate refill bouncing. | 2704 | * perform only a partial refill. Otherwise we could generate |
2705 | * refill bouncing. | ||
2650 | */ | 2706 | */ |
2651 | batchcount = BATCHREFILL_LIMIT; | 2707 | batchcount = BATCHREFILL_LIMIT; |
2652 | } | 2708 | } |
@@ -2655,20 +2711,10 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) | |||
2655 | BUG_ON(ac->avail > 0 || !l3); | 2711 | BUG_ON(ac->avail > 0 || !l3); |
2656 | spin_lock(&l3->list_lock); | 2712 | spin_lock(&l3->list_lock); |
2657 | 2713 | ||
2658 | if (l3->shared) { | 2714 | /* See if we can refill from the shared array */ |
2659 | struct array_cache *shared_array = l3->shared; | 2715 | if (l3->shared && transfer_objects(ac, l3->shared, batchcount)) |
2660 | if (shared_array->avail) { | 2716 | goto alloc_done; |
2661 | if (batchcount > shared_array->avail) | 2717 | |
2662 | batchcount = shared_array->avail; | ||
2663 | shared_array->avail -= batchcount; | ||
2664 | ac->avail = batchcount; | ||
2665 | memcpy(ac->entry, | ||
2666 | &(shared_array->entry[shared_array->avail]), | ||
2667 | sizeof(void *) * batchcount); | ||
2668 | shared_array->touched = 1; | ||
2669 | goto alloc_done; | ||
2670 | } | ||
2671 | } | ||
2672 | while (batchcount > 0) { | 2718 | while (batchcount > 0) { |
2673 | struct list_head *entry; | 2719 | struct list_head *entry; |
2674 | struct slab *slabp; | 2720 | struct slab *slabp; |
@@ -2702,29 +2748,29 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) | |||
2702 | list_add(&slabp->list, &l3->slabs_partial); | 2748 | list_add(&slabp->list, &l3->slabs_partial); |
2703 | } | 2749 | } |
2704 | 2750 | ||
2705 | must_grow: | 2751 | must_grow: |
2706 | l3->free_objects -= ac->avail; | 2752 | l3->free_objects -= ac->avail; |
2707 | alloc_done: | 2753 | alloc_done: |
2708 | spin_unlock(&l3->list_lock); | 2754 | spin_unlock(&l3->list_lock); |
2709 | 2755 | ||
2710 | if (unlikely(!ac->avail)) { | 2756 | if (unlikely(!ac->avail)) { |
2711 | int x; | 2757 | int x; |
2712 | x = cache_grow(cachep, flags, numa_node_id()); | 2758 | x = cache_grow(cachep, flags, numa_node_id()); |
2713 | 2759 | ||
2714 | // cache_grow can reenable interrupts, then ac could change. | 2760 | /* cache_grow can reenable interrupts, then ac could change. */ |
2715 | ac = cpu_cache_get(cachep); | 2761 | ac = cpu_cache_get(cachep); |
2716 | if (!x && ac->avail == 0) // no objects in sight? abort | 2762 | if (!x && ac->avail == 0) /* no objects in sight? abort */ |
2717 | return NULL; | 2763 | return NULL; |
2718 | 2764 | ||
2719 | if (!ac->avail) // objects refilled by interrupt? | 2765 | if (!ac->avail) /* objects refilled by interrupt? */ |
2720 | goto retry; | 2766 | goto retry; |
2721 | } | 2767 | } |
2722 | ac->touched = 1; | 2768 | ac->touched = 1; |
2723 | return ac->entry[--ac->avail]; | 2769 | return ac->entry[--ac->avail]; |
2724 | } | 2770 | } |
2725 | 2771 | ||
2726 | static inline void | 2772 | static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep, |
2727 | cache_alloc_debugcheck_before(struct kmem_cache *cachep, gfp_t flags) | 2773 | gfp_t flags) |
2728 | { | 2774 | { |
2729 | might_sleep_if(flags & __GFP_WAIT); | 2775 | might_sleep_if(flags & __GFP_WAIT); |
2730 | #if DEBUG | 2776 | #if DEBUG |
@@ -2733,8 +2779,8 @@ cache_alloc_debugcheck_before(struct kmem_cache *cachep, gfp_t flags) | |||
2733 | } | 2779 | } |
2734 | 2780 | ||
2735 | #if DEBUG | 2781 | #if DEBUG |
2736 | static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, gfp_t flags, | 2782 | static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, |
2737 | void *objp, void *caller) | 2783 | gfp_t flags, void *objp, void *caller) |
2738 | { | 2784 | { |
2739 | if (!objp) | 2785 | if (!objp) |
2740 | return objp; | 2786 | return objp; |
@@ -2754,19 +2800,28 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, gfp_t flags | |||
2754 | *dbg_userword(cachep, objp) = caller; | 2800 | *dbg_userword(cachep, objp) = caller; |
2755 | 2801 | ||
2756 | if (cachep->flags & SLAB_RED_ZONE) { | 2802 | if (cachep->flags & SLAB_RED_ZONE) { |
2757 | if (*dbg_redzone1(cachep, objp) != RED_INACTIVE | 2803 | if (*dbg_redzone1(cachep, objp) != RED_INACTIVE || |
2758 | || *dbg_redzone2(cachep, objp) != RED_INACTIVE) { | 2804 | *dbg_redzone2(cachep, objp) != RED_INACTIVE) { |
2759 | slab_error(cachep, | 2805 | slab_error(cachep, "double free, or memory outside" |
2760 | "double free, or memory outside" | 2806 | " object was overwritten"); |
2761 | " object was overwritten"); | ||
2762 | printk(KERN_ERR | 2807 | printk(KERN_ERR |
2763 | "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n", | 2808 | "%p: redzone 1:0x%lx, redzone 2:0x%lx\n", |
2764 | objp, *dbg_redzone1(cachep, objp), | 2809 | objp, *dbg_redzone1(cachep, objp), |
2765 | *dbg_redzone2(cachep, objp)); | 2810 | *dbg_redzone2(cachep, objp)); |
2766 | } | 2811 | } |
2767 | *dbg_redzone1(cachep, objp) = RED_ACTIVE; | 2812 | *dbg_redzone1(cachep, objp) = RED_ACTIVE; |
2768 | *dbg_redzone2(cachep, objp) = RED_ACTIVE; | 2813 | *dbg_redzone2(cachep, objp) = RED_ACTIVE; |
2769 | } | 2814 | } |
2815 | #ifdef CONFIG_DEBUG_SLAB_LEAK | ||
2816 | { | ||
2817 | struct slab *slabp; | ||
2818 | unsigned objnr; | ||
2819 | |||
2820 | slabp = page_get_slab(virt_to_page(objp)); | ||
2821 | objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size; | ||
2822 | slab_bufctl(slabp)[objnr] = BUFCTL_ACTIVE; | ||
2823 | } | ||
2824 | #endif | ||
2770 | objp += obj_offset(cachep); | 2825 | objp += obj_offset(cachep); |
2771 | if (cachep->ctor && cachep->flags & SLAB_POISON) { | 2826 | if (cachep->ctor && cachep->flags & SLAB_POISON) { |
2772 | unsigned long ctor_flags = SLAB_CTOR_CONSTRUCTOR; | 2827 | unsigned long ctor_flags = SLAB_CTOR_CONSTRUCTOR; |
@@ -2788,11 +2843,10 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) | |||
2788 | struct array_cache *ac; | 2843 | struct array_cache *ac; |
2789 | 2844 | ||
2790 | #ifdef CONFIG_NUMA | 2845 | #ifdef CONFIG_NUMA |
2791 | if (unlikely(current->mempolicy && !in_interrupt())) { | 2846 | if (unlikely(current->flags & (PF_SPREAD_SLAB | PF_MEMPOLICY))) { |
2792 | int nid = slab_node(current->mempolicy); | 2847 | objp = alternate_node_alloc(cachep, flags); |
2793 | 2848 | if (objp != NULL) | |
2794 | if (nid != numa_node_id()) | 2849 | return objp; |
2795 | return __cache_alloc_node(cachep, flags, nid); | ||
2796 | } | 2850 | } |
2797 | #endif | 2851 | #endif |
2798 | 2852 | ||
@@ -2809,8 +2863,8 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) | |||
2809 | return objp; | 2863 | return objp; |
2810 | } | 2864 | } |
2811 | 2865 | ||
2812 | static __always_inline void * | 2866 | static __always_inline void *__cache_alloc(struct kmem_cache *cachep, |
2813 | __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) | 2867 | gfp_t flags, void *caller) |
2814 | { | 2868 | { |
2815 | unsigned long save_flags; | 2869 | unsigned long save_flags; |
2816 | void *objp; | 2870 | void *objp; |
@@ -2828,9 +2882,32 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) | |||
2828 | 2882 | ||
2829 | #ifdef CONFIG_NUMA | 2883 | #ifdef CONFIG_NUMA |
2830 | /* | 2884 | /* |
2885 | * Try allocating on another node if PF_SPREAD_SLAB|PF_MEMPOLICY. | ||
2886 | * | ||
2887 | * If we are in_interrupt, then process context, including cpusets and | ||
2888 | * mempolicy, may not apply and should not be used for allocation policy. | ||
2889 | */ | ||
2890 | static void *alternate_node_alloc(struct kmem_cache *cachep, gfp_t flags) | ||
2891 | { | ||
2892 | int nid_alloc, nid_here; | ||
2893 | |||
2894 | if (in_interrupt()) | ||
2895 | return NULL; | ||
2896 | nid_alloc = nid_here = numa_node_id(); | ||
2897 | if (cpuset_do_slab_mem_spread() && (cachep->flags & SLAB_MEM_SPREAD)) | ||
2898 | nid_alloc = cpuset_mem_spread_node(); | ||
2899 | else if (current->mempolicy) | ||
2900 | nid_alloc = slab_node(current->mempolicy); | ||
2901 | if (nid_alloc != nid_here) | ||
2902 | return __cache_alloc_node(cachep, flags, nid_alloc); | ||
2903 | return NULL; | ||
2904 | } | ||
2905 | |||
2906 | /* | ||
2831 | * An interface to enable slab creation on nodeid | 2907 | * An interface to enable slab creation on nodeid |
2832 | */ | 2908 | */ |
2833 | static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) | 2909 | static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, |
2910 | int nodeid) | ||
2834 | { | 2911 | { |
2835 | struct list_head *entry; | 2912 | struct list_head *entry; |
2836 | struct slab *slabp; | 2913 | struct slab *slabp; |
@@ -2841,7 +2918,7 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node | |||
2841 | l3 = cachep->nodelists[nodeid]; | 2918 | l3 = cachep->nodelists[nodeid]; |
2842 | BUG_ON(!l3); | 2919 | BUG_ON(!l3); |
2843 | 2920 | ||
2844 | retry: | 2921 | retry: |
2845 | check_irq_off(); | 2922 | check_irq_off(); |
2846 | spin_lock(&l3->list_lock); | 2923 | spin_lock(&l3->list_lock); |
2847 | entry = l3->slabs_partial.next; | 2924 | entry = l3->slabs_partial.next; |
@@ -2868,16 +2945,15 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node | |||
2868 | /* move slabp to correct slabp list: */ | 2945 | /* move slabp to correct slabp list: */ |
2869 | list_del(&slabp->list); | 2946 | list_del(&slabp->list); |
2870 | 2947 | ||
2871 | if (slabp->free == BUFCTL_END) { | 2948 | if (slabp->free == BUFCTL_END) |
2872 | list_add(&slabp->list, &l3->slabs_full); | 2949 | list_add(&slabp->list, &l3->slabs_full); |
2873 | } else { | 2950 | else |
2874 | list_add(&slabp->list, &l3->slabs_partial); | 2951 | list_add(&slabp->list, &l3->slabs_partial); |
2875 | } | ||
2876 | 2952 | ||
2877 | spin_unlock(&l3->list_lock); | 2953 | spin_unlock(&l3->list_lock); |
2878 | goto done; | 2954 | goto done; |
2879 | 2955 | ||
2880 | must_grow: | 2956 | must_grow: |
2881 | spin_unlock(&l3->list_lock); | 2957 | spin_unlock(&l3->list_lock); |
2882 | x = cache_grow(cachep, flags, nodeid); | 2958 | x = cache_grow(cachep, flags, nodeid); |
2883 | 2959 | ||
@@ -2885,7 +2961,7 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node | |||
2885 | return NULL; | 2961 | return NULL; |
2886 | 2962 | ||
2887 | goto retry; | 2963 | goto retry; |
2888 | done: | 2964 | done: |
2889 | return obj; | 2965 | return obj; |
2890 | } | 2966 | } |
2891 | #endif | 2967 | #endif |
@@ -2958,7 +3034,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) | |||
2958 | } | 3034 | } |
2959 | 3035 | ||
2960 | free_block(cachep, ac->entry, batchcount, node); | 3036 | free_block(cachep, ac->entry, batchcount, node); |
2961 | free_done: | 3037 | free_done: |
2962 | #if STATS | 3038 | #if STATS |
2963 | { | 3039 | { |
2964 | int i = 0; | 3040 | int i = 0; |
@@ -2979,16 +3055,12 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) | |||
2979 | #endif | 3055 | #endif |
2980 | spin_unlock(&l3->list_lock); | 3056 | spin_unlock(&l3->list_lock); |
2981 | ac->avail -= batchcount; | 3057 | ac->avail -= batchcount; |
2982 | memmove(ac->entry, &(ac->entry[batchcount]), | 3058 | memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail); |
2983 | sizeof(void *) * ac->avail); | ||
2984 | } | 3059 | } |
2985 | 3060 | ||
2986 | /* | 3061 | /* |
2987 | * __cache_free | 3062 | * Release an obj back to its cache. If the obj has a constructed state, it must |
2988 | * Release an obj back to its cache. If the obj has a constructed | 3063 | * be in this state _before_ it is released. Called with disabled ints. |
2989 | * state, it must be in this state _before_ it is released. | ||
2990 | * | ||
2991 | * Called with disabled ints. | ||
2992 | */ | 3064 | */ |
2993 | static inline void __cache_free(struct kmem_cache *cachep, void *objp) | 3065 | static inline void __cache_free(struct kmem_cache *cachep, void *objp) |
2994 | { | 3066 | { |
@@ -3007,9 +3079,9 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp) | |||
3007 | if (unlikely(slabp->nodeid != numa_node_id())) { | 3079 | if (unlikely(slabp->nodeid != numa_node_id())) { |
3008 | struct array_cache *alien = NULL; | 3080 | struct array_cache *alien = NULL; |
3009 | int nodeid = slabp->nodeid; | 3081 | int nodeid = slabp->nodeid; |
3010 | struct kmem_list3 *l3 = | 3082 | struct kmem_list3 *l3; |
3011 | cachep->nodelists[numa_node_id()]; | ||
3012 | 3083 | ||
3084 | l3 = cachep->nodelists[numa_node_id()]; | ||
3013 | STATS_INC_NODEFREES(cachep); | 3085 | STATS_INC_NODEFREES(cachep); |
3014 | if (l3->alien && l3->alien[nodeid]) { | 3086 | if (l3->alien && l3->alien[nodeid]) { |
3015 | alien = l3->alien[nodeid]; | 3087 | alien = l3->alien[nodeid]; |
@@ -3056,6 +3128,23 @@ void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags) | |||
3056 | EXPORT_SYMBOL(kmem_cache_alloc); | 3128 | EXPORT_SYMBOL(kmem_cache_alloc); |
3057 | 3129 | ||
3058 | /** | 3130 | /** |
3131 | * kmem_cache_zalloc - Allocate an object. The memory is set to zero. | ||
3132 | * @cache: The cache to allocate from. | ||
3133 | * @flags: See kmalloc(). | ||
3134 | * | ||
3135 | * Allocate an object from this cache and set the allocated memory to zero. | ||
3136 | * The flags are only relevant if the cache has no available objects. | ||
3137 | */ | ||
3138 | void *kmem_cache_zalloc(struct kmem_cache *cache, gfp_t flags) | ||
3139 | { | ||
3140 | void *ret = __cache_alloc(cache, flags, __builtin_return_address(0)); | ||
3141 | if (ret) | ||
3142 | memset(ret, 0, obj_size(cache)); | ||
3143 | return ret; | ||
3144 | } | ||
3145 | EXPORT_SYMBOL(kmem_cache_zalloc); | ||
3146 | |||
3147 | /** | ||
3059 | * kmem_ptr_validate - check if an untrusted pointer might | 3148 | * kmem_ptr_validate - check if an untrusted pointer might |
3060 | * be a slab entry. | 3149 | * be a slab entry. |
3061 | * @cachep: the cache we're checking against | 3150 | * @cachep: the cache we're checking against |
@@ -3093,7 +3182,7 @@ int fastcall kmem_ptr_validate(struct kmem_cache *cachep, void *ptr) | |||
3093 | if (unlikely(page_get_cache(page) != cachep)) | 3182 | if (unlikely(page_get_cache(page) != cachep)) |
3094 | goto out; | 3183 | goto out; |
3095 | return 1; | 3184 | return 1; |
3096 | out: | 3185 | out: |
3097 | return 0; | 3186 | return 0; |
3098 | } | 3187 | } |
3099 | 3188 | ||
@@ -3119,7 +3208,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
3119 | local_irq_save(save_flags); | 3208 | local_irq_save(save_flags); |
3120 | 3209 | ||
3121 | if (nodeid == -1 || nodeid == numa_node_id() || | 3210 | if (nodeid == -1 || nodeid == numa_node_id() || |
3122 | !cachep->nodelists[nodeid]) | 3211 | !cachep->nodelists[nodeid]) |
3123 | ptr = ____cache_alloc(cachep, flags); | 3212 | ptr = ____cache_alloc(cachep, flags); |
3124 | else | 3213 | else |
3125 | ptr = __cache_alloc_node(cachep, flags, nodeid); | 3214 | ptr = __cache_alloc_node(cachep, flags, nodeid); |
@@ -3148,6 +3237,7 @@ EXPORT_SYMBOL(kmalloc_node); | |||
3148 | * kmalloc - allocate memory | 3237 | * kmalloc - allocate memory |
3149 | * @size: how many bytes of memory are required. | 3238 | * @size: how many bytes of memory are required. |
3150 | * @flags: the type of memory to allocate. | 3239 | * @flags: the type of memory to allocate. |
3240 | * @caller: return address of the calling function, recorded for debug tracking | ||
3151 | * | 3241 | * |
3152 | * kmalloc is the normal method of allocating memory | 3242 | * kmalloc is the normal method of allocating memory |
3153 | * in the kernel. | 3243 | * in the kernel. |
@@ -3181,22 +3271,23 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, | |||
3181 | return __cache_alloc(cachep, flags, caller); | 3271 | return __cache_alloc(cachep, flags, caller); |
3182 | } | 3272 | } |
3183 | 3273 | ||
3184 | #ifndef CONFIG_DEBUG_SLAB | ||
3185 | 3274 | ||
3186 | void *__kmalloc(size_t size, gfp_t flags) | 3275 | void *__kmalloc(size_t size, gfp_t flags) |
3187 | { | 3276 | { |
3277 | #ifndef CONFIG_DEBUG_SLAB | ||
3188 | return __do_kmalloc(size, flags, NULL); | 3278 | return __do_kmalloc(size, flags, NULL); |
3279 | #else | ||
3280 | return __do_kmalloc(size, flags, __builtin_return_address(0)); | ||
3281 | #endif | ||
3189 | } | 3282 | } |
3190 | EXPORT_SYMBOL(__kmalloc); | 3283 | EXPORT_SYMBOL(__kmalloc); |
3191 | 3284 | ||
3192 | #else | 3285 | #ifdef CONFIG_DEBUG_SLAB |
3193 | |||
3194 | void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller) | 3286 | void *__kmalloc_track_caller(size_t size, gfp_t flags, void *caller) |
3195 | { | 3287 | { |
3196 | return __do_kmalloc(size, flags, caller); | 3288 | return __do_kmalloc(size, flags, caller); |
3197 | } | 3289 | } |
3198 | EXPORT_SYMBOL(__kmalloc_track_caller); | 3290 | EXPORT_SYMBOL(__kmalloc_track_caller); |
3199 | |||
3200 | #endif | 3291 | #endif |
3201 | 3292 | ||
3202 | #ifdef CONFIG_SMP | 3293 | #ifdef CONFIG_SMP |
@@ -3220,7 +3311,7 @@ void *__alloc_percpu(size_t size) | |||
3220 | * and we have no way of figuring out how to fix the array | 3311 | * and we have no way of figuring out how to fix the array |
3221 | * that we have allocated then.... | 3312 | * that we have allocated then.... |
3222 | */ | 3313 | */ |
3223 | for_each_cpu(i) { | 3314 | for_each_possible_cpu(i) { |
3224 | int node = cpu_to_node(i); | 3315 | int node = cpu_to_node(i); |
3225 | 3316 | ||
3226 | if (node_online(node)) | 3317 | if (node_online(node)) |
@@ -3236,7 +3327,7 @@ void *__alloc_percpu(size_t size) | |||
3236 | /* Catch derefs w/o wrappers */ | 3327 | /* Catch derefs w/o wrappers */ |
3237 | return (void *)(~(unsigned long)pdata); | 3328 | return (void *)(~(unsigned long)pdata); |
3238 | 3329 | ||
3239 | unwind_oom: | 3330 | unwind_oom: |
3240 | while (--i >= 0) { | 3331 | while (--i >= 0) { |
3241 | if (!cpu_possible(i)) | 3332 | if (!cpu_possible(i)) |
3242 | continue; | 3333 | continue; |
@@ -3307,7 +3398,7 @@ void free_percpu(const void *objp) | |||
3307 | /* | 3398 | /* |
3308 | * We allocate for all cpus so we cannot use for online cpu here. | 3399 | * We allocate for all cpus so we cannot use for online cpu here. |
3309 | */ | 3400 | */ |
3310 | for_each_cpu(i) | 3401 | for_each_possible_cpu(i) |
3311 | kfree(p->ptrs[i]); | 3402 | kfree(p->ptrs[i]); |
3312 | kfree(p); | 3403 | kfree(p); |
3313 | } | 3404 | } |
@@ -3327,61 +3418,86 @@ const char *kmem_cache_name(struct kmem_cache *cachep) | |||
3327 | EXPORT_SYMBOL_GPL(kmem_cache_name); | 3418 | EXPORT_SYMBOL_GPL(kmem_cache_name); |
3328 | 3419 | ||
3329 | /* | 3420 | /* |
3330 | * This initializes kmem_list3 for all nodes. | 3421 | * This initializes kmem_list3 or resizes varioius caches for all nodes. |
3331 | */ | 3422 | */ |
3332 | static int alloc_kmemlist(struct kmem_cache *cachep) | 3423 | static int alloc_kmemlist(struct kmem_cache *cachep) |
3333 | { | 3424 | { |
3334 | int node; | 3425 | int node; |
3335 | struct kmem_list3 *l3; | 3426 | struct kmem_list3 *l3; |
3336 | int err = 0; | 3427 | struct array_cache *new_shared; |
3428 | struct array_cache **new_alien; | ||
3337 | 3429 | ||
3338 | for_each_online_node(node) { | 3430 | for_each_online_node(node) { |
3339 | struct array_cache *nc = NULL, *new; | 3431 | |
3340 | struct array_cache **new_alien = NULL; | 3432 | new_alien = alloc_alien_cache(node, cachep->limit); |
3341 | #ifdef CONFIG_NUMA | 3433 | if (!new_alien) |
3342 | if (!(new_alien = alloc_alien_cache(node, cachep->limit))) | ||
3343 | goto fail; | 3434 | goto fail; |
3344 | #endif | 3435 | |
3345 | if (!(new = alloc_arraycache(node, (cachep->shared * | 3436 | new_shared = alloc_arraycache(node, |
3346 | cachep->batchcount), | 3437 | cachep->shared*cachep->batchcount, |
3347 | 0xbaadf00d))) | 3438 | 0xbaadf00d); |
3439 | if (!new_shared) { | ||
3440 | free_alien_cache(new_alien); | ||
3348 | goto fail; | 3441 | goto fail; |
3349 | if ((l3 = cachep->nodelists[node])) { | 3442 | } |
3443 | |||
3444 | l3 = cachep->nodelists[node]; | ||
3445 | if (l3) { | ||
3446 | struct array_cache *shared = l3->shared; | ||
3350 | 3447 | ||
3351 | spin_lock_irq(&l3->list_lock); | 3448 | spin_lock_irq(&l3->list_lock); |
3352 | 3449 | ||
3353 | if ((nc = cachep->nodelists[node]->shared)) | 3450 | if (shared) |
3354 | free_block(cachep, nc->entry, nc->avail, node); | 3451 | free_block(cachep, shared->entry, |
3452 | shared->avail, node); | ||
3355 | 3453 | ||
3356 | l3->shared = new; | 3454 | l3->shared = new_shared; |
3357 | if (!cachep->nodelists[node]->alien) { | 3455 | if (!l3->alien) { |
3358 | l3->alien = new_alien; | 3456 | l3->alien = new_alien; |
3359 | new_alien = NULL; | 3457 | new_alien = NULL; |
3360 | } | 3458 | } |
3361 | l3->free_limit = (1 + nr_cpus_node(node)) * | 3459 | l3->free_limit = (1 + nr_cpus_node(node)) * |
3362 | cachep->batchcount + cachep->num; | 3460 | cachep->batchcount + cachep->num; |
3363 | spin_unlock_irq(&l3->list_lock); | 3461 | spin_unlock_irq(&l3->list_lock); |
3364 | kfree(nc); | 3462 | kfree(shared); |
3365 | free_alien_cache(new_alien); | 3463 | free_alien_cache(new_alien); |
3366 | continue; | 3464 | continue; |
3367 | } | 3465 | } |
3368 | if (!(l3 = kmalloc_node(sizeof(struct kmem_list3), | 3466 | l3 = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, node); |
3369 | GFP_KERNEL, node))) | 3467 | if (!l3) { |
3468 | free_alien_cache(new_alien); | ||
3469 | kfree(new_shared); | ||
3370 | goto fail; | 3470 | goto fail; |
3471 | } | ||
3371 | 3472 | ||
3372 | kmem_list3_init(l3); | 3473 | kmem_list3_init(l3); |
3373 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + | 3474 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + |
3374 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; | 3475 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; |
3375 | l3->shared = new; | 3476 | l3->shared = new_shared; |
3376 | l3->alien = new_alien; | 3477 | l3->alien = new_alien; |
3377 | l3->free_limit = (1 + nr_cpus_node(node)) * | 3478 | l3->free_limit = (1 + nr_cpus_node(node)) * |
3378 | cachep->batchcount + cachep->num; | 3479 | cachep->batchcount + cachep->num; |
3379 | cachep->nodelists[node] = l3; | 3480 | cachep->nodelists[node] = l3; |
3380 | } | 3481 | } |
3381 | return err; | 3482 | return 0; |
3382 | fail: | 3483 | |
3383 | err = -ENOMEM; | 3484 | fail: |
3384 | return err; | 3485 | if (!cachep->next.next) { |
3486 | /* Cache is not active yet. Roll back what we did */ | ||
3487 | node--; | ||
3488 | while (node >= 0) { | ||
3489 | if (cachep->nodelists[node]) { | ||
3490 | l3 = cachep->nodelists[node]; | ||
3491 | |||
3492 | kfree(l3->shared); | ||
3493 | free_alien_cache(l3->alien); | ||
3494 | kfree(l3); | ||
3495 | cachep->nodelists[node] = NULL; | ||
3496 | } | ||
3497 | node--; | ||
3498 | } | ||
3499 | } | ||
3500 | return -ENOMEM; | ||
3385 | } | 3501 | } |
3386 | 3502 | ||
3387 | struct ccupdate_struct { | 3503 | struct ccupdate_struct { |
@@ -3391,7 +3507,7 @@ struct ccupdate_struct { | |||
3391 | 3507 | ||
3392 | static void do_ccupdate_local(void *info) | 3508 | static void do_ccupdate_local(void *info) |
3393 | { | 3509 | { |
3394 | struct ccupdate_struct *new = (struct ccupdate_struct *)info; | 3510 | struct ccupdate_struct *new = info; |
3395 | struct array_cache *old; | 3511 | struct array_cache *old; |
3396 | 3512 | ||
3397 | check_irq_off(); | 3513 | check_irq_off(); |
@@ -3401,16 +3517,17 @@ static void do_ccupdate_local(void *info) | |||
3401 | new->new[smp_processor_id()] = old; | 3517 | new->new[smp_processor_id()] = old; |
3402 | } | 3518 | } |
3403 | 3519 | ||
3404 | static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount, | 3520 | /* Always called with the cache_chain_mutex held */ |
3405 | int shared) | 3521 | static int do_tune_cpucache(struct kmem_cache *cachep, int limit, |
3522 | int batchcount, int shared) | ||
3406 | { | 3523 | { |
3407 | struct ccupdate_struct new; | 3524 | struct ccupdate_struct new; |
3408 | int i, err; | 3525 | int i, err; |
3409 | 3526 | ||
3410 | memset(&new.new, 0, sizeof(new.new)); | 3527 | memset(&new.new, 0, sizeof(new.new)); |
3411 | for_each_online_cpu(i) { | 3528 | for_each_online_cpu(i) { |
3412 | new.new[i] = | 3529 | new.new[i] = alloc_arraycache(cpu_to_node(i), limit, |
3413 | alloc_arraycache(cpu_to_node(i), limit, batchcount); | 3530 | batchcount); |
3414 | if (!new.new[i]) { | 3531 | if (!new.new[i]) { |
3415 | for (i--; i >= 0; i--) | 3532 | for (i--; i >= 0; i--) |
3416 | kfree(new.new[i]); | 3533 | kfree(new.new[i]); |
@@ -3419,14 +3536,12 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount | |||
3419 | } | 3536 | } |
3420 | new.cachep = cachep; | 3537 | new.cachep = cachep; |
3421 | 3538 | ||
3422 | smp_call_function_all_cpus(do_ccupdate_local, (void *)&new); | 3539 | on_each_cpu(do_ccupdate_local, (void *)&new, 1, 1); |
3423 | 3540 | ||
3424 | check_irq_on(); | 3541 | check_irq_on(); |
3425 | spin_lock(&cachep->spinlock); | ||
3426 | cachep->batchcount = batchcount; | 3542 | cachep->batchcount = batchcount; |
3427 | cachep->limit = limit; | 3543 | cachep->limit = limit; |
3428 | cachep->shared = shared; | 3544 | cachep->shared = shared; |
3429 | spin_unlock(&cachep->spinlock); | ||
3430 | 3545 | ||
3431 | for_each_online_cpu(i) { | 3546 | for_each_online_cpu(i) { |
3432 | struct array_cache *ccold = new.new[i]; | 3547 | struct array_cache *ccold = new.new[i]; |
@@ -3447,15 +3562,17 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount | |||
3447 | return 0; | 3562 | return 0; |
3448 | } | 3563 | } |
3449 | 3564 | ||
3565 | /* Called with cache_chain_mutex held always */ | ||
3450 | static void enable_cpucache(struct kmem_cache *cachep) | 3566 | static void enable_cpucache(struct kmem_cache *cachep) |
3451 | { | 3567 | { |
3452 | int err; | 3568 | int err; |
3453 | int limit, shared; | 3569 | int limit, shared; |
3454 | 3570 | ||
3455 | /* The head array serves three purposes: | 3571 | /* |
3572 | * The head array serves three purposes: | ||
3456 | * - create a LIFO ordering, i.e. return objects that are cache-warm | 3573 | * - create a LIFO ordering, i.e. return objects that are cache-warm |
3457 | * - reduce the number of spinlock operations. | 3574 | * - reduce the number of spinlock operations. |
3458 | * - reduce the number of linked list operations on the slab and | 3575 | * - reduce the number of linked list operations on the slab and |
3459 | * bufctl chains: array operations are cheaper. | 3576 | * bufctl chains: array operations are cheaper. |
3460 | * The numbers are guessed, we should auto-tune as described by | 3577 | * The numbers are guessed, we should auto-tune as described by |
3461 | * Bonwick. | 3578 | * Bonwick. |
@@ -3471,7 +3588,8 @@ static void enable_cpucache(struct kmem_cache *cachep) | |||
3471 | else | 3588 | else |
3472 | limit = 120; | 3589 | limit = 120; |
3473 | 3590 | ||
3474 | /* Cpu bound tasks (e.g. network routing) can exhibit cpu bound | 3591 | /* |
3592 | * CPU bound tasks (e.g. network routing) can exhibit cpu bound | ||
3475 | * allocation behaviour: Most allocs on one cpu, most free operations | 3593 | * allocation behaviour: Most allocs on one cpu, most free operations |
3476 | * on another cpu. For these cases, an efficient object passing between | 3594 | * on another cpu. For these cases, an efficient object passing between |
3477 | * cpus is necessary. This is provided by a shared array. The array | 3595 | * cpus is necessary. This is provided by a shared array. The array |
@@ -3486,9 +3604,9 @@ static void enable_cpucache(struct kmem_cache *cachep) | |||
3486 | #endif | 3604 | #endif |
3487 | 3605 | ||
3488 | #if DEBUG | 3606 | #if DEBUG |
3489 | /* With debugging enabled, large batchcount lead to excessively | 3607 | /* |
3490 | * long periods with disabled local interrupts. Limit the | 3608 | * With debugging enabled, large batchcount lead to excessively long |
3491 | * batchcount | 3609 | * periods with disabled local interrupts. Limit the batchcount |
3492 | */ | 3610 | */ |
3493 | if (limit > 32) | 3611 | if (limit > 32) |
3494 | limit = 32; | 3612 | limit = 32; |
@@ -3499,23 +3617,32 @@ static void enable_cpucache(struct kmem_cache *cachep) | |||
3499 | cachep->name, -err); | 3617 | cachep->name, -err); |
3500 | } | 3618 | } |
3501 | 3619 | ||
3502 | static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac, | 3620 | /* |
3503 | int force, int node) | 3621 | * Drain an array if it contains any elements taking the l3 lock only if |
3622 | * necessary. Note that the l3 listlock also protects the array_cache | ||
3623 | * if drain_array() is used on the shared array. | ||
3624 | */ | ||
3625 | void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, | ||
3626 | struct array_cache *ac, int force, int node) | ||
3504 | { | 3627 | { |
3505 | int tofree; | 3628 | int tofree; |
3506 | 3629 | ||
3507 | check_spinlock_acquired_node(cachep, node); | 3630 | if (!ac || !ac->avail) |
3631 | return; | ||
3508 | if (ac->touched && !force) { | 3632 | if (ac->touched && !force) { |
3509 | ac->touched = 0; | 3633 | ac->touched = 0; |
3510 | } else if (ac->avail) { | 3634 | } else { |
3511 | tofree = force ? ac->avail : (ac->limit + 4) / 5; | 3635 | spin_lock_irq(&l3->list_lock); |
3512 | if (tofree > ac->avail) { | 3636 | if (ac->avail) { |
3513 | tofree = (ac->avail + 1) / 2; | 3637 | tofree = force ? ac->avail : (ac->limit + 4) / 5; |
3638 | if (tofree > ac->avail) | ||
3639 | tofree = (ac->avail + 1) / 2; | ||
3640 | free_block(cachep, ac->entry, tofree, node); | ||
3641 | ac->avail -= tofree; | ||
3642 | memmove(ac->entry, &(ac->entry[tofree]), | ||
3643 | sizeof(void *) * ac->avail); | ||
3514 | } | 3644 | } |
3515 | free_block(cachep, ac->entry, tofree, node); | 3645 | spin_unlock_irq(&l3->list_lock); |
3516 | ac->avail -= tofree; | ||
3517 | memmove(ac->entry, &(ac->entry[tofree]), | ||
3518 | sizeof(void *) * ac->avail); | ||
3519 | } | 3646 | } |
3520 | } | 3647 | } |
3521 | 3648 | ||
@@ -3528,13 +3655,14 @@ static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac | |||
3528 | * - clear the per-cpu caches for this CPU. | 3655 | * - clear the per-cpu caches for this CPU. |
3529 | * - return freeable pages to the main free memory pool. | 3656 | * - return freeable pages to the main free memory pool. |
3530 | * | 3657 | * |
3531 | * If we cannot acquire the cache chain mutex then just give up - we'll | 3658 | * If we cannot acquire the cache chain mutex then just give up - we'll try |
3532 | * try again on the next iteration. | 3659 | * again on the next iteration. |
3533 | */ | 3660 | */ |
3534 | static void cache_reap(void *unused) | 3661 | static void cache_reap(void *unused) |
3535 | { | 3662 | { |
3536 | struct list_head *walk; | 3663 | struct list_head *walk; |
3537 | struct kmem_list3 *l3; | 3664 | struct kmem_list3 *l3; |
3665 | int node = numa_node_id(); | ||
3538 | 3666 | ||
3539 | if (!mutex_trylock(&cache_chain_mutex)) { | 3667 | if (!mutex_trylock(&cache_chain_mutex)) { |
3540 | /* Give up. Setup the next iteration. */ | 3668 | /* Give up. Setup the next iteration. */ |
@@ -3550,65 +3678,72 @@ static void cache_reap(void *unused) | |||
3550 | struct slab *slabp; | 3678 | struct slab *slabp; |
3551 | 3679 | ||
3552 | searchp = list_entry(walk, struct kmem_cache, next); | 3680 | searchp = list_entry(walk, struct kmem_cache, next); |
3553 | |||
3554 | if (searchp->flags & SLAB_NO_REAP) | ||
3555 | goto next; | ||
3556 | |||
3557 | check_irq_on(); | 3681 | check_irq_on(); |
3558 | 3682 | ||
3559 | l3 = searchp->nodelists[numa_node_id()]; | 3683 | /* |
3684 | * We only take the l3 lock if absolutely necessary and we | ||
3685 | * have established with reasonable certainty that | ||
3686 | * we can do some work if the lock was obtained. | ||
3687 | */ | ||
3688 | l3 = searchp->nodelists[node]; | ||
3689 | |||
3560 | reap_alien(searchp, l3); | 3690 | reap_alien(searchp, l3); |
3561 | spin_lock_irq(&l3->list_lock); | ||
3562 | 3691 | ||
3563 | drain_array_locked(searchp, cpu_cache_get(searchp), 0, | 3692 | drain_array(searchp, l3, cpu_cache_get(searchp), 0, node); |
3564 | numa_node_id()); | ||
3565 | 3693 | ||
3694 | /* | ||
3695 | * These are racy checks but it does not matter | ||
3696 | * if we skip one check or scan twice. | ||
3697 | */ | ||
3566 | if (time_after(l3->next_reap, jiffies)) | 3698 | if (time_after(l3->next_reap, jiffies)) |
3567 | goto next_unlock; | 3699 | goto next; |
3568 | 3700 | ||
3569 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3; | 3701 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3; |
3570 | 3702 | ||
3571 | if (l3->shared) | 3703 | drain_array(searchp, l3, l3->shared, 0, node); |
3572 | drain_array_locked(searchp, l3->shared, 0, | ||
3573 | numa_node_id()); | ||
3574 | 3704 | ||
3575 | if (l3->free_touched) { | 3705 | if (l3->free_touched) { |
3576 | l3->free_touched = 0; | 3706 | l3->free_touched = 0; |
3577 | goto next_unlock; | 3707 | goto next; |
3578 | } | 3708 | } |
3579 | 3709 | ||
3580 | tofree = | 3710 | tofree = (l3->free_limit + 5 * searchp->num - 1) / |
3581 | (l3->free_limit + 5 * searchp->num - | 3711 | (5 * searchp->num); |
3582 | 1) / (5 * searchp->num); | ||
3583 | do { | 3712 | do { |
3713 | /* | ||
3714 | * Do not lock if there are no free blocks. | ||
3715 | */ | ||
3716 | if (list_empty(&l3->slabs_free)) | ||
3717 | break; | ||
3718 | |||
3719 | spin_lock_irq(&l3->list_lock); | ||
3584 | p = l3->slabs_free.next; | 3720 | p = l3->slabs_free.next; |
3585 | if (p == &(l3->slabs_free)) | 3721 | if (p == &(l3->slabs_free)) { |
3722 | spin_unlock_irq(&l3->list_lock); | ||
3586 | break; | 3723 | break; |
3724 | } | ||
3587 | 3725 | ||
3588 | slabp = list_entry(p, struct slab, list); | 3726 | slabp = list_entry(p, struct slab, list); |
3589 | BUG_ON(slabp->inuse); | 3727 | BUG_ON(slabp->inuse); |
3590 | list_del(&slabp->list); | 3728 | list_del(&slabp->list); |
3591 | STATS_INC_REAPED(searchp); | 3729 | STATS_INC_REAPED(searchp); |
3592 | 3730 | ||
3593 | /* Safe to drop the lock. The slab is no longer | 3731 | /* |
3594 | * linked to the cache. | 3732 | * Safe to drop the lock. The slab is no longer linked |
3595 | * searchp cannot disappear, we hold | 3733 | * to the cache. searchp cannot disappear, we hold |
3596 | * cache_chain_lock | 3734 | * cache_chain_lock |
3597 | */ | 3735 | */ |
3598 | l3->free_objects -= searchp->num; | 3736 | l3->free_objects -= searchp->num; |
3599 | spin_unlock_irq(&l3->list_lock); | 3737 | spin_unlock_irq(&l3->list_lock); |
3600 | slab_destroy(searchp, slabp); | 3738 | slab_destroy(searchp, slabp); |
3601 | spin_lock_irq(&l3->list_lock); | ||
3602 | } while (--tofree > 0); | 3739 | } while (--tofree > 0); |
3603 | next_unlock: | 3740 | next: |
3604 | spin_unlock_irq(&l3->list_lock); | ||
3605 | next: | ||
3606 | cond_resched(); | 3741 | cond_resched(); |
3607 | } | 3742 | } |
3608 | check_irq_on(); | 3743 | check_irq_on(); |
3609 | mutex_unlock(&cache_chain_mutex); | 3744 | mutex_unlock(&cache_chain_mutex); |
3610 | next_reap_node(); | 3745 | next_reap_node(); |
3611 | /* Setup the next iteration */ | 3746 | /* Set up the next iteration */ |
3612 | schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC); | 3747 | schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC); |
3613 | } | 3748 | } |
3614 | 3749 | ||
@@ -3658,8 +3793,8 @@ static void *s_next(struct seq_file *m, void *p, loff_t *pos) | |||
3658 | { | 3793 | { |
3659 | struct kmem_cache *cachep = p; | 3794 | struct kmem_cache *cachep = p; |
3660 | ++*pos; | 3795 | ++*pos; |
3661 | return cachep->next.next == &cache_chain ? NULL | 3796 | return cachep->next.next == &cache_chain ? |
3662 | : list_entry(cachep->next.next, struct kmem_cache, next); | 3797 | NULL : list_entry(cachep->next.next, struct kmem_cache, next); |
3663 | } | 3798 | } |
3664 | 3799 | ||
3665 | static void s_stop(struct seq_file *m, void *p) | 3800 | static void s_stop(struct seq_file *m, void *p) |
@@ -3681,7 +3816,6 @@ static int s_show(struct seq_file *m, void *p) | |||
3681 | int node; | 3816 | int node; |
3682 | struct kmem_list3 *l3; | 3817 | struct kmem_list3 *l3; |
3683 | 3818 | ||
3684 | spin_lock(&cachep->spinlock); | ||
3685 | active_objs = 0; | 3819 | active_objs = 0; |
3686 | num_slabs = 0; | 3820 | num_slabs = 0; |
3687 | for_each_online_node(node) { | 3821 | for_each_online_node(node) { |
@@ -3748,7 +3882,9 @@ static int s_show(struct seq_file *m, void *p) | |||
3748 | unsigned long node_frees = cachep->node_frees; | 3882 | unsigned long node_frees = cachep->node_frees; |
3749 | 3883 | ||
3750 | seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \ | 3884 | seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \ |
3751 | %4lu %4lu %4lu %4lu", allocs, high, grown, reaped, errors, max_freeable, node_allocs, node_frees); | 3885 | %4lu %4lu %4lu %4lu", allocs, high, grown, |
3886 | reaped, errors, max_freeable, node_allocs, | ||
3887 | node_frees); | ||
3752 | } | 3888 | } |
3753 | /* cpu stats */ | 3889 | /* cpu stats */ |
3754 | { | 3890 | { |
@@ -3762,7 +3898,6 @@ static int s_show(struct seq_file *m, void *p) | |||
3762 | } | 3898 | } |
3763 | #endif | 3899 | #endif |
3764 | seq_putc(m, '\n'); | 3900 | seq_putc(m, '\n'); |
3765 | spin_unlock(&cachep->spinlock); | ||
3766 | return 0; | 3901 | return 0; |
3767 | } | 3902 | } |
3768 | 3903 | ||
@@ -3820,13 +3955,12 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer, | |||
3820 | mutex_lock(&cache_chain_mutex); | 3955 | mutex_lock(&cache_chain_mutex); |
3821 | res = -EINVAL; | 3956 | res = -EINVAL; |
3822 | list_for_each(p, &cache_chain) { | 3957 | list_for_each(p, &cache_chain) { |
3823 | struct kmem_cache *cachep = list_entry(p, struct kmem_cache, | 3958 | struct kmem_cache *cachep; |
3824 | next); | ||
3825 | 3959 | ||
3960 | cachep = list_entry(p, struct kmem_cache, next); | ||
3826 | if (!strcmp(cachep->name, kbuf)) { | 3961 | if (!strcmp(cachep->name, kbuf)) { |
3827 | if (limit < 1 || | 3962 | if (limit < 1 || batchcount < 1 || |
3828 | batchcount < 1 || | 3963 | batchcount > limit || shared < 0) { |
3829 | batchcount > limit || shared < 0) { | ||
3830 | res = 0; | 3964 | res = 0; |
3831 | } else { | 3965 | } else { |
3832 | res = do_tune_cpucache(cachep, limit, | 3966 | res = do_tune_cpucache(cachep, limit, |
@@ -3840,6 +3974,159 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer, | |||
3840 | res = count; | 3974 | res = count; |
3841 | return res; | 3975 | return res; |
3842 | } | 3976 | } |
3977 | |||
3978 | #ifdef CONFIG_DEBUG_SLAB_LEAK | ||
3979 | |||
3980 | static void *leaks_start(struct seq_file *m, loff_t *pos) | ||
3981 | { | ||
3982 | loff_t n = *pos; | ||
3983 | struct list_head *p; | ||
3984 | |||
3985 | mutex_lock(&cache_chain_mutex); | ||
3986 | p = cache_chain.next; | ||
3987 | while (n--) { | ||
3988 | p = p->next; | ||
3989 | if (p == &cache_chain) | ||
3990 | return NULL; | ||
3991 | } | ||
3992 | return list_entry(p, struct kmem_cache, next); | ||
3993 | } | ||
3994 | |||
3995 | static inline int add_caller(unsigned long *n, unsigned long v) | ||
3996 | { | ||
3997 | unsigned long *p; | ||
3998 | int l; | ||
3999 | if (!v) | ||
4000 | return 1; | ||
4001 | l = n[1]; | ||
4002 | p = n + 2; | ||
4003 | while (l) { | ||
4004 | int i = l/2; | ||
4005 | unsigned long *q = p + 2 * i; | ||
4006 | if (*q == v) { | ||
4007 | q[1]++; | ||
4008 | return 1; | ||
4009 | } | ||
4010 | if (*q > v) { | ||
4011 | l = i; | ||
4012 | } else { | ||
4013 | p = q + 2; | ||
4014 | l -= i + 1; | ||
4015 | } | ||
4016 | } | ||
4017 | if (++n[1] == n[0]) | ||
4018 | return 0; | ||
4019 | memmove(p + 2, p, n[1] * 2 * sizeof(unsigned long) - ((void *)p - (void *)n)); | ||
4020 | p[0] = v; | ||
4021 | p[1] = 1; | ||
4022 | return 1; | ||
4023 | } | ||
4024 | |||
4025 | static void handle_slab(unsigned long *n, struct kmem_cache *c, struct slab *s) | ||
4026 | { | ||
4027 | void *p; | ||
4028 | int i; | ||
4029 | if (n[0] == n[1]) | ||
4030 | return; | ||
4031 | for (i = 0, p = s->s_mem; i < c->num; i++, p += c->buffer_size) { | ||
4032 | if (slab_bufctl(s)[i] != BUFCTL_ACTIVE) | ||
4033 | continue; | ||
4034 | if (!add_caller(n, (unsigned long)*dbg_userword(c, p))) | ||
4035 | return; | ||
4036 | } | ||
4037 | } | ||
4038 | |||
4039 | static void show_symbol(struct seq_file *m, unsigned long address) | ||
4040 | { | ||
4041 | #ifdef CONFIG_KALLSYMS | ||
4042 | char *modname; | ||
4043 | const char *name; | ||
4044 | unsigned long offset, size; | ||
4045 | char namebuf[KSYM_NAME_LEN+1]; | ||
4046 | |||
4047 | name = kallsyms_lookup(address, &size, &offset, &modname, namebuf); | ||
4048 | |||
4049 | if (name) { | ||
4050 | seq_printf(m, "%s+%#lx/%#lx", name, offset, size); | ||
4051 | if (modname) | ||
4052 | seq_printf(m, " [%s]", modname); | ||
4053 | return; | ||
4054 | } | ||
4055 | #endif | ||
4056 | seq_printf(m, "%p", (void *)address); | ||
4057 | } | ||
4058 | |||
4059 | static int leaks_show(struct seq_file *m, void *p) | ||
4060 | { | ||
4061 | struct kmem_cache *cachep = p; | ||
4062 | struct list_head *q; | ||
4063 | struct slab *slabp; | ||
4064 | struct kmem_list3 *l3; | ||
4065 | const char *name; | ||
4066 | unsigned long *n = m->private; | ||
4067 | int node; | ||
4068 | int i; | ||
4069 | |||
4070 | if (!(cachep->flags & SLAB_STORE_USER)) | ||
4071 | return 0; | ||
4072 | if (!(cachep->flags & SLAB_RED_ZONE)) | ||
4073 | return 0; | ||
4074 | |||
4075 | /* OK, we can do it */ | ||
4076 | |||
4077 | n[1] = 0; | ||
4078 | |||
4079 | for_each_online_node(node) { | ||
4080 | l3 = cachep->nodelists[node]; | ||
4081 | if (!l3) | ||
4082 | continue; | ||
4083 | |||
4084 | check_irq_on(); | ||
4085 | spin_lock_irq(&l3->list_lock); | ||
4086 | |||
4087 | list_for_each(q, &l3->slabs_full) { | ||
4088 | slabp = list_entry(q, struct slab, list); | ||
4089 | handle_slab(n, cachep, slabp); | ||
4090 | } | ||
4091 | list_for_each(q, &l3->slabs_partial) { | ||
4092 | slabp = list_entry(q, struct slab, list); | ||
4093 | handle_slab(n, cachep, slabp); | ||
4094 | } | ||
4095 | spin_unlock_irq(&l3->list_lock); | ||
4096 | } | ||
4097 | name = cachep->name; | ||
4098 | if (n[0] == n[1]) { | ||
4099 | /* Increase the buffer size */ | ||
4100 | mutex_unlock(&cache_chain_mutex); | ||
4101 | m->private = kzalloc(n[0] * 4 * sizeof(unsigned long), GFP_KERNEL); | ||
4102 | if (!m->private) { | ||
4103 | /* Too bad, we are really out */ | ||
4104 | m->private = n; | ||
4105 | mutex_lock(&cache_chain_mutex); | ||
4106 | return -ENOMEM; | ||
4107 | } | ||
4108 | *(unsigned long *)m->private = n[0] * 2; | ||
4109 | kfree(n); | ||
4110 | mutex_lock(&cache_chain_mutex); | ||
4111 | /* Now make sure this entry will be retried */ | ||
4112 | m->count = m->size; | ||
4113 | return 0; | ||
4114 | } | ||
4115 | for (i = 0; i < n[1]; i++) { | ||
4116 | seq_printf(m, "%s: %lu ", name, n[2*i+3]); | ||
4117 | show_symbol(m, n[2*i+2]); | ||
4118 | seq_putc(m, '\n'); | ||
4119 | } | ||
4120 | return 0; | ||
4121 | } | ||
4122 | |||
4123 | struct seq_operations slabstats_op = { | ||
4124 | .start = leaks_start, | ||
4125 | .next = s_next, | ||
4126 | .stop = s_stop, | ||
4127 | .show = leaks_show, | ||
4128 | }; | ||
4129 | #endif | ||
3843 | #endif | 4130 | #endif |
3844 | 4131 | ||
3845 | /** | 4132 | /** |