diff options
Diffstat (limited to 'mm/slab.c')
| -rw-r--r-- | mm/slab.c | 890 |
1 files changed, 463 insertions, 427 deletions
| @@ -50,7 +50,7 @@ | |||
| 50 | * The head array is strictly LIFO and should improve the cache hit rates. | 50 | * The head array is strictly LIFO and should improve the cache hit rates. |
| 51 | * On SMP, it additionally reduces the spinlock operations. | 51 | * On SMP, it additionally reduces the spinlock operations. |
| 52 | * | 52 | * |
| 53 | * The c_cpuarray may not be read with enabled local interrupts - | 53 | * The c_cpuarray may not be read with enabled local interrupts - |
| 54 | * it's changed with a smp_call_function(). | 54 | * it's changed with a smp_call_function(). |
| 55 | * | 55 | * |
| 56 | * SMP synchronization: | 56 | * SMP synchronization: |
| @@ -170,12 +170,12 @@ | |||
| 170 | #if DEBUG | 170 | #if DEBUG |
| 171 | # define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \ | 171 | # define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \ |
| 172 | SLAB_POISON | SLAB_HWCACHE_ALIGN | \ | 172 | SLAB_POISON | SLAB_HWCACHE_ALIGN | \ |
| 173 | SLAB_NO_REAP | SLAB_CACHE_DMA | \ | 173 | SLAB_CACHE_DMA | \ |
| 174 | SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \ | 174 | SLAB_MUST_HWCACHE_ALIGN | SLAB_STORE_USER | \ |
| 175 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ | 175 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ |
| 176 | SLAB_DESTROY_BY_RCU) | 176 | SLAB_DESTROY_BY_RCU) |
| 177 | #else | 177 | #else |
| 178 | # define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP | \ | 178 | # define CREATE_MASK (SLAB_HWCACHE_ALIGN | \ |
| 179 | SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \ | 179 | SLAB_CACHE_DMA | SLAB_MUST_HWCACHE_ALIGN | \ |
| 180 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ | 180 | SLAB_RECLAIM_ACCOUNT | SLAB_PANIC | \ |
| 181 | SLAB_DESTROY_BY_RCU) | 181 | SLAB_DESTROY_BY_RCU) |
| @@ -266,16 +266,17 @@ struct array_cache { | |||
| 266 | unsigned int batchcount; | 266 | unsigned int batchcount; |
| 267 | unsigned int touched; | 267 | unsigned int touched; |
| 268 | spinlock_t lock; | 268 | spinlock_t lock; |
| 269 | void *entry[0]; /* | 269 | void *entry[0]; /* |
| 270 | * Must have this definition in here for the proper | 270 | * Must have this definition in here for the proper |
| 271 | * alignment of array_cache. Also simplifies accessing | 271 | * alignment of array_cache. Also simplifies accessing |
| 272 | * the entries. | 272 | * the entries. |
| 273 | * [0] is for gcc 2.95. It should really be []. | 273 | * [0] is for gcc 2.95. It should really be []. |
| 274 | */ | 274 | */ |
| 275 | }; | 275 | }; |
| 276 | 276 | ||
| 277 | /* bootstrap: The caches do not work without cpuarrays anymore, | 277 | /* |
| 278 | * but the cpuarrays are allocated from the generic caches... | 278 | * bootstrap: The caches do not work without cpuarrays anymore, but the |
| 279 | * cpuarrays are allocated from the generic caches... | ||
| 279 | */ | 280 | */ |
| 280 | #define BOOT_CPUCACHE_ENTRIES 1 | 281 | #define BOOT_CPUCACHE_ENTRIES 1 |
| 281 | struct arraycache_init { | 282 | struct arraycache_init { |
| @@ -291,13 +292,13 @@ struct kmem_list3 { | |||
| 291 | struct list_head slabs_full; | 292 | struct list_head slabs_full; |
| 292 | struct list_head slabs_free; | 293 | struct list_head slabs_free; |
| 293 | unsigned long free_objects; | 294 | unsigned long free_objects; |
| 294 | unsigned long next_reap; | ||
| 295 | int free_touched; | ||
| 296 | unsigned int free_limit; | 295 | unsigned int free_limit; |
| 297 | unsigned int colour_next; /* Per-node cache coloring */ | 296 | unsigned int colour_next; /* Per-node cache coloring */ |
| 298 | spinlock_t list_lock; | 297 | spinlock_t list_lock; |
| 299 | struct array_cache *shared; /* shared per node */ | 298 | struct array_cache *shared; /* shared per node */ |
| 300 | struct array_cache **alien; /* on other nodes */ | 299 | struct array_cache **alien; /* on other nodes */ |
| 300 | unsigned long next_reap; /* updated without locking */ | ||
| 301 | int free_touched; /* updated without locking */ | ||
| 301 | }; | 302 | }; |
| 302 | 303 | ||
| 303 | /* | 304 | /* |
| @@ -310,10 +311,8 @@ struct kmem_list3 __initdata initkmem_list3[NUM_INIT_LISTS]; | |||
| 310 | #define SIZE_L3 (1 + MAX_NUMNODES) | 311 | #define SIZE_L3 (1 + MAX_NUMNODES) |
| 311 | 312 | ||
| 312 | /* | 313 | /* |
| 313 | * This function must be completely optimized away if | 314 | * This function must be completely optimized away if a constant is passed to |
| 314 | * a constant is passed to it. Mostly the same as | 315 | * it. Mostly the same as what is in linux/slab.h except it returns an index. |
| 315 | * what is in linux/slab.h except it returns an | ||
| 316 | * index. | ||
| 317 | */ | 316 | */ |
| 318 | static __always_inline int index_of(const size_t size) | 317 | static __always_inline int index_of(const size_t size) |
| 319 | { | 318 | { |
| @@ -351,14 +350,14 @@ static void kmem_list3_init(struct kmem_list3 *parent) | |||
| 351 | parent->free_touched = 0; | 350 | parent->free_touched = 0; |
| 352 | } | 351 | } |
| 353 | 352 | ||
| 354 | #define MAKE_LIST(cachep, listp, slab, nodeid) \ | 353 | #define MAKE_LIST(cachep, listp, slab, nodeid) \ |
| 355 | do { \ | 354 | do { \ |
| 356 | INIT_LIST_HEAD(listp); \ | 355 | INIT_LIST_HEAD(listp); \ |
| 357 | list_splice(&(cachep->nodelists[nodeid]->slab), listp); \ | 356 | list_splice(&(cachep->nodelists[nodeid]->slab), listp); \ |
| 358 | } while (0) | 357 | } while (0) |
| 359 | 358 | ||
| 360 | #define MAKE_ALL_LISTS(cachep, ptr, nodeid) \ | 359 | #define MAKE_ALL_LISTS(cachep, ptr, nodeid) \ |
| 361 | do { \ | 360 | do { \ |
| 362 | MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid); \ | 361 | MAKE_LIST((cachep), (&(ptr)->slabs_full), slabs_full, nodeid); \ |
| 363 | MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \ | 362 | MAKE_LIST((cachep), (&(ptr)->slabs_partial), slabs_partial, nodeid); \ |
| 364 | MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \ | 363 | MAKE_LIST((cachep), (&(ptr)->slabs_free), slabs_free, nodeid); \ |
| @@ -373,28 +372,30 @@ static void kmem_list3_init(struct kmem_list3 *parent) | |||
| 373 | struct kmem_cache { | 372 | struct kmem_cache { |
| 374 | /* 1) per-cpu data, touched during every alloc/free */ | 373 | /* 1) per-cpu data, touched during every alloc/free */ |
| 375 | struct array_cache *array[NR_CPUS]; | 374 | struct array_cache *array[NR_CPUS]; |
| 375 | /* 2) Cache tunables. Protected by cache_chain_mutex */ | ||
| 376 | unsigned int batchcount; | 376 | unsigned int batchcount; |
| 377 | unsigned int limit; | 377 | unsigned int limit; |
| 378 | unsigned int shared; | 378 | unsigned int shared; |
| 379 | |||
| 379 | unsigned int buffer_size; | 380 | unsigned int buffer_size; |
| 380 | /* 2) touched by every alloc & free from the backend */ | 381 | /* 3) touched by every alloc & free from the backend */ |
| 381 | struct kmem_list3 *nodelists[MAX_NUMNODES]; | 382 | struct kmem_list3 *nodelists[MAX_NUMNODES]; |
| 382 | unsigned int flags; /* constant flags */ | ||
| 383 | unsigned int num; /* # of objs per slab */ | ||
| 384 | spinlock_t spinlock; | ||
| 385 | 383 | ||
| 386 | /* 3) cache_grow/shrink */ | 384 | unsigned int flags; /* constant flags */ |
| 385 | unsigned int num; /* # of objs per slab */ | ||
| 386 | |||
| 387 | /* 4) cache_grow/shrink */ | ||
| 387 | /* order of pgs per slab (2^n) */ | 388 | /* order of pgs per slab (2^n) */ |
| 388 | unsigned int gfporder; | 389 | unsigned int gfporder; |
| 389 | 390 | ||
| 390 | /* force GFP flags, e.g. GFP_DMA */ | 391 | /* force GFP flags, e.g. GFP_DMA */ |
| 391 | gfp_t gfpflags; | 392 | gfp_t gfpflags; |
| 392 | 393 | ||
| 393 | size_t colour; /* cache colouring range */ | 394 | size_t colour; /* cache colouring range */ |
| 394 | unsigned int colour_off; /* colour offset */ | 395 | unsigned int colour_off; /* colour offset */ |
| 395 | struct kmem_cache *slabp_cache; | 396 | struct kmem_cache *slabp_cache; |
| 396 | unsigned int slab_size; | 397 | unsigned int slab_size; |
| 397 | unsigned int dflags; /* dynamic flags */ | 398 | unsigned int dflags; /* dynamic flags */ |
| 398 | 399 | ||
| 399 | /* constructor func */ | 400 | /* constructor func */ |
| 400 | void (*ctor) (void *, struct kmem_cache *, unsigned long); | 401 | void (*ctor) (void *, struct kmem_cache *, unsigned long); |
| @@ -402,11 +403,11 @@ struct kmem_cache { | |||
| 402 | /* de-constructor func */ | 403 | /* de-constructor func */ |
| 403 | void (*dtor) (void *, struct kmem_cache *, unsigned long); | 404 | void (*dtor) (void *, struct kmem_cache *, unsigned long); |
| 404 | 405 | ||
| 405 | /* 4) cache creation/removal */ | 406 | /* 5) cache creation/removal */ |
| 406 | const char *name; | 407 | const char *name; |
| 407 | struct list_head next; | 408 | struct list_head next; |
| 408 | 409 | ||
| 409 | /* 5) statistics */ | 410 | /* 6) statistics */ |
| 410 | #if STATS | 411 | #if STATS |
| 411 | unsigned long num_active; | 412 | unsigned long num_active; |
| 412 | unsigned long num_allocations; | 413 | unsigned long num_allocations; |
| @@ -438,8 +439,9 @@ struct kmem_cache { | |||
| 438 | #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) | 439 | #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB) |
| 439 | 440 | ||
| 440 | #define BATCHREFILL_LIMIT 16 | 441 | #define BATCHREFILL_LIMIT 16 |
| 441 | /* Optimization question: fewer reaps means less | 442 | /* |
| 442 | * probability for unnessary cpucache drain/refill cycles. | 443 | * Optimization question: fewer reaps means less probability for unnessary |
| 444 | * cpucache drain/refill cycles. | ||
| 443 | * | 445 | * |
| 444 | * OTOH the cpuarrays can contain lots of objects, | 446 | * OTOH the cpuarrays can contain lots of objects, |
| 445 | * which could lock up otherwise freeable slabs. | 447 | * which could lock up otherwise freeable slabs. |
| @@ -453,17 +455,19 @@ struct kmem_cache { | |||
| 453 | #define STATS_INC_ALLOCED(x) ((x)->num_allocations++) | 455 | #define STATS_INC_ALLOCED(x) ((x)->num_allocations++) |
| 454 | #define STATS_INC_GROWN(x) ((x)->grown++) | 456 | #define STATS_INC_GROWN(x) ((x)->grown++) |
| 455 | #define STATS_INC_REAPED(x) ((x)->reaped++) | 457 | #define STATS_INC_REAPED(x) ((x)->reaped++) |
| 456 | #define STATS_SET_HIGH(x) do { if ((x)->num_active > (x)->high_mark) \ | 458 | #define STATS_SET_HIGH(x) \ |
| 457 | (x)->high_mark = (x)->num_active; \ | 459 | do { \ |
| 458 | } while (0) | 460 | if ((x)->num_active > (x)->high_mark) \ |
| 461 | (x)->high_mark = (x)->num_active; \ | ||
| 462 | } while (0) | ||
| 459 | #define STATS_INC_ERR(x) ((x)->errors++) | 463 | #define STATS_INC_ERR(x) ((x)->errors++) |
| 460 | #define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++) | 464 | #define STATS_INC_NODEALLOCS(x) ((x)->node_allocs++) |
| 461 | #define STATS_INC_NODEFREES(x) ((x)->node_frees++) | 465 | #define STATS_INC_NODEFREES(x) ((x)->node_frees++) |
| 462 | #define STATS_SET_FREEABLE(x, i) \ | 466 | #define STATS_SET_FREEABLE(x, i) \ |
| 463 | do { if ((x)->max_freeable < i) \ | 467 | do { \ |
| 464 | (x)->max_freeable = i; \ | 468 | if ((x)->max_freeable < i) \ |
| 465 | } while (0) | 469 | (x)->max_freeable = i; \ |
| 466 | 470 | } while (0) | |
| 467 | #define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit) | 471 | #define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit) |
| 468 | #define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss) | 472 | #define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss) |
| 469 | #define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit) | 473 | #define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit) |
| @@ -478,9 +482,7 @@ struct kmem_cache { | |||
| 478 | #define STATS_INC_ERR(x) do { } while (0) | 482 | #define STATS_INC_ERR(x) do { } while (0) |
| 479 | #define STATS_INC_NODEALLOCS(x) do { } while (0) | 483 | #define STATS_INC_NODEALLOCS(x) do { } while (0) |
| 480 | #define STATS_INC_NODEFREES(x) do { } while (0) | 484 | #define STATS_INC_NODEFREES(x) do { } while (0) |
| 481 | #define STATS_SET_FREEABLE(x, i) \ | 485 | #define STATS_SET_FREEABLE(x, i) do { } while (0) |
| 482 | do { } while (0) | ||
| 483 | |||
| 484 | #define STATS_INC_ALLOCHIT(x) do { } while (0) | 486 | #define STATS_INC_ALLOCHIT(x) do { } while (0) |
| 485 | #define STATS_INC_ALLOCMISS(x) do { } while (0) | 487 | #define STATS_INC_ALLOCMISS(x) do { } while (0) |
| 486 | #define STATS_INC_FREEHIT(x) do { } while (0) | 488 | #define STATS_INC_FREEHIT(x) do { } while (0) |
| @@ -488,7 +490,8 @@ struct kmem_cache { | |||
| 488 | #endif | 490 | #endif |
| 489 | 491 | ||
| 490 | #if DEBUG | 492 | #if DEBUG |
| 491 | /* Magic nums for obj red zoning. | 493 | /* |
| 494 | * Magic nums for obj red zoning. | ||
| 492 | * Placed in the first word before and the first word after an obj. | 495 | * Placed in the first word before and the first word after an obj. |
| 493 | */ | 496 | */ |
| 494 | #define RED_INACTIVE 0x5A2CF071UL /* when obj is inactive */ | 497 | #define RED_INACTIVE 0x5A2CF071UL /* when obj is inactive */ |
| @@ -499,7 +502,8 @@ struct kmem_cache { | |||
| 499 | #define POISON_FREE 0x6b /* for use-after-free poisoning */ | 502 | #define POISON_FREE 0x6b /* for use-after-free poisoning */ |
| 500 | #define POISON_END 0xa5 /* end-byte of poisoning */ | 503 | #define POISON_END 0xa5 /* end-byte of poisoning */ |
| 501 | 504 | ||
| 502 | /* memory layout of objects: | 505 | /* |
| 506 | * memory layout of objects: | ||
| 503 | * 0 : objp | 507 | * 0 : objp |
| 504 | * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that | 508 | * 0 .. cachep->obj_offset - BYTES_PER_WORD - 1: padding. This ensures that |
| 505 | * the end of an object is aligned with the end of the real | 509 | * the end of an object is aligned with the end of the real |
| @@ -508,7 +512,8 @@ struct kmem_cache { | |||
| 508 | * redzone word. | 512 | * redzone word. |
| 509 | * cachep->obj_offset: The real object. | 513 | * cachep->obj_offset: The real object. |
| 510 | * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long] | 514 | * cachep->buffer_size - 2* BYTES_PER_WORD: redzone word [BYTES_PER_WORD long] |
| 511 | * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address [BYTES_PER_WORD long] | 515 | * cachep->buffer_size - 1* BYTES_PER_WORD: last caller address |
| 516 | * [BYTES_PER_WORD long] | ||
| 512 | */ | 517 | */ |
| 513 | static int obj_offset(struct kmem_cache *cachep) | 518 | static int obj_offset(struct kmem_cache *cachep) |
| 514 | { | 519 | { |
| @@ -552,8 +557,8 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp) | |||
| 552 | #endif | 557 | #endif |
| 553 | 558 | ||
| 554 | /* | 559 | /* |
| 555 | * Maximum size of an obj (in 2^order pages) | 560 | * Maximum size of an obj (in 2^order pages) and absolute limit for the gfp |
| 556 | * and absolute limit for the gfp order. | 561 | * order. |
| 557 | */ | 562 | */ |
| 558 | #if defined(CONFIG_LARGE_ALLOCS) | 563 | #if defined(CONFIG_LARGE_ALLOCS) |
| 559 | #define MAX_OBJ_ORDER 13 /* up to 32Mb */ | 564 | #define MAX_OBJ_ORDER 13 /* up to 32Mb */ |
| @@ -573,9 +578,10 @@ static void **dbg_userword(struct kmem_cache *cachep, void *objp) | |||
| 573 | #define BREAK_GFP_ORDER_LO 0 | 578 | #define BREAK_GFP_ORDER_LO 0 |
| 574 | static int slab_break_gfp_order = BREAK_GFP_ORDER_LO; | 579 | static int slab_break_gfp_order = BREAK_GFP_ORDER_LO; |
| 575 | 580 | ||
| 576 | /* Functions for storing/retrieving the cachep and or slab from the | 581 | /* |
| 577 | * global 'mem_map'. These are used to find the slab an obj belongs to. | 582 | * Functions for storing/retrieving the cachep and or slab from the page |
| 578 | * With kfree(), these are used to find the cache which an obj belongs to. | 583 | * allocator. These are used to find the slab an obj belongs to. With kfree(), |
| 584 | * these are used to find the cache which an obj belongs to. | ||
| 579 | */ | 585 | */ |
| 580 | static inline void page_set_cache(struct page *page, struct kmem_cache *cache) | 586 | static inline void page_set_cache(struct page *page, struct kmem_cache *cache) |
| 581 | { | 587 | { |
| @@ -584,6 +590,8 @@ static inline void page_set_cache(struct page *page, struct kmem_cache *cache) | |||
| 584 | 590 | ||
| 585 | static inline struct kmem_cache *page_get_cache(struct page *page) | 591 | static inline struct kmem_cache *page_get_cache(struct page *page) |
| 586 | { | 592 | { |
| 593 | if (unlikely(PageCompound(page))) | ||
| 594 | page = (struct page *)page_private(page); | ||
| 587 | return (struct kmem_cache *)page->lru.next; | 595 | return (struct kmem_cache *)page->lru.next; |
| 588 | } | 596 | } |
| 589 | 597 | ||
| @@ -594,6 +602,8 @@ static inline void page_set_slab(struct page *page, struct slab *slab) | |||
| 594 | 602 | ||
| 595 | static inline struct slab *page_get_slab(struct page *page) | 603 | static inline struct slab *page_get_slab(struct page *page) |
| 596 | { | 604 | { |
| 605 | if (unlikely(PageCompound(page))) | ||
| 606 | page = (struct page *)page_private(page); | ||
| 597 | return (struct slab *)page->lru.prev; | 607 | return (struct slab *)page->lru.prev; |
| 598 | } | 608 | } |
| 599 | 609 | ||
| @@ -609,7 +619,21 @@ static inline struct slab *virt_to_slab(const void *obj) | |||
| 609 | return page_get_slab(page); | 619 | return page_get_slab(page); |
| 610 | } | 620 | } |
| 611 | 621 | ||
| 612 | /* These are the default caches for kmalloc. Custom caches can have other sizes. */ | 622 | static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab, |
| 623 | unsigned int idx) | ||
| 624 | { | ||
| 625 | return slab->s_mem + cache->buffer_size * idx; | ||
| 626 | } | ||
| 627 | |||
| 628 | static inline unsigned int obj_to_index(struct kmem_cache *cache, | ||
| 629 | struct slab *slab, void *obj) | ||
| 630 | { | ||
| 631 | return (unsigned)(obj - slab->s_mem) / cache->buffer_size; | ||
| 632 | } | ||
| 633 | |||
| 634 | /* | ||
| 635 | * These are the default caches for kmalloc. Custom caches can have other sizes. | ||
| 636 | */ | ||
| 613 | struct cache_sizes malloc_sizes[] = { | 637 | struct cache_sizes malloc_sizes[] = { |
| 614 | #define CACHE(x) { .cs_size = (x) }, | 638 | #define CACHE(x) { .cs_size = (x) }, |
| 615 | #include <linux/kmalloc_sizes.h> | 639 | #include <linux/kmalloc_sizes.h> |
| @@ -642,8 +666,6 @@ static struct kmem_cache cache_cache = { | |||
| 642 | .limit = BOOT_CPUCACHE_ENTRIES, | 666 | .limit = BOOT_CPUCACHE_ENTRIES, |
| 643 | .shared = 1, | 667 | .shared = 1, |
| 644 | .buffer_size = sizeof(struct kmem_cache), | 668 | .buffer_size = sizeof(struct kmem_cache), |
| 645 | .flags = SLAB_NO_REAP, | ||
| 646 | .spinlock = SPIN_LOCK_UNLOCKED, | ||
| 647 | .name = "kmem_cache", | 669 | .name = "kmem_cache", |
| 648 | #if DEBUG | 670 | #if DEBUG |
| 649 | .obj_size = sizeof(struct kmem_cache), | 671 | .obj_size = sizeof(struct kmem_cache), |
| @@ -655,8 +677,8 @@ static DEFINE_MUTEX(cache_chain_mutex); | |||
| 655 | static struct list_head cache_chain; | 677 | static struct list_head cache_chain; |
| 656 | 678 | ||
| 657 | /* | 679 | /* |
| 658 | * vm_enough_memory() looks at this to determine how many | 680 | * vm_enough_memory() looks at this to determine how many slab-allocated pages |
| 659 | * slab-allocated pages are possibly freeable under pressure | 681 | * are possibly freeable under pressure |
| 660 | * | 682 | * |
| 661 | * SLAB_RECLAIM_ACCOUNT turns this on per-slab | 683 | * SLAB_RECLAIM_ACCOUNT turns this on per-slab |
| 662 | */ | 684 | */ |
| @@ -675,7 +697,8 @@ static enum { | |||
| 675 | 697 | ||
| 676 | static DEFINE_PER_CPU(struct work_struct, reap_work); | 698 | static DEFINE_PER_CPU(struct work_struct, reap_work); |
| 677 | 699 | ||
| 678 | static void free_block(struct kmem_cache *cachep, void **objpp, int len, int node); | 700 | static void free_block(struct kmem_cache *cachep, void **objpp, int len, |
| 701 | int node); | ||
| 679 | static void enable_cpucache(struct kmem_cache *cachep); | 702 | static void enable_cpucache(struct kmem_cache *cachep); |
| 680 | static void cache_reap(void *unused); | 703 | static void cache_reap(void *unused); |
| 681 | static int __node_shrink(struct kmem_cache *cachep, int node); | 704 | static int __node_shrink(struct kmem_cache *cachep, int node); |
| @@ -685,7 +708,8 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) | |||
| 685 | return cachep->array[smp_processor_id()]; | 708 | return cachep->array[smp_processor_id()]; |
| 686 | } | 709 | } |
| 687 | 710 | ||
| 688 | static inline struct kmem_cache *__find_general_cachep(size_t size, gfp_t gfpflags) | 711 | static inline struct kmem_cache *__find_general_cachep(size_t size, |
| 712 | gfp_t gfpflags) | ||
| 689 | { | 713 | { |
| 690 | struct cache_sizes *csizep = malloc_sizes; | 714 | struct cache_sizes *csizep = malloc_sizes; |
| 691 | 715 | ||
| @@ -720,8 +744,9 @@ static size_t slab_mgmt_size(size_t nr_objs, size_t align) | |||
| 720 | return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align); | 744 | return ALIGN(sizeof(struct slab)+nr_objs*sizeof(kmem_bufctl_t), align); |
| 721 | } | 745 | } |
| 722 | 746 | ||
| 723 | /* Calculate the number of objects and left-over bytes for a given | 747 | /* |
| 724 | buffer size. */ | 748 | * Calculate the number of objects and left-over bytes for a given buffer size. |
| 749 | */ | ||
| 725 | static void cache_estimate(unsigned long gfporder, size_t buffer_size, | 750 | static void cache_estimate(unsigned long gfporder, size_t buffer_size, |
| 726 | size_t align, int flags, size_t *left_over, | 751 | size_t align, int flags, size_t *left_over, |
| 727 | unsigned int *num) | 752 | unsigned int *num) |
| @@ -782,7 +807,8 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, | |||
| 782 | 807 | ||
| 783 | #define slab_error(cachep, msg) __slab_error(__FUNCTION__, cachep, msg) | 808 | #define slab_error(cachep, msg) __slab_error(__FUNCTION__, cachep, msg) |
| 784 | 809 | ||
| 785 | static void __slab_error(const char *function, struct kmem_cache *cachep, char *msg) | 810 | static void __slab_error(const char *function, struct kmem_cache *cachep, |
| 811 | char *msg) | ||
| 786 | { | 812 | { |
| 787 | printk(KERN_ERR "slab error in %s(): cache `%s': %s\n", | 813 | printk(KERN_ERR "slab error in %s(): cache `%s': %s\n", |
| 788 | function, cachep->name, msg); | 814 | function, cachep->name, msg); |
| @@ -804,7 +830,7 @@ static void init_reap_node(int cpu) | |||
| 804 | 830 | ||
| 805 | node = next_node(cpu_to_node(cpu), node_online_map); | 831 | node = next_node(cpu_to_node(cpu), node_online_map); |
| 806 | if (node == MAX_NUMNODES) | 832 | if (node == MAX_NUMNODES) |
| 807 | node = 0; | 833 | node = first_node(node_online_map); |
| 808 | 834 | ||
| 809 | __get_cpu_var(reap_node) = node; | 835 | __get_cpu_var(reap_node) = node; |
| 810 | } | 836 | } |
| @@ -906,10 +932,8 @@ static void free_alien_cache(struct array_cache **ac_ptr) | |||
| 906 | 932 | ||
| 907 | if (!ac_ptr) | 933 | if (!ac_ptr) |
| 908 | return; | 934 | return; |
| 909 | |||
| 910 | for_each_node(i) | 935 | for_each_node(i) |
| 911 | kfree(ac_ptr[i]); | 936 | kfree(ac_ptr[i]); |
| 912 | |||
| 913 | kfree(ac_ptr); | 937 | kfree(ac_ptr); |
| 914 | } | 938 | } |
| 915 | 939 | ||
| @@ -943,7 +967,8 @@ static void reap_alien(struct kmem_cache *cachep, struct kmem_list3 *l3) | |||
| 943 | } | 967 | } |
| 944 | } | 968 | } |
| 945 | 969 | ||
| 946 | static void drain_alien_cache(struct kmem_cache *cachep, struct array_cache **alien) | 970 | static void drain_alien_cache(struct kmem_cache *cachep, |
| 971 | struct array_cache **alien) | ||
| 947 | { | 972 | { |
| 948 | int i = 0; | 973 | int i = 0; |
| 949 | struct array_cache *ac; | 974 | struct array_cache *ac; |
| @@ -986,20 +1011,22 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, | |||
| 986 | switch (action) { | 1011 | switch (action) { |
| 987 | case CPU_UP_PREPARE: | 1012 | case CPU_UP_PREPARE: |
| 988 | mutex_lock(&cache_chain_mutex); | 1013 | mutex_lock(&cache_chain_mutex); |
| 989 | /* we need to do this right in the beginning since | 1014 | /* |
| 1015 | * We need to do this right in the beginning since | ||
| 990 | * alloc_arraycache's are going to use this list. | 1016 | * alloc_arraycache's are going to use this list. |
| 991 | * kmalloc_node allows us to add the slab to the right | 1017 | * kmalloc_node allows us to add the slab to the right |
| 992 | * kmem_list3 and not this cpu's kmem_list3 | 1018 | * kmem_list3 and not this cpu's kmem_list3 |
| 993 | */ | 1019 | */ |
| 994 | 1020 | ||
| 995 | list_for_each_entry(cachep, &cache_chain, next) { | 1021 | list_for_each_entry(cachep, &cache_chain, next) { |
| 996 | /* setup the size64 kmemlist for cpu before we can | 1022 | /* |
| 1023 | * Set up the size64 kmemlist for cpu before we can | ||
| 997 | * begin anything. Make sure some other cpu on this | 1024 | * begin anything. Make sure some other cpu on this |
| 998 | * node has not already allocated this | 1025 | * node has not already allocated this |
| 999 | */ | 1026 | */ |
| 1000 | if (!cachep->nodelists[node]) { | 1027 | if (!cachep->nodelists[node]) { |
| 1001 | if (!(l3 = kmalloc_node(memsize, | 1028 | l3 = kmalloc_node(memsize, GFP_KERNEL, node); |
| 1002 | GFP_KERNEL, node))) | 1029 | if (!l3) |
| 1003 | goto bad; | 1030 | goto bad; |
| 1004 | kmem_list3_init(l3); | 1031 | kmem_list3_init(l3); |
| 1005 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + | 1032 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + |
| @@ -1015,13 +1042,15 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, | |||
| 1015 | 1042 | ||
| 1016 | spin_lock_irq(&cachep->nodelists[node]->list_lock); | 1043 | spin_lock_irq(&cachep->nodelists[node]->list_lock); |
| 1017 | cachep->nodelists[node]->free_limit = | 1044 | cachep->nodelists[node]->free_limit = |
| 1018 | (1 + nr_cpus_node(node)) * | 1045 | (1 + nr_cpus_node(node)) * |
| 1019 | cachep->batchcount + cachep->num; | 1046 | cachep->batchcount + cachep->num; |
| 1020 | spin_unlock_irq(&cachep->nodelists[node]->list_lock); | 1047 | spin_unlock_irq(&cachep->nodelists[node]->list_lock); |
| 1021 | } | 1048 | } |
| 1022 | 1049 | ||
| 1023 | /* Now we can go ahead with allocating the shared array's | 1050 | /* |
| 1024 | & array cache's */ | 1051 | * Now we can go ahead with allocating the shared arrays and |
| 1052 | * array caches | ||
| 1053 | */ | ||
| 1025 | list_for_each_entry(cachep, &cache_chain, next) { | 1054 | list_for_each_entry(cachep, &cache_chain, next) { |
| 1026 | struct array_cache *nc; | 1055 | struct array_cache *nc; |
| 1027 | struct array_cache *shared; | 1056 | struct array_cache *shared; |
| @@ -1041,7 +1070,6 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, | |||
| 1041 | if (!alien) | 1070 | if (!alien) |
| 1042 | goto bad; | 1071 | goto bad; |
| 1043 | cachep->array[cpu] = nc; | 1072 | cachep->array[cpu] = nc; |
| 1044 | |||
| 1045 | l3 = cachep->nodelists[node]; | 1073 | l3 = cachep->nodelists[node]; |
| 1046 | BUG_ON(!l3); | 1074 | BUG_ON(!l3); |
| 1047 | 1075 | ||
| @@ -1061,7 +1089,6 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, | |||
| 1061 | } | 1089 | } |
| 1062 | #endif | 1090 | #endif |
| 1063 | spin_unlock_irq(&l3->list_lock); | 1091 | spin_unlock_irq(&l3->list_lock); |
| 1064 | |||
| 1065 | kfree(shared); | 1092 | kfree(shared); |
| 1066 | free_alien_cache(alien); | 1093 | free_alien_cache(alien); |
| 1067 | } | 1094 | } |
| @@ -1083,7 +1110,6 @@ static int __devinit cpuup_callback(struct notifier_block *nfb, | |||
| 1083 | /* fall thru */ | 1110 | /* fall thru */ |
| 1084 | case CPU_UP_CANCELED: | 1111 | case CPU_UP_CANCELED: |
| 1085 | mutex_lock(&cache_chain_mutex); | 1112 | mutex_lock(&cache_chain_mutex); |
| 1086 | |||
| 1087 | list_for_each_entry(cachep, &cache_chain, next) { | 1113 | list_for_each_entry(cachep, &cache_chain, next) { |
| 1088 | struct array_cache *nc; | 1114 | struct array_cache *nc; |
| 1089 | struct array_cache *shared; | 1115 | struct array_cache *shared; |
| @@ -1150,7 +1176,7 @@ free_array_cache: | |||
| 1150 | #endif | 1176 | #endif |
| 1151 | } | 1177 | } |
| 1152 | return NOTIFY_OK; | 1178 | return NOTIFY_OK; |
| 1153 | bad: | 1179 | bad: |
| 1154 | mutex_unlock(&cache_chain_mutex); | 1180 | mutex_unlock(&cache_chain_mutex); |
| 1155 | return NOTIFY_BAD; | 1181 | return NOTIFY_BAD; |
| 1156 | } | 1182 | } |
| @@ -1160,7 +1186,8 @@ static struct notifier_block cpucache_notifier = { &cpuup_callback, NULL, 0 }; | |||
| 1160 | /* | 1186 | /* |
| 1161 | * swap the static kmem_list3 with kmalloced memory | 1187 | * swap the static kmem_list3 with kmalloced memory |
| 1162 | */ | 1188 | */ |
| 1163 | static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, int nodeid) | 1189 | static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, |
| 1190 | int nodeid) | ||
| 1164 | { | 1191 | { |
| 1165 | struct kmem_list3 *ptr; | 1192 | struct kmem_list3 *ptr; |
| 1166 | 1193 | ||
| @@ -1175,8 +1202,9 @@ static void init_list(struct kmem_cache *cachep, struct kmem_list3 *list, int no | |||
| 1175 | local_irq_enable(); | 1202 | local_irq_enable(); |
| 1176 | } | 1203 | } |
| 1177 | 1204 | ||
| 1178 | /* Initialisation. | 1205 | /* |
| 1179 | * Called after the gfp() functions have been enabled, and before smp_init(). | 1206 | * Initialisation. Called after the page allocator have been initialised and |
| 1207 | * before smp_init(). | ||
| 1180 | */ | 1208 | */ |
| 1181 | void __init kmem_cache_init(void) | 1209 | void __init kmem_cache_init(void) |
| 1182 | { | 1210 | { |
| @@ -1201,9 +1229,9 @@ void __init kmem_cache_init(void) | |||
| 1201 | 1229 | ||
| 1202 | /* Bootstrap is tricky, because several objects are allocated | 1230 | /* Bootstrap is tricky, because several objects are allocated |
| 1203 | * from caches that do not exist yet: | 1231 | * from caches that do not exist yet: |
| 1204 | * 1) initialize the cache_cache cache: it contains the struct kmem_cache | 1232 | * 1) initialize the cache_cache cache: it contains the struct |
| 1205 | * structures of all caches, except cache_cache itself: cache_cache | 1233 | * kmem_cache structures of all caches, except cache_cache itself: |
| 1206 | * is statically allocated. | 1234 | * cache_cache is statically allocated. |
| 1207 | * Initially an __init data area is used for the head array and the | 1235 | * Initially an __init data area is used for the head array and the |
| 1208 | * kmem_list3 structures, it's replaced with a kmalloc allocated | 1236 | * kmem_list3 structures, it's replaced with a kmalloc allocated |
| 1209 | * array at the end of the bootstrap. | 1237 | * array at the end of the bootstrap. |
| @@ -1226,7 +1254,8 @@ void __init kmem_cache_init(void) | |||
| 1226 | cache_cache.array[smp_processor_id()] = &initarray_cache.cache; | 1254 | cache_cache.array[smp_processor_id()] = &initarray_cache.cache; |
| 1227 | cache_cache.nodelists[numa_node_id()] = &initkmem_list3[CACHE_CACHE]; | 1255 | cache_cache.nodelists[numa_node_id()] = &initkmem_list3[CACHE_CACHE]; |
| 1228 | 1256 | ||
| 1229 | cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, cache_line_size()); | 1257 | cache_cache.buffer_size = ALIGN(cache_cache.buffer_size, |
| 1258 | cache_line_size()); | ||
| 1230 | 1259 | ||
| 1231 | for (order = 0; order < MAX_ORDER; order++) { | 1260 | for (order = 0; order < MAX_ORDER; order++) { |
| 1232 | cache_estimate(order, cache_cache.buffer_size, | 1261 | cache_estimate(order, cache_cache.buffer_size, |
| @@ -1245,24 +1274,26 @@ void __init kmem_cache_init(void) | |||
| 1245 | sizes = malloc_sizes; | 1274 | sizes = malloc_sizes; |
| 1246 | names = cache_names; | 1275 | names = cache_names; |
| 1247 | 1276 | ||
| 1248 | /* Initialize the caches that provide memory for the array cache | 1277 | /* |
| 1249 | * and the kmem_list3 structures first. | 1278 | * Initialize the caches that provide memory for the array cache and the |
| 1250 | * Without this, further allocations will bug | 1279 | * kmem_list3 structures first. Without this, further allocations will |
| 1280 | * bug. | ||
| 1251 | */ | 1281 | */ |
| 1252 | 1282 | ||
| 1253 | sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name, | 1283 | sizes[INDEX_AC].cs_cachep = kmem_cache_create(names[INDEX_AC].name, |
| 1254 | sizes[INDEX_AC].cs_size, | 1284 | sizes[INDEX_AC].cs_size, |
| 1255 | ARCH_KMALLOC_MINALIGN, | 1285 | ARCH_KMALLOC_MINALIGN, |
| 1256 | (ARCH_KMALLOC_FLAGS | | 1286 | ARCH_KMALLOC_FLAGS|SLAB_PANIC, |
| 1257 | SLAB_PANIC), NULL, NULL); | 1287 | NULL, NULL); |
| 1258 | 1288 | ||
| 1259 | if (INDEX_AC != INDEX_L3) | 1289 | if (INDEX_AC != INDEX_L3) { |
| 1260 | sizes[INDEX_L3].cs_cachep = | 1290 | sizes[INDEX_L3].cs_cachep = |
| 1261 | kmem_cache_create(names[INDEX_L3].name, | 1291 | kmem_cache_create(names[INDEX_L3].name, |
| 1262 | sizes[INDEX_L3].cs_size, | 1292 | sizes[INDEX_L3].cs_size, |
| 1263 | ARCH_KMALLOC_MINALIGN, | 1293 | ARCH_KMALLOC_MINALIGN, |
| 1264 | (ARCH_KMALLOC_FLAGS | SLAB_PANIC), NULL, | 1294 | ARCH_KMALLOC_FLAGS|SLAB_PANIC, |
| 1265 | NULL); | 1295 | NULL, NULL); |
| 1296 | } | ||
| 1266 | 1297 | ||
| 1267 | while (sizes->cs_size != ULONG_MAX) { | 1298 | while (sizes->cs_size != ULONG_MAX) { |
| 1268 | /* | 1299 | /* |
| @@ -1272,13 +1303,13 @@ void __init kmem_cache_init(void) | |||
| 1272 | * Note for systems short on memory removing the alignment will | 1303 | * Note for systems short on memory removing the alignment will |
| 1273 | * allow tighter packing of the smaller caches. | 1304 | * allow tighter packing of the smaller caches. |
| 1274 | */ | 1305 | */ |
| 1275 | if (!sizes->cs_cachep) | 1306 | if (!sizes->cs_cachep) { |
| 1276 | sizes->cs_cachep = kmem_cache_create(names->name, | 1307 | sizes->cs_cachep = kmem_cache_create(names->name, |
| 1277 | sizes->cs_size, | 1308 | sizes->cs_size, |
| 1278 | ARCH_KMALLOC_MINALIGN, | 1309 | ARCH_KMALLOC_MINALIGN, |
| 1279 | (ARCH_KMALLOC_FLAGS | 1310 | ARCH_KMALLOC_FLAGS|SLAB_PANIC, |
| 1280 | | SLAB_PANIC), | 1311 | NULL, NULL); |
| 1281 | NULL, NULL); | 1312 | } |
| 1282 | 1313 | ||
| 1283 | /* Inc off-slab bufctl limit until the ceiling is hit. */ | 1314 | /* Inc off-slab bufctl limit until the ceiling is hit. */ |
| 1284 | if (!(OFF_SLAB(sizes->cs_cachep))) { | 1315 | if (!(OFF_SLAB(sizes->cs_cachep))) { |
| @@ -1287,13 +1318,11 @@ void __init kmem_cache_init(void) | |||
| 1287 | } | 1318 | } |
| 1288 | 1319 | ||
| 1289 | sizes->cs_dmacachep = kmem_cache_create(names->name_dma, | 1320 | sizes->cs_dmacachep = kmem_cache_create(names->name_dma, |
| 1290 | sizes->cs_size, | 1321 | sizes->cs_size, |
| 1291 | ARCH_KMALLOC_MINALIGN, | 1322 | ARCH_KMALLOC_MINALIGN, |
| 1292 | (ARCH_KMALLOC_FLAGS | | 1323 | ARCH_KMALLOC_FLAGS|SLAB_CACHE_DMA| |
| 1293 | SLAB_CACHE_DMA | | 1324 | SLAB_PANIC, |
| 1294 | SLAB_PANIC), NULL, | 1325 | NULL, NULL); |
| 1295 | NULL); | ||
| 1296 | |||
| 1297 | sizes++; | 1326 | sizes++; |
| 1298 | names++; | 1327 | names++; |
| 1299 | } | 1328 | } |
| @@ -1345,20 +1374,22 @@ void __init kmem_cache_init(void) | |||
| 1345 | struct kmem_cache *cachep; | 1374 | struct kmem_cache *cachep; |
| 1346 | mutex_lock(&cache_chain_mutex); | 1375 | mutex_lock(&cache_chain_mutex); |
| 1347 | list_for_each_entry(cachep, &cache_chain, next) | 1376 | list_for_each_entry(cachep, &cache_chain, next) |
| 1348 | enable_cpucache(cachep); | 1377 | enable_cpucache(cachep); |
| 1349 | mutex_unlock(&cache_chain_mutex); | 1378 | mutex_unlock(&cache_chain_mutex); |
| 1350 | } | 1379 | } |
| 1351 | 1380 | ||
| 1352 | /* Done! */ | 1381 | /* Done! */ |
| 1353 | g_cpucache_up = FULL; | 1382 | g_cpucache_up = FULL; |
| 1354 | 1383 | ||
| 1355 | /* Register a cpu startup notifier callback | 1384 | /* |
| 1356 | * that initializes cpu_cache_get for all new cpus | 1385 | * Register a cpu startup notifier callback that initializes |
| 1386 | * cpu_cache_get for all new cpus | ||
| 1357 | */ | 1387 | */ |
| 1358 | register_cpu_notifier(&cpucache_notifier); | 1388 | register_cpu_notifier(&cpucache_notifier); |
| 1359 | 1389 | ||
| 1360 | /* The reap timers are started later, with a module init call: | 1390 | /* |
| 1361 | * That part of the kernel is not yet operational. | 1391 | * The reap timers are started later, with a module init call: That part |
| 1392 | * of the kernel is not yet operational. | ||
| 1362 | */ | 1393 | */ |
| 1363 | } | 1394 | } |
| 1364 | 1395 | ||
| @@ -1366,16 +1397,13 @@ static int __init cpucache_init(void) | |||
| 1366 | { | 1397 | { |
| 1367 | int cpu; | 1398 | int cpu; |
| 1368 | 1399 | ||
| 1369 | /* | 1400 | /* |
| 1370 | * Register the timers that return unneeded | 1401 | * Register the timers that return unneeded pages to the page allocator |
| 1371 | * pages to gfp. | ||
| 1372 | */ | 1402 | */ |
| 1373 | for_each_online_cpu(cpu) | 1403 | for_each_online_cpu(cpu) |
| 1374 | start_cpu_timer(cpu); | 1404 | start_cpu_timer(cpu); |
| 1375 | |||
| 1376 | return 0; | 1405 | return 0; |
| 1377 | } | 1406 | } |
| 1378 | |||
| 1379 | __initcall(cpucache_init); | 1407 | __initcall(cpucache_init); |
| 1380 | 1408 | ||
| 1381 | /* | 1409 | /* |
| @@ -1402,7 +1430,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
| 1402 | atomic_add(i, &slab_reclaim_pages); | 1430 | atomic_add(i, &slab_reclaim_pages); |
| 1403 | add_page_state(nr_slab, i); | 1431 | add_page_state(nr_slab, i); |
| 1404 | while (i--) { | 1432 | while (i--) { |
| 1405 | SetPageSlab(page); | 1433 | __SetPageSlab(page); |
| 1406 | page++; | 1434 | page++; |
| 1407 | } | 1435 | } |
| 1408 | return addr; | 1436 | return addr; |
| @@ -1418,8 +1446,8 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr) | |||
| 1418 | const unsigned long nr_freed = i; | 1446 | const unsigned long nr_freed = i; |
| 1419 | 1447 | ||
| 1420 | while (i--) { | 1448 | while (i--) { |
| 1421 | if (!TestClearPageSlab(page)) | 1449 | BUG_ON(!PageSlab(page)); |
| 1422 | BUG(); | 1450 | __ClearPageSlab(page); |
| 1423 | page++; | 1451 | page++; |
| 1424 | } | 1452 | } |
| 1425 | sub_page_state(nr_slab, nr_freed); | 1453 | sub_page_state(nr_slab, nr_freed); |
| @@ -1489,9 +1517,8 @@ static void dump_line(char *data, int offset, int limit) | |||
| 1489 | { | 1517 | { |
| 1490 | int i; | 1518 | int i; |
| 1491 | printk(KERN_ERR "%03x:", offset); | 1519 | printk(KERN_ERR "%03x:", offset); |
| 1492 | for (i = 0; i < limit; i++) { | 1520 | for (i = 0; i < limit; i++) |
| 1493 | printk(" %02x", (unsigned char)data[offset + i]); | 1521 | printk(" %02x", (unsigned char)data[offset + i]); |
| 1494 | } | ||
| 1495 | printk("\n"); | 1522 | printk("\n"); |
| 1496 | } | 1523 | } |
| 1497 | #endif | 1524 | #endif |
| @@ -1505,15 +1532,15 @@ static void print_objinfo(struct kmem_cache *cachep, void *objp, int lines) | |||
| 1505 | 1532 | ||
| 1506 | if (cachep->flags & SLAB_RED_ZONE) { | 1533 | if (cachep->flags & SLAB_RED_ZONE) { |
| 1507 | printk(KERN_ERR "Redzone: 0x%lx/0x%lx.\n", | 1534 | printk(KERN_ERR "Redzone: 0x%lx/0x%lx.\n", |
| 1508 | *dbg_redzone1(cachep, objp), | 1535 | *dbg_redzone1(cachep, objp), |
| 1509 | *dbg_redzone2(cachep, objp)); | 1536 | *dbg_redzone2(cachep, objp)); |
| 1510 | } | 1537 | } |
| 1511 | 1538 | ||
| 1512 | if (cachep->flags & SLAB_STORE_USER) { | 1539 | if (cachep->flags & SLAB_STORE_USER) { |
| 1513 | printk(KERN_ERR "Last user: [<%p>]", | 1540 | printk(KERN_ERR "Last user: [<%p>]", |
| 1514 | *dbg_userword(cachep, objp)); | 1541 | *dbg_userword(cachep, objp)); |
| 1515 | print_symbol("(%s)", | 1542 | print_symbol("(%s)", |
| 1516 | (unsigned long)*dbg_userword(cachep, objp)); | 1543 | (unsigned long)*dbg_userword(cachep, objp)); |
| 1517 | printk("\n"); | 1544 | printk("\n"); |
| 1518 | } | 1545 | } |
| 1519 | realobj = (char *)objp + obj_offset(cachep); | 1546 | realobj = (char *)objp + obj_offset(cachep); |
| @@ -1546,8 +1573,8 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) | |||
| 1546 | /* Print header */ | 1573 | /* Print header */ |
| 1547 | if (lines == 0) { | 1574 | if (lines == 0) { |
| 1548 | printk(KERN_ERR | 1575 | printk(KERN_ERR |
| 1549 | "Slab corruption: start=%p, len=%d\n", | 1576 | "Slab corruption: start=%p, len=%d\n", |
| 1550 | realobj, size); | 1577 | realobj, size); |
| 1551 | print_objinfo(cachep, objp, 0); | 1578 | print_objinfo(cachep, objp, 0); |
| 1552 | } | 1579 | } |
| 1553 | /* Hexdump the affected line */ | 1580 | /* Hexdump the affected line */ |
| @@ -1568,18 +1595,18 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) | |||
| 1568 | * exist: | 1595 | * exist: |
| 1569 | */ | 1596 | */ |
| 1570 | struct slab *slabp = virt_to_slab(objp); | 1597 | struct slab *slabp = virt_to_slab(objp); |
| 1571 | int objnr; | 1598 | unsigned int objnr; |
| 1572 | 1599 | ||
| 1573 | objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size; | 1600 | objnr = obj_to_index(cachep, slabp, objp); |
| 1574 | if (objnr) { | 1601 | if (objnr) { |
| 1575 | objp = slabp->s_mem + (objnr - 1) * cachep->buffer_size; | 1602 | objp = index_to_obj(cachep, slabp, objnr - 1); |
| 1576 | realobj = (char *)objp + obj_offset(cachep); | 1603 | realobj = (char *)objp + obj_offset(cachep); |
| 1577 | printk(KERN_ERR "Prev obj: start=%p, len=%d\n", | 1604 | printk(KERN_ERR "Prev obj: start=%p, len=%d\n", |
| 1578 | realobj, size); | 1605 | realobj, size); |
| 1579 | print_objinfo(cachep, objp, 2); | 1606 | print_objinfo(cachep, objp, 2); |
| 1580 | } | 1607 | } |
| 1581 | if (objnr + 1 < cachep->num) { | 1608 | if (objnr + 1 < cachep->num) { |
| 1582 | objp = slabp->s_mem + (objnr + 1) * cachep->buffer_size; | 1609 | objp = index_to_obj(cachep, slabp, objnr + 1); |
| 1583 | realobj = (char *)objp + obj_offset(cachep); | 1610 | realobj = (char *)objp + obj_offset(cachep); |
| 1584 | printk(KERN_ERR "Next obj: start=%p, len=%d\n", | 1611 | printk(KERN_ERR "Next obj: start=%p, len=%d\n", |
| 1585 | realobj, size); | 1612 | realobj, size); |
| @@ -1591,22 +1618,25 @@ static void check_poison_obj(struct kmem_cache *cachep, void *objp) | |||
| 1591 | 1618 | ||
| 1592 | #if DEBUG | 1619 | #if DEBUG |
| 1593 | /** | 1620 | /** |
| 1594 | * slab_destroy_objs - call the registered destructor for each object in | 1621 | * slab_destroy_objs - destroy a slab and its objects |
| 1595 | * a slab that is to be destroyed. | 1622 | * @cachep: cache pointer being destroyed |
| 1623 | * @slabp: slab pointer being destroyed | ||
| 1624 | * | ||
| 1625 | * Call the registered destructor for each object in a slab that is being | ||
| 1626 | * destroyed. | ||
| 1596 | */ | 1627 | */ |
| 1597 | static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) | 1628 | static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) |
| 1598 | { | 1629 | { |
| 1599 | int i; | 1630 | int i; |
| 1600 | for (i = 0; i < cachep->num; i++) { | 1631 | for (i = 0; i < cachep->num; i++) { |
| 1601 | void *objp = slabp->s_mem + cachep->buffer_size * i; | 1632 | void *objp = index_to_obj(cachep, slabp, i); |
| 1602 | 1633 | ||
| 1603 | if (cachep->flags & SLAB_POISON) { | 1634 | if (cachep->flags & SLAB_POISON) { |
| 1604 | #ifdef CONFIG_DEBUG_PAGEALLOC | 1635 | #ifdef CONFIG_DEBUG_PAGEALLOC |
| 1605 | if ((cachep->buffer_size % PAGE_SIZE) == 0 | 1636 | if (cachep->buffer_size % PAGE_SIZE == 0 && |
| 1606 | && OFF_SLAB(cachep)) | 1637 | OFF_SLAB(cachep)) |
| 1607 | kernel_map_pages(virt_to_page(objp), | 1638 | kernel_map_pages(virt_to_page(objp), |
| 1608 | cachep->buffer_size / PAGE_SIZE, | 1639 | cachep->buffer_size / PAGE_SIZE, 1); |
| 1609 | 1); | ||
| 1610 | else | 1640 | else |
| 1611 | check_poison_obj(cachep, objp); | 1641 | check_poison_obj(cachep, objp); |
| 1612 | #else | 1642 | #else |
| @@ -1631,7 +1661,7 @@ static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) | |||
| 1631 | if (cachep->dtor) { | 1661 | if (cachep->dtor) { |
| 1632 | int i; | 1662 | int i; |
| 1633 | for (i = 0; i < cachep->num; i++) { | 1663 | for (i = 0; i < cachep->num; i++) { |
| 1634 | void *objp = slabp->s_mem + cachep->buffer_size * i; | 1664 | void *objp = index_to_obj(cachep, slabp, i); |
| 1635 | (cachep->dtor) (objp, cachep, 0); | 1665 | (cachep->dtor) (objp, cachep, 0); |
| 1636 | } | 1666 | } |
| 1637 | } | 1667 | } |
| @@ -1639,9 +1669,13 @@ static void slab_destroy_objs(struct kmem_cache *cachep, struct slab *slabp) | |||
| 1639 | #endif | 1669 | #endif |
| 1640 | 1670 | ||
| 1641 | /** | 1671 | /** |
| 1672 | * slab_destroy - destroy and release all objects in a slab | ||
| 1673 | * @cachep: cache pointer being destroyed | ||
| 1674 | * @slabp: slab pointer being destroyed | ||
| 1675 | * | ||
| 1642 | * Destroy all the objs in a slab, and release the mem back to the system. | 1676 | * Destroy all the objs in a slab, and release the mem back to the system. |
| 1643 | * Before calling the slab must have been unlinked from the cache. | 1677 | * Before calling the slab must have been unlinked from the cache. The |
| 1644 | * The cache-lock is not held/needed. | 1678 | * cache-lock is not held/needed. |
| 1645 | */ | 1679 | */ |
| 1646 | static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) | 1680 | static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) |
| 1647 | { | 1681 | { |
| @@ -1662,8 +1696,10 @@ static void slab_destroy(struct kmem_cache *cachep, struct slab *slabp) | |||
| 1662 | } | 1696 | } |
| 1663 | } | 1697 | } |
| 1664 | 1698 | ||
| 1665 | /* For setting up all the kmem_list3s for cache whose buffer_size is same | 1699 | /* |
| 1666 | as size of kmem_list3. */ | 1700 | * For setting up all the kmem_list3s for cache whose buffer_size is same as |
| 1701 | * size of kmem_list3. | ||
| 1702 | */ | ||
| 1667 | static void set_up_list3s(struct kmem_cache *cachep, int index) | 1703 | static void set_up_list3s(struct kmem_cache *cachep, int index) |
| 1668 | { | 1704 | { |
| 1669 | int node; | 1705 | int node; |
| @@ -1689,13 +1725,13 @@ static void set_up_list3s(struct kmem_cache *cachep, int index) | |||
| 1689 | * high order pages for slabs. When the gfp() functions are more friendly | 1725 | * high order pages for slabs. When the gfp() functions are more friendly |
| 1690 | * towards high-order requests, this should be changed. | 1726 | * towards high-order requests, this should be changed. |
| 1691 | */ | 1727 | */ |
| 1692 | static inline size_t calculate_slab_order(struct kmem_cache *cachep, | 1728 | static size_t calculate_slab_order(struct kmem_cache *cachep, |
| 1693 | size_t size, size_t align, unsigned long flags) | 1729 | size_t size, size_t align, unsigned long flags) |
| 1694 | { | 1730 | { |
| 1695 | size_t left_over = 0; | 1731 | size_t left_over = 0; |
| 1696 | int gfporder; | 1732 | int gfporder; |
| 1697 | 1733 | ||
| 1698 | for (gfporder = 0 ; gfporder <= MAX_GFP_ORDER; gfporder++) { | 1734 | for (gfporder = 0; gfporder <= MAX_GFP_ORDER; gfporder++) { |
| 1699 | unsigned int num; | 1735 | unsigned int num; |
| 1700 | size_t remainder; | 1736 | size_t remainder; |
| 1701 | 1737 | ||
| @@ -1730,12 +1766,66 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep, | |||
| 1730 | /* | 1766 | /* |
| 1731 | * Acceptable internal fragmentation? | 1767 | * Acceptable internal fragmentation? |
| 1732 | */ | 1768 | */ |
| 1733 | if ((left_over * 8) <= (PAGE_SIZE << gfporder)) | 1769 | if (left_over * 8 <= (PAGE_SIZE << gfporder)) |
| 1734 | break; | 1770 | break; |
| 1735 | } | 1771 | } |
| 1736 | return left_over; | 1772 | return left_over; |
| 1737 | } | 1773 | } |
| 1738 | 1774 | ||
| 1775 | static void setup_cpu_cache(struct kmem_cache *cachep) | ||
| 1776 | { | ||
| 1777 | if (g_cpucache_up == FULL) { | ||
| 1778 | enable_cpucache(cachep); | ||
| 1779 | return; | ||
| 1780 | } | ||
| 1781 | if (g_cpucache_up == NONE) { | ||
| 1782 | /* | ||
| 1783 | * Note: the first kmem_cache_create must create the cache | ||
| 1784 | * that's used by kmalloc(24), otherwise the creation of | ||
| 1785 | * further caches will BUG(). | ||
| 1786 | */ | ||
| 1787 | cachep->array[smp_processor_id()] = &initarray_generic.cache; | ||
| 1788 | |||
| 1789 | /* | ||
| 1790 | * If the cache that's used by kmalloc(sizeof(kmem_list3)) is | ||
| 1791 | * the first cache, then we need to set up all its list3s, | ||
| 1792 | * otherwise the creation of further caches will BUG(). | ||
| 1793 | */ | ||
| 1794 | set_up_list3s(cachep, SIZE_AC); | ||
| 1795 | if (INDEX_AC == INDEX_L3) | ||
| 1796 | g_cpucache_up = PARTIAL_L3; | ||
| 1797 | else | ||
| 1798 | g_cpucache_up = PARTIAL_AC; | ||
| 1799 | } else { | ||
| 1800 | cachep->array[smp_processor_id()] = | ||
| 1801 | kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); | ||
| 1802 | |||
| 1803 | if (g_cpucache_up == PARTIAL_AC) { | ||
| 1804 | set_up_list3s(cachep, SIZE_L3); | ||
| 1805 | g_cpucache_up = PARTIAL_L3; | ||
| 1806 | } else { | ||
| 1807 | int node; | ||
| 1808 | for_each_online_node(node) { | ||
| 1809 | cachep->nodelists[node] = | ||
| 1810 | kmalloc_node(sizeof(struct kmem_list3), | ||
| 1811 | GFP_KERNEL, node); | ||
| 1812 | BUG_ON(!cachep->nodelists[node]); | ||
| 1813 | kmem_list3_init(cachep->nodelists[node]); | ||
| 1814 | } | ||
| 1815 | } | ||
| 1816 | } | ||
| 1817 | cachep->nodelists[numa_node_id()]->next_reap = | ||
| 1818 | jiffies + REAPTIMEOUT_LIST3 + | ||
| 1819 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; | ||
| 1820 | |||
| 1821 | cpu_cache_get(cachep)->avail = 0; | ||
| 1822 | cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES; | ||
| 1823 | cpu_cache_get(cachep)->batchcount = 1; | ||
| 1824 | cpu_cache_get(cachep)->touched = 0; | ||
| 1825 | cachep->batchcount = 1; | ||
| 1826 | cachep->limit = BOOT_CPUCACHE_ENTRIES; | ||
| 1827 | } | ||
| 1828 | |||
| 1739 | /** | 1829 | /** |
| 1740 | * kmem_cache_create - Create a cache. | 1830 | * kmem_cache_create - Create a cache. |
| 1741 | * @name: A string which is used in /proc/slabinfo to identify this cache. | 1831 | * @name: A string which is used in /proc/slabinfo to identify this cache. |
| @@ -1751,9 +1841,8 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep, | |||
| 1751 | * and the @dtor is run before the pages are handed back. | 1841 | * and the @dtor is run before the pages are handed back. |
| 1752 | * | 1842 | * |
| 1753 | * @name must be valid until the cache is destroyed. This implies that | 1843 | * @name must be valid until the cache is destroyed. This implies that |
| 1754 | * the module calling this has to destroy the cache before getting | 1844 | * the module calling this has to destroy the cache before getting unloaded. |
| 1755 | * unloaded. | 1845 | * |
| 1756 | * | ||
| 1757 | * The flags are | 1846 | * The flags are |
| 1758 | * | 1847 | * |
| 1759 | * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) | 1848 | * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5) |
| @@ -1762,16 +1851,14 @@ static inline size_t calculate_slab_order(struct kmem_cache *cachep, | |||
| 1762 | * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check | 1851 | * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check |
| 1763 | * for buffer overruns. | 1852 | * for buffer overruns. |
| 1764 | * | 1853 | * |
| 1765 | * %SLAB_NO_REAP - Don't automatically reap this cache when we're under | ||
| 1766 | * memory pressure. | ||
| 1767 | * | ||
| 1768 | * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware | 1854 | * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware |
| 1769 | * cacheline. This can be beneficial if you're counting cycles as closely | 1855 | * cacheline. This can be beneficial if you're counting cycles as closely |
| 1770 | * as davem. | 1856 | * as davem. |
| 1771 | */ | 1857 | */ |
| 1772 | struct kmem_cache * | 1858 | struct kmem_cache * |
| 1773 | kmem_cache_create (const char *name, size_t size, size_t align, | 1859 | kmem_cache_create (const char *name, size_t size, size_t align, |
| 1774 | unsigned long flags, void (*ctor)(void*, struct kmem_cache *, unsigned long), | 1860 | unsigned long flags, |
| 1861 | void (*ctor)(void*, struct kmem_cache *, unsigned long), | ||
| 1775 | void (*dtor)(void*, struct kmem_cache *, unsigned long)) | 1862 | void (*dtor)(void*, struct kmem_cache *, unsigned long)) |
| 1776 | { | 1863 | { |
| 1777 | size_t left_over, slab_size, ralign; | 1864 | size_t left_over, slab_size, ralign; |
| @@ -1781,12 +1868,10 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
| 1781 | /* | 1868 | /* |
| 1782 | * Sanity checks... these are all serious usage bugs. | 1869 | * Sanity checks... these are all serious usage bugs. |
| 1783 | */ | 1870 | */ |
| 1784 | if ((!name) || | 1871 | if (!name || in_interrupt() || (size < BYTES_PER_WORD) || |
| 1785 | in_interrupt() || | ||
| 1786 | (size < BYTES_PER_WORD) || | ||
| 1787 | (size > (1 << MAX_OBJ_ORDER) * PAGE_SIZE) || (dtor && !ctor)) { | 1872 | (size > (1 << MAX_OBJ_ORDER) * PAGE_SIZE) || (dtor && !ctor)) { |
| 1788 | printk(KERN_ERR "%s: Early error in slab %s\n", | 1873 | printk(KERN_ERR "%s: Early error in slab %s\n", __FUNCTION__, |
| 1789 | __FUNCTION__, name); | 1874 | name); |
| 1790 | BUG(); | 1875 | BUG(); |
| 1791 | } | 1876 | } |
| 1792 | 1877 | ||
| @@ -1840,8 +1925,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
| 1840 | * above the next power of two: caches with object sizes just above a | 1925 | * above the next power of two: caches with object sizes just above a |
| 1841 | * power of two have a significant amount of internal fragmentation. | 1926 | * power of two have a significant amount of internal fragmentation. |
| 1842 | */ | 1927 | */ |
| 1843 | if ((size < 4096 | 1928 | if (size < 4096 || fls(size - 1) == fls(size-1 + 3 * BYTES_PER_WORD)) |
| 1844 | || fls(size - 1) == fls(size - 1 + 3 * BYTES_PER_WORD))) | ||
| 1845 | flags |= SLAB_RED_ZONE | SLAB_STORE_USER; | 1929 | flags |= SLAB_RED_ZONE | SLAB_STORE_USER; |
| 1846 | if (!(flags & SLAB_DESTROY_BY_RCU)) | 1930 | if (!(flags & SLAB_DESTROY_BY_RCU)) |
| 1847 | flags |= SLAB_POISON; | 1931 | flags |= SLAB_POISON; |
| @@ -1853,13 +1937,14 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
| 1853 | BUG_ON(dtor); | 1937 | BUG_ON(dtor); |
| 1854 | 1938 | ||
| 1855 | /* | 1939 | /* |
| 1856 | * Always checks flags, a caller might be expecting debug | 1940 | * Always checks flags, a caller might be expecting debug support which |
| 1857 | * support which isn't available. | 1941 | * isn't available. |
| 1858 | */ | 1942 | */ |
| 1859 | if (flags & ~CREATE_MASK) | 1943 | if (flags & ~CREATE_MASK) |
| 1860 | BUG(); | 1944 | BUG(); |
| 1861 | 1945 | ||
| 1862 | /* Check that size is in terms of words. This is needed to avoid | 1946 | /* |
| 1947 | * Check that size is in terms of words. This is needed to avoid | ||
| 1863 | * unaligned accesses for some archs when redzoning is used, and makes | 1948 | * unaligned accesses for some archs when redzoning is used, and makes |
| 1864 | * sure any on-slab bufctl's are also correctly aligned. | 1949 | * sure any on-slab bufctl's are also correctly aligned. |
| 1865 | */ | 1950 | */ |
| @@ -1868,12 +1953,14 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
| 1868 | size &= ~(BYTES_PER_WORD - 1); | 1953 | size &= ~(BYTES_PER_WORD - 1); |
| 1869 | } | 1954 | } |
| 1870 | 1955 | ||
| 1871 | /* calculate out the final buffer alignment: */ | 1956 | /* calculate the final buffer alignment: */ |
| 1957 | |||
| 1872 | /* 1) arch recommendation: can be overridden for debug */ | 1958 | /* 1) arch recommendation: can be overridden for debug */ |
| 1873 | if (flags & SLAB_HWCACHE_ALIGN) { | 1959 | if (flags & SLAB_HWCACHE_ALIGN) { |
| 1874 | /* Default alignment: as specified by the arch code. | 1960 | /* |
| 1875 | * Except if an object is really small, then squeeze multiple | 1961 | * Default alignment: as specified by the arch code. Except if |
| 1876 | * objects into one cacheline. | 1962 | * an object is really small, then squeeze multiple objects into |
| 1963 | * one cacheline. | ||
| 1877 | */ | 1964 | */ |
| 1878 | ralign = cache_line_size(); | 1965 | ralign = cache_line_size(); |
| 1879 | while (size <= ralign / 2) | 1966 | while (size <= ralign / 2) |
| @@ -1893,7 +1980,8 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
| 1893 | if (ralign > BYTES_PER_WORD) | 1980 | if (ralign > BYTES_PER_WORD) |
| 1894 | flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); | 1981 | flags &= ~(SLAB_RED_ZONE | SLAB_STORE_USER); |
| 1895 | } | 1982 | } |
| 1896 | /* 4) Store it. Note that the debug code below can reduce | 1983 | /* |
| 1984 | * 4) Store it. Note that the debug code below can reduce | ||
| 1897 | * the alignment to BYTES_PER_WORD. | 1985 | * the alignment to BYTES_PER_WORD. |
| 1898 | */ | 1986 | */ |
| 1899 | align = ralign; | 1987 | align = ralign; |
| @@ -1978,7 +2066,6 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
| 1978 | cachep->gfpflags = 0; | 2066 | cachep->gfpflags = 0; |
| 1979 | if (flags & SLAB_CACHE_DMA) | 2067 | if (flags & SLAB_CACHE_DMA) |
| 1980 | cachep->gfpflags |= GFP_DMA; | 2068 | cachep->gfpflags |= GFP_DMA; |
| 1981 | spin_lock_init(&cachep->spinlock); | ||
| 1982 | cachep->buffer_size = size; | 2069 | cachep->buffer_size = size; |
| 1983 | 2070 | ||
| 1984 | if (flags & CFLGS_OFF_SLAB) | 2071 | if (flags & CFLGS_OFF_SLAB) |
| @@ -1988,64 +2075,11 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
| 1988 | cachep->name = name; | 2075 | cachep->name = name; |
| 1989 | 2076 | ||
| 1990 | 2077 | ||
| 1991 | if (g_cpucache_up == FULL) { | 2078 | setup_cpu_cache(cachep); |
| 1992 | enable_cpucache(cachep); | ||
| 1993 | } else { | ||
| 1994 | if (g_cpucache_up == NONE) { | ||
| 1995 | /* Note: the first kmem_cache_create must create | ||
| 1996 | * the cache that's used by kmalloc(24), otherwise | ||
| 1997 | * the creation of further caches will BUG(). | ||
| 1998 | */ | ||
| 1999 | cachep->array[smp_processor_id()] = | ||
| 2000 | &initarray_generic.cache; | ||
| 2001 | |||
| 2002 | /* If the cache that's used by | ||
| 2003 | * kmalloc(sizeof(kmem_list3)) is the first cache, | ||
| 2004 | * then we need to set up all its list3s, otherwise | ||
| 2005 | * the creation of further caches will BUG(). | ||
| 2006 | */ | ||
| 2007 | set_up_list3s(cachep, SIZE_AC); | ||
| 2008 | if (INDEX_AC == INDEX_L3) | ||
| 2009 | g_cpucache_up = PARTIAL_L3; | ||
| 2010 | else | ||
| 2011 | g_cpucache_up = PARTIAL_AC; | ||
| 2012 | } else { | ||
| 2013 | cachep->array[smp_processor_id()] = | ||
| 2014 | kmalloc(sizeof(struct arraycache_init), GFP_KERNEL); | ||
| 2015 | |||
| 2016 | if (g_cpucache_up == PARTIAL_AC) { | ||
| 2017 | set_up_list3s(cachep, SIZE_L3); | ||
| 2018 | g_cpucache_up = PARTIAL_L3; | ||
| 2019 | } else { | ||
| 2020 | int node; | ||
| 2021 | for_each_online_node(node) { | ||
| 2022 | |||
| 2023 | cachep->nodelists[node] = | ||
| 2024 | kmalloc_node(sizeof | ||
| 2025 | (struct kmem_list3), | ||
| 2026 | GFP_KERNEL, node); | ||
| 2027 | BUG_ON(!cachep->nodelists[node]); | ||
| 2028 | kmem_list3_init(cachep-> | ||
| 2029 | nodelists[node]); | ||
| 2030 | } | ||
| 2031 | } | ||
| 2032 | } | ||
| 2033 | cachep->nodelists[numa_node_id()]->next_reap = | ||
| 2034 | jiffies + REAPTIMEOUT_LIST3 + | ||
| 2035 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; | ||
| 2036 | |||
| 2037 | BUG_ON(!cpu_cache_get(cachep)); | ||
| 2038 | cpu_cache_get(cachep)->avail = 0; | ||
| 2039 | cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES; | ||
| 2040 | cpu_cache_get(cachep)->batchcount = 1; | ||
| 2041 | cpu_cache_get(cachep)->touched = 0; | ||
| 2042 | cachep->batchcount = 1; | ||
| 2043 | cachep->limit = BOOT_CPUCACHE_ENTRIES; | ||
| 2044 | } | ||
| 2045 | 2079 | ||
| 2046 | /* cache setup completed, link it into the list */ | 2080 | /* cache setup completed, link it into the list */ |
| 2047 | list_add(&cachep->next, &cache_chain); | 2081 | list_add(&cachep->next, &cache_chain); |
| 2048 | oops: | 2082 | oops: |
| 2049 | if (!cachep && (flags & SLAB_PANIC)) | 2083 | if (!cachep && (flags & SLAB_PANIC)) |
| 2050 | panic("kmem_cache_create(): failed to create slab `%s'\n", | 2084 | panic("kmem_cache_create(): failed to create slab `%s'\n", |
| 2051 | name); | 2085 | name); |
| @@ -2089,30 +2123,13 @@ static void check_spinlock_acquired_node(struct kmem_cache *cachep, int node) | |||
| 2089 | #define check_spinlock_acquired_node(x, y) do { } while(0) | 2123 | #define check_spinlock_acquired_node(x, y) do { } while(0) |
| 2090 | #endif | 2124 | #endif |
| 2091 | 2125 | ||
| 2092 | /* | 2126 | static void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, |
| 2093 | * Waits for all CPUs to execute func(). | 2127 | struct array_cache *ac, |
| 2094 | */ | 2128 | int force, int node); |
| 2095 | static void smp_call_function_all_cpus(void (*func)(void *arg), void *arg) | ||
| 2096 | { | ||
| 2097 | check_irq_on(); | ||
| 2098 | preempt_disable(); | ||
| 2099 | |||
| 2100 | local_irq_disable(); | ||
| 2101 | func(arg); | ||
| 2102 | local_irq_enable(); | ||
| 2103 | |||
| 2104 | if (smp_call_function(func, arg, 1, 1)) | ||
| 2105 | BUG(); | ||
| 2106 | |||
| 2107 | preempt_enable(); | ||
| 2108 | } | ||
| 2109 | |||
| 2110 | static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac, | ||
| 2111 | int force, int node); | ||
| 2112 | 2129 | ||
| 2113 | static void do_drain(void *arg) | 2130 | static void do_drain(void *arg) |
| 2114 | { | 2131 | { |
| 2115 | struct kmem_cache *cachep = (struct kmem_cache *) arg; | 2132 | struct kmem_cache *cachep = arg; |
| 2116 | struct array_cache *ac; | 2133 | struct array_cache *ac; |
| 2117 | int node = numa_node_id(); | 2134 | int node = numa_node_id(); |
| 2118 | 2135 | ||
| @@ -2129,14 +2146,12 @@ static void drain_cpu_caches(struct kmem_cache *cachep) | |||
| 2129 | struct kmem_list3 *l3; | 2146 | struct kmem_list3 *l3; |
| 2130 | int node; | 2147 | int node; |
| 2131 | 2148 | ||
| 2132 | smp_call_function_all_cpus(do_drain, cachep); | 2149 | on_each_cpu(do_drain, cachep, 1, 1); |
| 2133 | check_irq_on(); | 2150 | check_irq_on(); |
| 2134 | for_each_online_node(node) { | 2151 | for_each_online_node(node) { |
| 2135 | l3 = cachep->nodelists[node]; | 2152 | l3 = cachep->nodelists[node]; |
| 2136 | if (l3) { | 2153 | if (l3) { |
| 2137 | spin_lock_irq(&l3->list_lock); | 2154 | drain_array(cachep, l3, l3->shared, 1, node); |
| 2138 | drain_array_locked(cachep, l3->shared, 1, node); | ||
| 2139 | spin_unlock_irq(&l3->list_lock); | ||
| 2140 | if (l3->alien) | 2155 | if (l3->alien) |
| 2141 | drain_alien_cache(cachep, l3->alien); | 2156 | drain_alien_cache(cachep, l3->alien); |
| 2142 | } | 2157 | } |
| @@ -2260,16 +2275,15 @@ int kmem_cache_destroy(struct kmem_cache *cachep) | |||
| 2260 | 2275 | ||
| 2261 | /* NUMA: free the list3 structures */ | 2276 | /* NUMA: free the list3 structures */ |
| 2262 | for_each_online_node(i) { | 2277 | for_each_online_node(i) { |
| 2263 | if ((l3 = cachep->nodelists[i])) { | 2278 | l3 = cachep->nodelists[i]; |
| 2279 | if (l3) { | ||
| 2264 | kfree(l3->shared); | 2280 | kfree(l3->shared); |
| 2265 | free_alien_cache(l3->alien); | 2281 | free_alien_cache(l3->alien); |
| 2266 | kfree(l3); | 2282 | kfree(l3); |
| 2267 | } | 2283 | } |
| 2268 | } | 2284 | } |
| 2269 | kmem_cache_free(&cache_cache, cachep); | 2285 | kmem_cache_free(&cache_cache, cachep); |
| 2270 | |||
| 2271 | unlock_cpu_hotplug(); | 2286 | unlock_cpu_hotplug(); |
| 2272 | |||
| 2273 | return 0; | 2287 | return 0; |
| 2274 | } | 2288 | } |
| 2275 | EXPORT_SYMBOL(kmem_cache_destroy); | 2289 | EXPORT_SYMBOL(kmem_cache_destroy); |
| @@ -2292,7 +2306,6 @@ static struct slab *alloc_slabmgmt(struct kmem_cache *cachep, void *objp, | |||
| 2292 | slabp->inuse = 0; | 2306 | slabp->inuse = 0; |
| 2293 | slabp->colouroff = colour_off; | 2307 | slabp->colouroff = colour_off; |
| 2294 | slabp->s_mem = objp + colour_off; | 2308 | slabp->s_mem = objp + colour_off; |
| 2295 | |||
| 2296 | return slabp; | 2309 | return slabp; |
| 2297 | } | 2310 | } |
| 2298 | 2311 | ||
| @@ -2307,7 +2320,7 @@ static void cache_init_objs(struct kmem_cache *cachep, | |||
| 2307 | int i; | 2320 | int i; |
| 2308 | 2321 | ||
| 2309 | for (i = 0; i < cachep->num; i++) { | 2322 | for (i = 0; i < cachep->num; i++) { |
| 2310 | void *objp = slabp->s_mem + cachep->buffer_size * i; | 2323 | void *objp = index_to_obj(cachep, slabp, i); |
| 2311 | #if DEBUG | 2324 | #if DEBUG |
| 2312 | /* need to poison the objs? */ | 2325 | /* need to poison the objs? */ |
| 2313 | if (cachep->flags & SLAB_POISON) | 2326 | if (cachep->flags & SLAB_POISON) |
| @@ -2320,9 +2333,9 @@ static void cache_init_objs(struct kmem_cache *cachep, | |||
| 2320 | *dbg_redzone2(cachep, objp) = RED_INACTIVE; | 2333 | *dbg_redzone2(cachep, objp) = RED_INACTIVE; |
| 2321 | } | 2334 | } |
| 2322 | /* | 2335 | /* |
| 2323 | * Constructors are not allowed to allocate memory from | 2336 | * Constructors are not allowed to allocate memory from the same |
| 2324 | * the same cache which they are a constructor for. | 2337 | * cache which they are a constructor for. Otherwise, deadlock. |
| 2325 | * Otherwise, deadlock. They must also be threaded. | 2338 | * They must also be threaded. |
| 2326 | */ | 2339 | */ |
| 2327 | if (cachep->ctor && !(cachep->flags & SLAB_POISON)) | 2340 | if (cachep->ctor && !(cachep->flags & SLAB_POISON)) |
| 2328 | cachep->ctor(objp + obj_offset(cachep), cachep, | 2341 | cachep->ctor(objp + obj_offset(cachep), cachep, |
| @@ -2336,8 +2349,8 @@ static void cache_init_objs(struct kmem_cache *cachep, | |||
| 2336 | slab_error(cachep, "constructor overwrote the" | 2349 | slab_error(cachep, "constructor overwrote the" |
| 2337 | " start of an object"); | 2350 | " start of an object"); |
| 2338 | } | 2351 | } |
| 2339 | if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep) | 2352 | if ((cachep->buffer_size % PAGE_SIZE) == 0 && |
| 2340 | && cachep->flags & SLAB_POISON) | 2353 | OFF_SLAB(cachep) && cachep->flags & SLAB_POISON) |
| 2341 | kernel_map_pages(virt_to_page(objp), | 2354 | kernel_map_pages(virt_to_page(objp), |
| 2342 | cachep->buffer_size / PAGE_SIZE, 0); | 2355 | cachep->buffer_size / PAGE_SIZE, 0); |
| 2343 | #else | 2356 | #else |
| @@ -2352,18 +2365,16 @@ static void cache_init_objs(struct kmem_cache *cachep, | |||
| 2352 | 2365 | ||
| 2353 | static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) | 2366 | static void kmem_flagcheck(struct kmem_cache *cachep, gfp_t flags) |
| 2354 | { | 2367 | { |
| 2355 | if (flags & SLAB_DMA) { | 2368 | if (flags & SLAB_DMA) |
| 2356 | if (!(cachep->gfpflags & GFP_DMA)) | 2369 | BUG_ON(!(cachep->gfpflags & GFP_DMA)); |
| 2357 | BUG(); | 2370 | else |
| 2358 | } else { | 2371 | BUG_ON(cachep->gfpflags & GFP_DMA); |
| 2359 | if (cachep->gfpflags & GFP_DMA) | ||
| 2360 | BUG(); | ||
| 2361 | } | ||
| 2362 | } | 2372 | } |
| 2363 | 2373 | ||
| 2364 | static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, int nodeid) | 2374 | static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, |
| 2375 | int nodeid) | ||
| 2365 | { | 2376 | { |
| 2366 | void *objp = slabp->s_mem + (slabp->free * cachep->buffer_size); | 2377 | void *objp = index_to_obj(cachep, slabp, slabp->free); |
| 2367 | kmem_bufctl_t next; | 2378 | kmem_bufctl_t next; |
| 2368 | 2379 | ||
| 2369 | slabp->inuse++; | 2380 | slabp->inuse++; |
| @@ -2377,10 +2388,10 @@ static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slabp, int nod | |||
| 2377 | return objp; | 2388 | return objp; |
| 2378 | } | 2389 | } |
| 2379 | 2390 | ||
| 2380 | static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, void *objp, | 2391 | static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, |
| 2381 | int nodeid) | 2392 | void *objp, int nodeid) |
| 2382 | { | 2393 | { |
| 2383 | unsigned int objnr = (unsigned)(objp-slabp->s_mem) / cachep->buffer_size; | 2394 | unsigned int objnr = obj_to_index(cachep, slabp, objp); |
| 2384 | 2395 | ||
| 2385 | #if DEBUG | 2396 | #if DEBUG |
| 2386 | /* Verify that the slab belongs to the intended node */ | 2397 | /* Verify that the slab belongs to the intended node */ |
| @@ -2388,7 +2399,7 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, void *ob | |||
| 2388 | 2399 | ||
| 2389 | if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) { | 2400 | if (slab_bufctl(slabp)[objnr] != BUFCTL_FREE) { |
| 2390 | printk(KERN_ERR "slab: double free detected in cache " | 2401 | printk(KERN_ERR "slab: double free detected in cache " |
| 2391 | "'%s', objp %p\n", cachep->name, objp); | 2402 | "'%s', objp %p\n", cachep->name, objp); |
| 2392 | BUG(); | 2403 | BUG(); |
| 2393 | } | 2404 | } |
| 2394 | #endif | 2405 | #endif |
| @@ -2397,14 +2408,18 @@ static void slab_put_obj(struct kmem_cache *cachep, struct slab *slabp, void *ob | |||
| 2397 | slabp->inuse--; | 2408 | slabp->inuse--; |
| 2398 | } | 2409 | } |
| 2399 | 2410 | ||
| 2400 | static void set_slab_attr(struct kmem_cache *cachep, struct slab *slabp, void *objp) | 2411 | static void set_slab_attr(struct kmem_cache *cachep, struct slab *slabp, |
| 2412 | void *objp) | ||
| 2401 | { | 2413 | { |
| 2402 | int i; | 2414 | int i; |
| 2403 | struct page *page; | 2415 | struct page *page; |
| 2404 | 2416 | ||
| 2405 | /* Nasty!!!!!! I hope this is OK. */ | 2417 | /* Nasty!!!!!! I hope this is OK. */ |
| 2406 | i = 1 << cachep->gfporder; | ||
| 2407 | page = virt_to_page(objp); | 2418 | page = virt_to_page(objp); |
| 2419 | |||
| 2420 | i = 1; | ||
| 2421 | if (likely(!PageCompound(page))) | ||
| 2422 | i <<= cachep->gfporder; | ||
| 2408 | do { | 2423 | do { |
| 2409 | page_set_cache(page, cachep); | 2424 | page_set_cache(page, cachep); |
| 2410 | page_set_slab(page, slabp); | 2425 | page_set_slab(page, slabp); |
| @@ -2425,8 +2440,9 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
| 2425 | unsigned long ctor_flags; | 2440 | unsigned long ctor_flags; |
| 2426 | struct kmem_list3 *l3; | 2441 | struct kmem_list3 *l3; |
| 2427 | 2442 | ||
| 2428 | /* Be lazy and only check for valid flags here, | 2443 | /* |
| 2429 | * keeping it out of the critical path in kmem_cache_alloc(). | 2444 | * Be lazy and only check for valid flags here, keeping it out of the |
| 2445 | * critical path in kmem_cache_alloc(). | ||
| 2430 | */ | 2446 | */ |
| 2431 | if (flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW)) | 2447 | if (flags & ~(SLAB_DMA | SLAB_LEVEL_MASK | SLAB_NO_GROW)) |
| 2432 | BUG(); | 2448 | BUG(); |
| @@ -2467,14 +2483,17 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
| 2467 | */ | 2483 | */ |
| 2468 | kmem_flagcheck(cachep, flags); | 2484 | kmem_flagcheck(cachep, flags); |
| 2469 | 2485 | ||
| 2470 | /* Get mem for the objs. | 2486 | /* |
| 2471 | * Attempt to allocate a physical page from 'nodeid', | 2487 | * Get mem for the objs. Attempt to allocate a physical page from |
| 2488 | * 'nodeid'. | ||
| 2472 | */ | 2489 | */ |
| 2473 | if (!(objp = kmem_getpages(cachep, flags, nodeid))) | 2490 | objp = kmem_getpages(cachep, flags, nodeid); |
| 2491 | if (!objp) | ||
| 2474 | goto failed; | 2492 | goto failed; |
| 2475 | 2493 | ||
| 2476 | /* Get slab management. */ | 2494 | /* Get slab management. */ |
| 2477 | if (!(slabp = alloc_slabmgmt(cachep, objp, offset, local_flags))) | 2495 | slabp = alloc_slabmgmt(cachep, objp, offset, local_flags); |
| 2496 | if (!slabp) | ||
| 2478 | goto opps1; | 2497 | goto opps1; |
| 2479 | 2498 | ||
| 2480 | slabp->nodeid = nodeid; | 2499 | slabp->nodeid = nodeid; |
| @@ -2493,9 +2512,9 @@ static int cache_grow(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
| 2493 | l3->free_objects += cachep->num; | 2512 | l3->free_objects += cachep->num; |
| 2494 | spin_unlock(&l3->list_lock); | 2513 | spin_unlock(&l3->list_lock); |
| 2495 | return 1; | 2514 | return 1; |
| 2496 | opps1: | 2515 | opps1: |
| 2497 | kmem_freepages(cachep, objp); | 2516 | kmem_freepages(cachep, objp); |
| 2498 | failed: | 2517 | failed: |
| 2499 | if (local_flags & __GFP_WAIT) | 2518 | if (local_flags & __GFP_WAIT) |
| 2500 | local_irq_disable(); | 2519 | local_irq_disable(); |
| 2501 | return 0; | 2520 | return 0; |
| @@ -2538,8 +2557,8 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
| 2538 | page = virt_to_page(objp); | 2557 | page = virt_to_page(objp); |
| 2539 | 2558 | ||
| 2540 | if (page_get_cache(page) != cachep) { | 2559 | if (page_get_cache(page) != cachep) { |
| 2541 | printk(KERN_ERR | 2560 | printk(KERN_ERR "mismatch in kmem_cache_free: expected " |
| 2542 | "mismatch in kmem_cache_free: expected cache %p, got %p\n", | 2561 | "cache %p, got %p\n", |
| 2543 | page_get_cache(page), cachep); | 2562 | page_get_cache(page), cachep); |
| 2544 | printk(KERN_ERR "%p is %s.\n", cachep, cachep->name); | 2563 | printk(KERN_ERR "%p is %s.\n", cachep, cachep->name); |
| 2545 | printk(KERN_ERR "%p is %s.\n", page_get_cache(page), | 2564 | printk(KERN_ERR "%p is %s.\n", page_get_cache(page), |
| @@ -2549,13 +2568,12 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
| 2549 | slabp = page_get_slab(page); | 2568 | slabp = page_get_slab(page); |
| 2550 | 2569 | ||
| 2551 | if (cachep->flags & SLAB_RED_ZONE) { | 2570 | if (cachep->flags & SLAB_RED_ZONE) { |
| 2552 | if (*dbg_redzone1(cachep, objp) != RED_ACTIVE | 2571 | if (*dbg_redzone1(cachep, objp) != RED_ACTIVE || |
| 2553 | || *dbg_redzone2(cachep, objp) != RED_ACTIVE) { | 2572 | *dbg_redzone2(cachep, objp) != RED_ACTIVE) { |
| 2554 | slab_error(cachep, | 2573 | slab_error(cachep, "double free, or memory outside" |
| 2555 | "double free, or memory outside" | 2574 | " object was overwritten"); |
| 2556 | " object was overwritten"); | 2575 | printk(KERN_ERR "%p: redzone 1:0x%lx, " |
| 2557 | printk(KERN_ERR | 2576 | "redzone 2:0x%lx.\n", |
| 2558 | "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n", | ||
| 2559 | objp, *dbg_redzone1(cachep, objp), | 2577 | objp, *dbg_redzone1(cachep, objp), |
| 2560 | *dbg_redzone2(cachep, objp)); | 2578 | *dbg_redzone2(cachep, objp)); |
| 2561 | } | 2579 | } |
| @@ -2565,15 +2583,16 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
| 2565 | if (cachep->flags & SLAB_STORE_USER) | 2583 | if (cachep->flags & SLAB_STORE_USER) |
| 2566 | *dbg_userword(cachep, objp) = caller; | 2584 | *dbg_userword(cachep, objp) = caller; |
| 2567 | 2585 | ||
| 2568 | objnr = (unsigned)(objp - slabp->s_mem) / cachep->buffer_size; | 2586 | objnr = obj_to_index(cachep, slabp, objp); |
| 2569 | 2587 | ||
| 2570 | BUG_ON(objnr >= cachep->num); | 2588 | BUG_ON(objnr >= cachep->num); |
| 2571 | BUG_ON(objp != slabp->s_mem + objnr * cachep->buffer_size); | 2589 | BUG_ON(objp != index_to_obj(cachep, slabp, objnr)); |
| 2572 | 2590 | ||
| 2573 | if (cachep->flags & SLAB_DEBUG_INITIAL) { | 2591 | if (cachep->flags & SLAB_DEBUG_INITIAL) { |
| 2574 | /* Need to call the slab's constructor so the | 2592 | /* |
| 2575 | * caller can perform a verify of its state (debugging). | 2593 | * Need to call the slab's constructor so the caller can |
| 2576 | * Called without the cache-lock held. | 2594 | * perform a verify of its state (debugging). Called without |
| 2595 | * the cache-lock held. | ||
| 2577 | */ | 2596 | */ |
| 2578 | cachep->ctor(objp + obj_offset(cachep), | 2597 | cachep->ctor(objp + obj_offset(cachep), |
| 2579 | cachep, SLAB_CTOR_CONSTRUCTOR | SLAB_CTOR_VERIFY); | 2598 | cachep, SLAB_CTOR_CONSTRUCTOR | SLAB_CTOR_VERIFY); |
| @@ -2586,7 +2605,7 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp, | |||
| 2586 | } | 2605 | } |
| 2587 | if (cachep->flags & SLAB_POISON) { | 2606 | if (cachep->flags & SLAB_POISON) { |
| 2588 | #ifdef CONFIG_DEBUG_PAGEALLOC | 2607 | #ifdef CONFIG_DEBUG_PAGEALLOC |
| 2589 | if ((cachep->buffer_size % PAGE_SIZE) == 0 && OFF_SLAB(cachep)) { | 2608 | if ((cachep->buffer_size % PAGE_SIZE)==0 && OFF_SLAB(cachep)) { |
| 2590 | store_stackinfo(cachep, objp, (unsigned long)caller); | 2609 | store_stackinfo(cachep, objp, (unsigned long)caller); |
| 2591 | kernel_map_pages(virt_to_page(objp), | 2610 | kernel_map_pages(virt_to_page(objp), |
| 2592 | cachep->buffer_size / PAGE_SIZE, 0); | 2611 | cachep->buffer_size / PAGE_SIZE, 0); |
| @@ -2612,14 +2631,14 @@ static void check_slabp(struct kmem_cache *cachep, struct slab *slabp) | |||
| 2612 | goto bad; | 2631 | goto bad; |
| 2613 | } | 2632 | } |
| 2614 | if (entries != cachep->num - slabp->inuse) { | 2633 | if (entries != cachep->num - slabp->inuse) { |
| 2615 | bad: | 2634 | bad: |
| 2616 | printk(KERN_ERR | 2635 | printk(KERN_ERR "slab: Internal list corruption detected in " |
| 2617 | "slab: Internal list corruption detected in cache '%s'(%d), slabp %p(%d). Hexdump:\n", | 2636 | "cache '%s'(%d), slabp %p(%d). Hexdump:\n", |
| 2618 | cachep->name, cachep->num, slabp, slabp->inuse); | 2637 | cachep->name, cachep->num, slabp, slabp->inuse); |
| 2619 | for (i = 0; | 2638 | for (i = 0; |
| 2620 | i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t); | 2639 | i < sizeof(*slabp) + cachep->num * sizeof(kmem_bufctl_t); |
| 2621 | i++) { | 2640 | i++) { |
| 2622 | if ((i % 16) == 0) | 2641 | if (i % 16 == 0) |
| 2623 | printk("\n%03x:", i); | 2642 | printk("\n%03x:", i); |
| 2624 | printk(" %02x", ((unsigned char *)slabp)[i]); | 2643 | printk(" %02x", ((unsigned char *)slabp)[i]); |
| 2625 | } | 2644 | } |
| @@ -2641,12 +2660,13 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) | |||
| 2641 | 2660 | ||
| 2642 | check_irq_off(); | 2661 | check_irq_off(); |
| 2643 | ac = cpu_cache_get(cachep); | 2662 | ac = cpu_cache_get(cachep); |
| 2644 | retry: | 2663 | retry: |
| 2645 | batchcount = ac->batchcount; | 2664 | batchcount = ac->batchcount; |
| 2646 | if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { | 2665 | if (!ac->touched && batchcount > BATCHREFILL_LIMIT) { |
| 2647 | /* if there was little recent activity on this | 2666 | /* |
| 2648 | * cache, then perform only a partial refill. | 2667 | * If there was little recent activity on this cache, then |
| 2649 | * Otherwise we could generate refill bouncing. | 2668 | * perform only a partial refill. Otherwise we could generate |
| 2669 | * refill bouncing. | ||
| 2650 | */ | 2670 | */ |
| 2651 | batchcount = BATCHREFILL_LIMIT; | 2671 | batchcount = BATCHREFILL_LIMIT; |
| 2652 | } | 2672 | } |
| @@ -2702,29 +2722,29 @@ static void *cache_alloc_refill(struct kmem_cache *cachep, gfp_t flags) | |||
| 2702 | list_add(&slabp->list, &l3->slabs_partial); | 2722 | list_add(&slabp->list, &l3->slabs_partial); |
| 2703 | } | 2723 | } |
| 2704 | 2724 | ||
| 2705 | must_grow: | 2725 | must_grow: |
| 2706 | l3->free_objects -= ac->avail; | 2726 | l3->free_objects -= ac->avail; |
| 2707 | alloc_done: | 2727 | alloc_done: |
| 2708 | spin_unlock(&l3->list_lock); | 2728 | spin_unlock(&l3->list_lock); |
| 2709 | 2729 | ||
| 2710 | if (unlikely(!ac->avail)) { | 2730 | if (unlikely(!ac->avail)) { |
| 2711 | int x; | 2731 | int x; |
| 2712 | x = cache_grow(cachep, flags, numa_node_id()); | 2732 | x = cache_grow(cachep, flags, numa_node_id()); |
| 2713 | 2733 | ||
| 2714 | // cache_grow can reenable interrupts, then ac could change. | 2734 | /* cache_grow can reenable interrupts, then ac could change. */ |
| 2715 | ac = cpu_cache_get(cachep); | 2735 | ac = cpu_cache_get(cachep); |
| 2716 | if (!x && ac->avail == 0) // no objects in sight? abort | 2736 | if (!x && ac->avail == 0) /* no objects in sight? abort */ |
| 2717 | return NULL; | 2737 | return NULL; |
| 2718 | 2738 | ||
| 2719 | if (!ac->avail) // objects refilled by interrupt? | 2739 | if (!ac->avail) /* objects refilled by interrupt? */ |
| 2720 | goto retry; | 2740 | goto retry; |
| 2721 | } | 2741 | } |
| 2722 | ac->touched = 1; | 2742 | ac->touched = 1; |
| 2723 | return ac->entry[--ac->avail]; | 2743 | return ac->entry[--ac->avail]; |
| 2724 | } | 2744 | } |
| 2725 | 2745 | ||
| 2726 | static inline void | 2746 | static inline void cache_alloc_debugcheck_before(struct kmem_cache *cachep, |
| 2727 | cache_alloc_debugcheck_before(struct kmem_cache *cachep, gfp_t flags) | 2747 | gfp_t flags) |
| 2728 | { | 2748 | { |
| 2729 | might_sleep_if(flags & __GFP_WAIT); | 2749 | might_sleep_if(flags & __GFP_WAIT); |
| 2730 | #if DEBUG | 2750 | #if DEBUG |
| @@ -2733,8 +2753,8 @@ cache_alloc_debugcheck_before(struct kmem_cache *cachep, gfp_t flags) | |||
| 2733 | } | 2753 | } |
| 2734 | 2754 | ||
| 2735 | #if DEBUG | 2755 | #if DEBUG |
| 2736 | static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, gfp_t flags, | 2756 | static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, |
| 2737 | void *objp, void *caller) | 2757 | gfp_t flags, void *objp, void *caller) |
| 2738 | { | 2758 | { |
| 2739 | if (!objp) | 2759 | if (!objp) |
| 2740 | return objp; | 2760 | return objp; |
| @@ -2754,15 +2774,14 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep, gfp_t flags | |||
| 2754 | *dbg_userword(cachep, objp) = caller; | 2774 | *dbg_userword(cachep, objp) = caller; |
| 2755 | 2775 | ||
| 2756 | if (cachep->flags & SLAB_RED_ZONE) { | 2776 | if (cachep->flags & SLAB_RED_ZONE) { |
| 2757 | if (*dbg_redzone1(cachep, objp) != RED_INACTIVE | 2777 | if (*dbg_redzone1(cachep, objp) != RED_INACTIVE || |
| 2758 | || *dbg_redzone2(cachep, objp) != RED_INACTIVE) { | 2778 | *dbg_redzone2(cachep, objp) != RED_INACTIVE) { |
| 2759 | slab_error(cachep, | 2779 | slab_error(cachep, "double free, or memory outside" |
| 2760 | "double free, or memory outside" | 2780 | " object was overwritten"); |
| 2761 | " object was overwritten"); | ||
| 2762 | printk(KERN_ERR | 2781 | printk(KERN_ERR |
| 2763 | "%p: redzone 1: 0x%lx, redzone 2: 0x%lx.\n", | 2782 | "%p: redzone 1:0x%lx, redzone 2:0x%lx\n", |
| 2764 | objp, *dbg_redzone1(cachep, objp), | 2783 | objp, *dbg_redzone1(cachep, objp), |
| 2765 | *dbg_redzone2(cachep, objp)); | 2784 | *dbg_redzone2(cachep, objp)); |
| 2766 | } | 2785 | } |
| 2767 | *dbg_redzone1(cachep, objp) = RED_ACTIVE; | 2786 | *dbg_redzone1(cachep, objp) = RED_ACTIVE; |
| 2768 | *dbg_redzone2(cachep, objp) = RED_ACTIVE; | 2787 | *dbg_redzone2(cachep, objp) = RED_ACTIVE; |
| @@ -2809,8 +2828,8 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) | |||
| 2809 | return objp; | 2828 | return objp; |
| 2810 | } | 2829 | } |
| 2811 | 2830 | ||
| 2812 | static __always_inline void * | 2831 | static __always_inline void *__cache_alloc(struct kmem_cache *cachep, |
| 2813 | __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) | 2832 | gfp_t flags, void *caller) |
| 2814 | { | 2833 | { |
| 2815 | unsigned long save_flags; | 2834 | unsigned long save_flags; |
| 2816 | void *objp; | 2835 | void *objp; |
| @@ -2830,7 +2849,8 @@ __cache_alloc(struct kmem_cache *cachep, gfp_t flags, void *caller) | |||
| 2830 | /* | 2849 | /* |
| 2831 | * A interface to enable slab creation on nodeid | 2850 | * A interface to enable slab creation on nodeid |
| 2832 | */ | 2851 | */ |
| 2833 | static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) | 2852 | static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, |
| 2853 | int nodeid) | ||
| 2834 | { | 2854 | { |
| 2835 | struct list_head *entry; | 2855 | struct list_head *entry; |
| 2836 | struct slab *slabp; | 2856 | struct slab *slabp; |
| @@ -2841,7 +2861,7 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node | |||
| 2841 | l3 = cachep->nodelists[nodeid]; | 2861 | l3 = cachep->nodelists[nodeid]; |
| 2842 | BUG_ON(!l3); | 2862 | BUG_ON(!l3); |
| 2843 | 2863 | ||
| 2844 | retry: | 2864 | retry: |
| 2845 | check_irq_off(); | 2865 | check_irq_off(); |
| 2846 | spin_lock(&l3->list_lock); | 2866 | spin_lock(&l3->list_lock); |
| 2847 | entry = l3->slabs_partial.next; | 2867 | entry = l3->slabs_partial.next; |
| @@ -2868,16 +2888,15 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node | |||
| 2868 | /* move slabp to correct slabp list: */ | 2888 | /* move slabp to correct slabp list: */ |
| 2869 | list_del(&slabp->list); | 2889 | list_del(&slabp->list); |
| 2870 | 2890 | ||
| 2871 | if (slabp->free == BUFCTL_END) { | 2891 | if (slabp->free == BUFCTL_END) |
| 2872 | list_add(&slabp->list, &l3->slabs_full); | 2892 | list_add(&slabp->list, &l3->slabs_full); |
| 2873 | } else { | 2893 | else |
| 2874 | list_add(&slabp->list, &l3->slabs_partial); | 2894 | list_add(&slabp->list, &l3->slabs_partial); |
| 2875 | } | ||
| 2876 | 2895 | ||
| 2877 | spin_unlock(&l3->list_lock); | 2896 | spin_unlock(&l3->list_lock); |
| 2878 | goto done; | 2897 | goto done; |
| 2879 | 2898 | ||
| 2880 | must_grow: | 2899 | must_grow: |
| 2881 | spin_unlock(&l3->list_lock); | 2900 | spin_unlock(&l3->list_lock); |
| 2882 | x = cache_grow(cachep, flags, nodeid); | 2901 | x = cache_grow(cachep, flags, nodeid); |
| 2883 | 2902 | ||
| @@ -2885,7 +2904,7 @@ static void *__cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int node | |||
| 2885 | return NULL; | 2904 | return NULL; |
| 2886 | 2905 | ||
| 2887 | goto retry; | 2906 | goto retry; |
| 2888 | done: | 2907 | done: |
| 2889 | return obj; | 2908 | return obj; |
| 2890 | } | 2909 | } |
| 2891 | #endif | 2910 | #endif |
| @@ -2958,7 +2977,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) | |||
| 2958 | } | 2977 | } |
| 2959 | 2978 | ||
| 2960 | free_block(cachep, ac->entry, batchcount, node); | 2979 | free_block(cachep, ac->entry, batchcount, node); |
| 2961 | free_done: | 2980 | free_done: |
| 2962 | #if STATS | 2981 | #if STATS |
| 2963 | { | 2982 | { |
| 2964 | int i = 0; | 2983 | int i = 0; |
| @@ -2979,16 +2998,12 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac) | |||
| 2979 | #endif | 2998 | #endif |
| 2980 | spin_unlock(&l3->list_lock); | 2999 | spin_unlock(&l3->list_lock); |
| 2981 | ac->avail -= batchcount; | 3000 | ac->avail -= batchcount; |
| 2982 | memmove(ac->entry, &(ac->entry[batchcount]), | 3001 | memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail); |
| 2983 | sizeof(void *) * ac->avail); | ||
| 2984 | } | 3002 | } |
| 2985 | 3003 | ||
| 2986 | /* | 3004 | /* |
| 2987 | * __cache_free | 3005 | * Release an obj back to its cache. If the obj has a constructed state, it must |
| 2988 | * Release an obj back to its cache. If the obj has a constructed | 3006 | * be in this state _before_ it is released. Called with disabled ints. |
| 2989 | * state, it must be in this state _before_ it is released. | ||
| 2990 | * | ||
| 2991 | * Called with disabled ints. | ||
| 2992 | */ | 3007 | */ |
| 2993 | static inline void __cache_free(struct kmem_cache *cachep, void *objp) | 3008 | static inline void __cache_free(struct kmem_cache *cachep, void *objp) |
| 2994 | { | 3009 | { |
| @@ -3007,9 +3022,9 @@ static inline void __cache_free(struct kmem_cache *cachep, void *objp) | |||
| 3007 | if (unlikely(slabp->nodeid != numa_node_id())) { | 3022 | if (unlikely(slabp->nodeid != numa_node_id())) { |
| 3008 | struct array_cache *alien = NULL; | 3023 | struct array_cache *alien = NULL; |
| 3009 | int nodeid = slabp->nodeid; | 3024 | int nodeid = slabp->nodeid; |
| 3010 | struct kmem_list3 *l3 = | 3025 | struct kmem_list3 *l3; |
| 3011 | cachep->nodelists[numa_node_id()]; | ||
| 3012 | 3026 | ||
| 3027 | l3 = cachep->nodelists[numa_node_id()]; | ||
| 3013 | STATS_INC_NODEFREES(cachep); | 3028 | STATS_INC_NODEFREES(cachep); |
| 3014 | if (l3->alien && l3->alien[nodeid]) { | 3029 | if (l3->alien && l3->alien[nodeid]) { |
| 3015 | alien = l3->alien[nodeid]; | 3030 | alien = l3->alien[nodeid]; |
| @@ -3093,7 +3108,7 @@ int fastcall kmem_ptr_validate(struct kmem_cache *cachep, void *ptr) | |||
| 3093 | if (unlikely(page_get_cache(page) != cachep)) | 3108 | if (unlikely(page_get_cache(page) != cachep)) |
| 3094 | goto out; | 3109 | goto out; |
| 3095 | return 1; | 3110 | return 1; |
| 3096 | out: | 3111 | out: |
| 3097 | return 0; | 3112 | return 0; |
| 3098 | } | 3113 | } |
| 3099 | 3114 | ||
| @@ -3119,7 +3134,7 @@ void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid) | |||
| 3119 | local_irq_save(save_flags); | 3134 | local_irq_save(save_flags); |
| 3120 | 3135 | ||
| 3121 | if (nodeid == -1 || nodeid == numa_node_id() || | 3136 | if (nodeid == -1 || nodeid == numa_node_id() || |
| 3122 | !cachep->nodelists[nodeid]) | 3137 | !cachep->nodelists[nodeid]) |
| 3123 | ptr = ____cache_alloc(cachep, flags); | 3138 | ptr = ____cache_alloc(cachep, flags); |
| 3124 | else | 3139 | else |
| 3125 | ptr = __cache_alloc_node(cachep, flags, nodeid); | 3140 | ptr = __cache_alloc_node(cachep, flags, nodeid); |
| @@ -3148,6 +3163,7 @@ EXPORT_SYMBOL(kmalloc_node); | |||
| 3148 | * kmalloc - allocate memory | 3163 | * kmalloc - allocate memory |
| 3149 | * @size: how many bytes of memory are required. | 3164 | * @size: how many bytes of memory are required. |
| 3150 | * @flags: the type of memory to allocate. | 3165 | * @flags: the type of memory to allocate. |
| 3166 | * @caller: function caller for debug tracking of the caller | ||
| 3151 | * | 3167 | * |
| 3152 | * kmalloc is the normal method of allocating memory | 3168 | * kmalloc is the normal method of allocating memory |
| 3153 | * in the kernel. | 3169 | * in the kernel. |
| @@ -3236,7 +3252,7 @@ void *__alloc_percpu(size_t size) | |||
| 3236 | /* Catch derefs w/o wrappers */ | 3252 | /* Catch derefs w/o wrappers */ |
| 3237 | return (void *)(~(unsigned long)pdata); | 3253 | return (void *)(~(unsigned long)pdata); |
| 3238 | 3254 | ||
| 3239 | unwind_oom: | 3255 | unwind_oom: |
| 3240 | while (--i >= 0) { | 3256 | while (--i >= 0) { |
| 3241 | if (!cpu_possible(i)) | 3257 | if (!cpu_possible(i)) |
| 3242 | continue; | 3258 | continue; |
| @@ -3339,18 +3355,20 @@ static int alloc_kmemlist(struct kmem_cache *cachep) | |||
| 3339 | struct array_cache *nc = NULL, *new; | 3355 | struct array_cache *nc = NULL, *new; |
| 3340 | struct array_cache **new_alien = NULL; | 3356 | struct array_cache **new_alien = NULL; |
| 3341 | #ifdef CONFIG_NUMA | 3357 | #ifdef CONFIG_NUMA |
| 3342 | if (!(new_alien = alloc_alien_cache(node, cachep->limit))) | 3358 | new_alien = alloc_alien_cache(node, cachep->limit); |
| 3359 | if (!new_alien) | ||
| 3343 | goto fail; | 3360 | goto fail; |
| 3344 | #endif | 3361 | #endif |
| 3345 | if (!(new = alloc_arraycache(node, (cachep->shared * | 3362 | new = alloc_arraycache(node, cachep->shared*cachep->batchcount, |
| 3346 | cachep->batchcount), | 3363 | 0xbaadf00d); |
| 3347 | 0xbaadf00d))) | 3364 | if (!new) |
| 3348 | goto fail; | 3365 | goto fail; |
| 3349 | if ((l3 = cachep->nodelists[node])) { | 3366 | l3 = cachep->nodelists[node]; |
| 3350 | 3367 | if (l3) { | |
| 3351 | spin_lock_irq(&l3->list_lock); | 3368 | spin_lock_irq(&l3->list_lock); |
| 3352 | 3369 | ||
| 3353 | if ((nc = cachep->nodelists[node]->shared)) | 3370 | nc = cachep->nodelists[node]->shared; |
| 3371 | if (nc) | ||
| 3354 | free_block(cachep, nc->entry, nc->avail, node); | 3372 | free_block(cachep, nc->entry, nc->avail, node); |
| 3355 | 3373 | ||
| 3356 | l3->shared = new; | 3374 | l3->shared = new; |
| @@ -3359,27 +3377,27 @@ static int alloc_kmemlist(struct kmem_cache *cachep) | |||
| 3359 | new_alien = NULL; | 3377 | new_alien = NULL; |
| 3360 | } | 3378 | } |
| 3361 | l3->free_limit = (1 + nr_cpus_node(node)) * | 3379 | l3->free_limit = (1 + nr_cpus_node(node)) * |
| 3362 | cachep->batchcount + cachep->num; | 3380 | cachep->batchcount + cachep->num; |
| 3363 | spin_unlock_irq(&l3->list_lock); | 3381 | spin_unlock_irq(&l3->list_lock); |
| 3364 | kfree(nc); | 3382 | kfree(nc); |
| 3365 | free_alien_cache(new_alien); | 3383 | free_alien_cache(new_alien); |
| 3366 | continue; | 3384 | continue; |
| 3367 | } | 3385 | } |
| 3368 | if (!(l3 = kmalloc_node(sizeof(struct kmem_list3), | 3386 | l3 = kmalloc_node(sizeof(struct kmem_list3), GFP_KERNEL, node); |
| 3369 | GFP_KERNEL, node))) | 3387 | if (!l3) |
| 3370 | goto fail; | 3388 | goto fail; |
| 3371 | 3389 | ||
| 3372 | kmem_list3_init(l3); | 3390 | kmem_list3_init(l3); |
| 3373 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + | 3391 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3 + |
| 3374 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; | 3392 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; |
| 3375 | l3->shared = new; | 3393 | l3->shared = new; |
| 3376 | l3->alien = new_alien; | 3394 | l3->alien = new_alien; |
| 3377 | l3->free_limit = (1 + nr_cpus_node(node)) * | 3395 | l3->free_limit = (1 + nr_cpus_node(node)) * |
| 3378 | cachep->batchcount + cachep->num; | 3396 | cachep->batchcount + cachep->num; |
| 3379 | cachep->nodelists[node] = l3; | 3397 | cachep->nodelists[node] = l3; |
| 3380 | } | 3398 | } |
| 3381 | return err; | 3399 | return err; |
| 3382 | fail: | 3400 | fail: |
| 3383 | err = -ENOMEM; | 3401 | err = -ENOMEM; |
| 3384 | return err; | 3402 | return err; |
| 3385 | } | 3403 | } |
| @@ -3391,7 +3409,7 @@ struct ccupdate_struct { | |||
| 3391 | 3409 | ||
| 3392 | static void do_ccupdate_local(void *info) | 3410 | static void do_ccupdate_local(void *info) |
| 3393 | { | 3411 | { |
| 3394 | struct ccupdate_struct *new = (struct ccupdate_struct *)info; | 3412 | struct ccupdate_struct *new = info; |
| 3395 | struct array_cache *old; | 3413 | struct array_cache *old; |
| 3396 | 3414 | ||
| 3397 | check_irq_off(); | 3415 | check_irq_off(); |
| @@ -3401,16 +3419,17 @@ static void do_ccupdate_local(void *info) | |||
| 3401 | new->new[smp_processor_id()] = old; | 3419 | new->new[smp_processor_id()] = old; |
| 3402 | } | 3420 | } |
| 3403 | 3421 | ||
| 3404 | static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount, | 3422 | /* Always called with the cache_chain_mutex held */ |
| 3405 | int shared) | 3423 | static int do_tune_cpucache(struct kmem_cache *cachep, int limit, |
| 3424 | int batchcount, int shared) | ||
| 3406 | { | 3425 | { |
| 3407 | struct ccupdate_struct new; | 3426 | struct ccupdate_struct new; |
| 3408 | int i, err; | 3427 | int i, err; |
| 3409 | 3428 | ||
| 3410 | memset(&new.new, 0, sizeof(new.new)); | 3429 | memset(&new.new, 0, sizeof(new.new)); |
| 3411 | for_each_online_cpu(i) { | 3430 | for_each_online_cpu(i) { |
| 3412 | new.new[i] = | 3431 | new.new[i] = alloc_arraycache(cpu_to_node(i), limit, |
| 3413 | alloc_arraycache(cpu_to_node(i), limit, batchcount); | 3432 | batchcount); |
| 3414 | if (!new.new[i]) { | 3433 | if (!new.new[i]) { |
| 3415 | for (i--; i >= 0; i--) | 3434 | for (i--; i >= 0; i--) |
| 3416 | kfree(new.new[i]); | 3435 | kfree(new.new[i]); |
| @@ -3419,14 +3438,12 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount | |||
| 3419 | } | 3438 | } |
| 3420 | new.cachep = cachep; | 3439 | new.cachep = cachep; |
| 3421 | 3440 | ||
| 3422 | smp_call_function_all_cpus(do_ccupdate_local, (void *)&new); | 3441 | on_each_cpu(do_ccupdate_local, (void *)&new, 1, 1); |
| 3423 | 3442 | ||
| 3424 | check_irq_on(); | 3443 | check_irq_on(); |
| 3425 | spin_lock(&cachep->spinlock); | ||
| 3426 | cachep->batchcount = batchcount; | 3444 | cachep->batchcount = batchcount; |
| 3427 | cachep->limit = limit; | 3445 | cachep->limit = limit; |
| 3428 | cachep->shared = shared; | 3446 | cachep->shared = shared; |
| 3429 | spin_unlock(&cachep->spinlock); | ||
| 3430 | 3447 | ||
| 3431 | for_each_online_cpu(i) { | 3448 | for_each_online_cpu(i) { |
| 3432 | struct array_cache *ccold = new.new[i]; | 3449 | struct array_cache *ccold = new.new[i]; |
| @@ -3447,15 +3464,17 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit, int batchcount | |||
| 3447 | return 0; | 3464 | return 0; |
| 3448 | } | 3465 | } |
| 3449 | 3466 | ||
| 3467 | /* Called with cache_chain_mutex held always */ | ||
| 3450 | static void enable_cpucache(struct kmem_cache *cachep) | 3468 | static void enable_cpucache(struct kmem_cache *cachep) |
| 3451 | { | 3469 | { |
| 3452 | int err; | 3470 | int err; |
| 3453 | int limit, shared; | 3471 | int limit, shared; |
| 3454 | 3472 | ||
| 3455 | /* The head array serves three purposes: | 3473 | /* |
| 3474 | * The head array serves three purposes: | ||
| 3456 | * - create a LIFO ordering, i.e. return objects that are cache-warm | 3475 | * - create a LIFO ordering, i.e. return objects that are cache-warm |
| 3457 | * - reduce the number of spinlock operations. | 3476 | * - reduce the number of spinlock operations. |
| 3458 | * - reduce the number of linked list operations on the slab and | 3477 | * - reduce the number of linked list operations on the slab and |
| 3459 | * bufctl chains: array operations are cheaper. | 3478 | * bufctl chains: array operations are cheaper. |
| 3460 | * The numbers are guessed, we should auto-tune as described by | 3479 | * The numbers are guessed, we should auto-tune as described by |
| 3461 | * Bonwick. | 3480 | * Bonwick. |
| @@ -3471,7 +3490,8 @@ static void enable_cpucache(struct kmem_cache *cachep) | |||
| 3471 | else | 3490 | else |
| 3472 | limit = 120; | 3491 | limit = 120; |
| 3473 | 3492 | ||
| 3474 | /* Cpu bound tasks (e.g. network routing) can exhibit cpu bound | 3493 | /* |
| 3494 | * CPU bound tasks (e.g. network routing) can exhibit cpu bound | ||
| 3475 | * allocation behaviour: Most allocs on one cpu, most free operations | 3495 | * allocation behaviour: Most allocs on one cpu, most free operations |
| 3476 | * on another cpu. For these cases, an efficient object passing between | 3496 | * on another cpu. For these cases, an efficient object passing between |
| 3477 | * cpus is necessary. This is provided by a shared array. The array | 3497 | * cpus is necessary. This is provided by a shared array. The array |
| @@ -3486,9 +3506,9 @@ static void enable_cpucache(struct kmem_cache *cachep) | |||
| 3486 | #endif | 3506 | #endif |
| 3487 | 3507 | ||
| 3488 | #if DEBUG | 3508 | #if DEBUG |
| 3489 | /* With debugging enabled, large batchcount lead to excessively | 3509 | /* |
| 3490 | * long periods with disabled local interrupts. Limit the | 3510 | * With debugging enabled, large batchcount lead to excessively long |
| 3491 | * batchcount | 3511 | * periods with disabled local interrupts. Limit the batchcount |
| 3492 | */ | 3512 | */ |
| 3493 | if (limit > 32) | 3513 | if (limit > 32) |
| 3494 | limit = 32; | 3514 | limit = 32; |
| @@ -3499,23 +3519,32 @@ static void enable_cpucache(struct kmem_cache *cachep) | |||
| 3499 | cachep->name, -err); | 3519 | cachep->name, -err); |
| 3500 | } | 3520 | } |
| 3501 | 3521 | ||
| 3502 | static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac, | 3522 | /* |
| 3503 | int force, int node) | 3523 | * Drain an array if it contains any elements taking the l3 lock only if |
| 3524 | * necessary. Note that the l3 listlock also protects the array_cache | ||
| 3525 | * if drain_array() is used on the shared array. | ||
| 3526 | */ | ||
| 3527 | void drain_array(struct kmem_cache *cachep, struct kmem_list3 *l3, | ||
| 3528 | struct array_cache *ac, int force, int node) | ||
| 3504 | { | 3529 | { |
| 3505 | int tofree; | 3530 | int tofree; |
| 3506 | 3531 | ||
| 3507 | check_spinlock_acquired_node(cachep, node); | 3532 | if (!ac || !ac->avail) |
| 3533 | return; | ||
| 3508 | if (ac->touched && !force) { | 3534 | if (ac->touched && !force) { |
| 3509 | ac->touched = 0; | 3535 | ac->touched = 0; |
| 3510 | } else if (ac->avail) { | 3536 | } else { |
| 3511 | tofree = force ? ac->avail : (ac->limit + 4) / 5; | 3537 | spin_lock_irq(&l3->list_lock); |
| 3512 | if (tofree > ac->avail) { | 3538 | if (ac->avail) { |
| 3513 | tofree = (ac->avail + 1) / 2; | 3539 | tofree = force ? ac->avail : (ac->limit + 4) / 5; |
| 3540 | if (tofree > ac->avail) | ||
| 3541 | tofree = (ac->avail + 1) / 2; | ||
| 3542 | free_block(cachep, ac->entry, tofree, node); | ||
| 3543 | ac->avail -= tofree; | ||
| 3544 | memmove(ac->entry, &(ac->entry[tofree]), | ||
| 3545 | sizeof(void *) * ac->avail); | ||
| 3514 | } | 3546 | } |
| 3515 | free_block(cachep, ac->entry, tofree, node); | 3547 | spin_unlock_irq(&l3->list_lock); |
| 3516 | ac->avail -= tofree; | ||
| 3517 | memmove(ac->entry, &(ac->entry[tofree]), | ||
| 3518 | sizeof(void *) * ac->avail); | ||
| 3519 | } | 3548 | } |
| 3520 | } | 3549 | } |
| 3521 | 3550 | ||
| @@ -3528,13 +3557,14 @@ static void drain_array_locked(struct kmem_cache *cachep, struct array_cache *ac | |||
| 3528 | * - clear the per-cpu caches for this CPU. | 3557 | * - clear the per-cpu caches for this CPU. |
| 3529 | * - return freeable pages to the main free memory pool. | 3558 | * - return freeable pages to the main free memory pool. |
| 3530 | * | 3559 | * |
| 3531 | * If we cannot acquire the cache chain mutex then just give up - we'll | 3560 | * If we cannot acquire the cache chain mutex then just give up - we'll try |
| 3532 | * try again on the next iteration. | 3561 | * again on the next iteration. |
| 3533 | */ | 3562 | */ |
| 3534 | static void cache_reap(void *unused) | 3563 | static void cache_reap(void *unused) |
| 3535 | { | 3564 | { |
| 3536 | struct list_head *walk; | 3565 | struct list_head *walk; |
| 3537 | struct kmem_list3 *l3; | 3566 | struct kmem_list3 *l3; |
| 3567 | int node = numa_node_id(); | ||
| 3538 | 3568 | ||
| 3539 | if (!mutex_trylock(&cache_chain_mutex)) { | 3569 | if (!mutex_trylock(&cache_chain_mutex)) { |
| 3540 | /* Give up. Setup the next iteration. */ | 3570 | /* Give up. Setup the next iteration. */ |
| @@ -3550,65 +3580,72 @@ static void cache_reap(void *unused) | |||
| 3550 | struct slab *slabp; | 3580 | struct slab *slabp; |
| 3551 | 3581 | ||
| 3552 | searchp = list_entry(walk, struct kmem_cache, next); | 3582 | searchp = list_entry(walk, struct kmem_cache, next); |
| 3553 | |||
| 3554 | if (searchp->flags & SLAB_NO_REAP) | ||
| 3555 | goto next; | ||
| 3556 | |||
| 3557 | check_irq_on(); | 3583 | check_irq_on(); |
| 3558 | 3584 | ||
| 3559 | l3 = searchp->nodelists[numa_node_id()]; | 3585 | /* |
| 3586 | * We only take the l3 lock if absolutely necessary and we | ||
| 3587 | * have established with reasonable certainty that | ||
| 3588 | * we can do some work if the lock was obtained. | ||
| 3589 | */ | ||
| 3590 | l3 = searchp->nodelists[node]; | ||
| 3591 | |||
| 3560 | reap_alien(searchp, l3); | 3592 | reap_alien(searchp, l3); |
| 3561 | spin_lock_irq(&l3->list_lock); | ||
| 3562 | 3593 | ||
| 3563 | drain_array_locked(searchp, cpu_cache_get(searchp), 0, | 3594 | drain_array(searchp, l3, cpu_cache_get(searchp), 0, node); |
| 3564 | numa_node_id()); | ||
| 3565 | 3595 | ||
| 3596 | /* | ||
| 3597 | * These are racy checks but it does not matter | ||
| 3598 | * if we skip one check or scan twice. | ||
| 3599 | */ | ||
| 3566 | if (time_after(l3->next_reap, jiffies)) | 3600 | if (time_after(l3->next_reap, jiffies)) |
| 3567 | goto next_unlock; | 3601 | goto next; |
| 3568 | 3602 | ||
| 3569 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3; | 3603 | l3->next_reap = jiffies + REAPTIMEOUT_LIST3; |
| 3570 | 3604 | ||
| 3571 | if (l3->shared) | 3605 | drain_array(searchp, l3, l3->shared, 0, node); |
| 3572 | drain_array_locked(searchp, l3->shared, 0, | ||
| 3573 | numa_node_id()); | ||
| 3574 | 3606 | ||
| 3575 | if (l3->free_touched) { | 3607 | if (l3->free_touched) { |
| 3576 | l3->free_touched = 0; | 3608 | l3->free_touched = 0; |
| 3577 | goto next_unlock; | 3609 | goto next; |
| 3578 | } | 3610 | } |
| 3579 | 3611 | ||
| 3580 | tofree = | 3612 | tofree = (l3->free_limit + 5 * searchp->num - 1) / |
| 3581 | (l3->free_limit + 5 * searchp->num - | 3613 | (5 * searchp->num); |
| 3582 | 1) / (5 * searchp->num); | ||
| 3583 | do { | 3614 | do { |
| 3615 | /* | ||
| 3616 | * Do not lock if there are no free blocks. | ||
| 3617 | */ | ||
| 3618 | if (list_empty(&l3->slabs_free)) | ||
| 3619 | break; | ||
| 3620 | |||
| 3621 | spin_lock_irq(&l3->list_lock); | ||
| 3584 | p = l3->slabs_free.next; | 3622 | p = l3->slabs_free.next; |
| 3585 | if (p == &(l3->slabs_free)) | 3623 | if (p == &(l3->slabs_free)) { |
| 3624 | spin_unlock_irq(&l3->list_lock); | ||
| 3586 | break; | 3625 | break; |
| 3626 | } | ||
| 3587 | 3627 | ||
| 3588 | slabp = list_entry(p, struct slab, list); | 3628 | slabp = list_entry(p, struct slab, list); |
| 3589 | BUG_ON(slabp->inuse); | 3629 | BUG_ON(slabp->inuse); |
| 3590 | list_del(&slabp->list); | 3630 | list_del(&slabp->list); |
| 3591 | STATS_INC_REAPED(searchp); | 3631 | STATS_INC_REAPED(searchp); |
| 3592 | 3632 | ||
| 3593 | /* Safe to drop the lock. The slab is no longer | 3633 | /* |
| 3594 | * linked to the cache. | 3634 | * Safe to drop the lock. The slab is no longer linked |
| 3595 | * searchp cannot disappear, we hold | 3635 | * to the cache. searchp cannot disappear, we hold |
| 3596 | * cache_chain_lock | 3636 | * cache_chain_lock |
| 3597 | */ | 3637 | */ |
| 3598 | l3->free_objects -= searchp->num; | 3638 | l3->free_objects -= searchp->num; |
| 3599 | spin_unlock_irq(&l3->list_lock); | 3639 | spin_unlock_irq(&l3->list_lock); |
| 3600 | slab_destroy(searchp, slabp); | 3640 | slab_destroy(searchp, slabp); |
| 3601 | spin_lock_irq(&l3->list_lock); | ||
| 3602 | } while (--tofree > 0); | 3641 | } while (--tofree > 0); |
| 3603 | next_unlock: | 3642 | next: |
| 3604 | spin_unlock_irq(&l3->list_lock); | ||
| 3605 | next: | ||
| 3606 | cond_resched(); | 3643 | cond_resched(); |
| 3607 | } | 3644 | } |
| 3608 | check_irq_on(); | 3645 | check_irq_on(); |
| 3609 | mutex_unlock(&cache_chain_mutex); | 3646 | mutex_unlock(&cache_chain_mutex); |
| 3610 | next_reap_node(); | 3647 | next_reap_node(); |
| 3611 | /* Setup the next iteration */ | 3648 | /* Set up the next iteration */ |
| 3612 | schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC); | 3649 | schedule_delayed_work(&__get_cpu_var(reap_work), REAPTIMEOUT_CPUC); |
| 3613 | } | 3650 | } |
| 3614 | 3651 | ||
| @@ -3658,8 +3695,8 @@ static void *s_next(struct seq_file *m, void *p, loff_t *pos) | |||
| 3658 | { | 3695 | { |
| 3659 | struct kmem_cache *cachep = p; | 3696 | struct kmem_cache *cachep = p; |
| 3660 | ++*pos; | 3697 | ++*pos; |
| 3661 | return cachep->next.next == &cache_chain ? NULL | 3698 | return cachep->next.next == &cache_chain ? |
| 3662 | : list_entry(cachep->next.next, struct kmem_cache, next); | 3699 | NULL : list_entry(cachep->next.next, struct kmem_cache, next); |
| 3663 | } | 3700 | } |
| 3664 | 3701 | ||
| 3665 | static void s_stop(struct seq_file *m, void *p) | 3702 | static void s_stop(struct seq_file *m, void *p) |
| @@ -3681,7 +3718,6 @@ static int s_show(struct seq_file *m, void *p) | |||
| 3681 | int node; | 3718 | int node; |
| 3682 | struct kmem_list3 *l3; | 3719 | struct kmem_list3 *l3; |
| 3683 | 3720 | ||
| 3684 | spin_lock(&cachep->spinlock); | ||
| 3685 | active_objs = 0; | 3721 | active_objs = 0; |
| 3686 | num_slabs = 0; | 3722 | num_slabs = 0; |
| 3687 | for_each_online_node(node) { | 3723 | for_each_online_node(node) { |
| @@ -3748,7 +3784,9 @@ static int s_show(struct seq_file *m, void *p) | |||
| 3748 | unsigned long node_frees = cachep->node_frees; | 3784 | unsigned long node_frees = cachep->node_frees; |
| 3749 | 3785 | ||
| 3750 | seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \ | 3786 | seq_printf(m, " : globalstat %7lu %6lu %5lu %4lu \ |
| 3751 | %4lu %4lu %4lu %4lu", allocs, high, grown, reaped, errors, max_freeable, node_allocs, node_frees); | 3787 | %4lu %4lu %4lu %4lu", allocs, high, grown, |
| 3788 | reaped, errors, max_freeable, node_allocs, | ||
| 3789 | node_frees); | ||
| 3752 | } | 3790 | } |
| 3753 | /* cpu stats */ | 3791 | /* cpu stats */ |
| 3754 | { | 3792 | { |
| @@ -3762,7 +3800,6 @@ static int s_show(struct seq_file *m, void *p) | |||
| 3762 | } | 3800 | } |
| 3763 | #endif | 3801 | #endif |
| 3764 | seq_putc(m, '\n'); | 3802 | seq_putc(m, '\n'); |
| 3765 | spin_unlock(&cachep->spinlock); | ||
| 3766 | return 0; | 3803 | return 0; |
| 3767 | } | 3804 | } |
| 3768 | 3805 | ||
| @@ -3820,13 +3857,12 @@ ssize_t slabinfo_write(struct file *file, const char __user * buffer, | |||
| 3820 | mutex_lock(&cache_chain_mutex); | 3857 | mutex_lock(&cache_chain_mutex); |
| 3821 | res = -EINVAL; | 3858 | res = -EINVAL; |
| 3822 | list_for_each(p, &cache_chain) { | 3859 | list_for_each(p, &cache_chain) { |
| 3823 | struct kmem_cache *cachep = list_entry(p, struct kmem_cache, | 3860 | struct kmem_cache *cachep; |
| 3824 | next); | ||
| 3825 | 3861 | ||
| 3862 | cachep = list_entry(p, struct kmem_cache, next); | ||
| 3826 | if (!strcmp(cachep->name, kbuf)) { | 3863 | if (!strcmp(cachep->name, kbuf)) { |
| 3827 | if (limit < 1 || | 3864 | if (limit < 1 || batchcount < 1 || |
| 3828 | batchcount < 1 || | 3865 | batchcount > limit || shared < 0) { |
| 3829 | batchcount > limit || shared < 0) { | ||
| 3830 | res = 0; | 3866 | res = 0; |
| 3831 | } else { | 3867 | } else { |
| 3832 | res = do_tune_cpucache(cachep, limit, | 3868 | res = do_tune_cpucache(cachep, limit, |
