diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-13 16:28:13 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-13 16:28:13 -0400 |
commit | bf3a340738bc78008e496257c04fb5a7fc8281e6 (patch) | |
tree | 3e84d21261ff0c437f0ea2507df8c30844150769 /mm | |
parent | 321d03c86732e45f5f33ad0db5b68e2e1364acb9 (diff) | |
parent | 34bf6ef94a835a8f1d8abd3e7d38c6c08d205867 (diff) |
Merge branch 'slab/next' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux
Pull slab changes from Pekka Enberg:
"The biggest change is byte-sized freelist indices which reduces slab
freelist memory usage:
https://lkml.org/lkml/2013/12/2/64"
* 'slab/next' of git://git.kernel.org/pub/scm/linux/kernel/git/penberg/linux:
mm: slab/slub: use page->list consistently instead of page->lru
mm/slab.c: cleanup outdated comments and unify variables naming
slab: fix wrongly used macro
slub: fix high order page allocation problem with __GFP_NOFAIL
slab: Make allocations with GFP_ZERO slightly more efficient
slab: make more slab management structure off the slab
slab: introduce byte sized index for the freelist of a slab
slab: restrict the number of objects in a slab
slab: introduce helper functions to get/set free object
slab: factor out calculate nr objects in cache_estimate
Diffstat (limited to 'mm')
-rw-r--r-- | mm/slab.c | 183 | ||||
-rw-r--r-- | mm/slob.c | 10 | ||||
-rw-r--r-- | mm/slub.c | 5 |
3 files changed, 115 insertions, 83 deletions
@@ -157,6 +157,17 @@ | |||
157 | #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN | 157 | #define ARCH_KMALLOC_FLAGS SLAB_HWCACHE_ALIGN |
158 | #endif | 158 | #endif |
159 | 159 | ||
160 | #define FREELIST_BYTE_INDEX (((PAGE_SIZE >> BITS_PER_BYTE) \ | ||
161 | <= SLAB_OBJ_MIN_SIZE) ? 1 : 0) | ||
162 | |||
163 | #if FREELIST_BYTE_INDEX | ||
164 | typedef unsigned char freelist_idx_t; | ||
165 | #else | ||
166 | typedef unsigned short freelist_idx_t; | ||
167 | #endif | ||
168 | |||
169 | #define SLAB_OBJ_MAX_NUM (1 << sizeof(freelist_idx_t) * BITS_PER_BYTE) | ||
170 | |||
160 | /* | 171 | /* |
161 | * true if a page was allocated from pfmemalloc reserves for network-based | 172 | * true if a page was allocated from pfmemalloc reserves for network-based |
162 | * swap | 173 | * swap |
@@ -277,8 +288,8 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent) | |||
277 | * OTOH the cpuarrays can contain lots of objects, | 288 | * OTOH the cpuarrays can contain lots of objects, |
278 | * which could lock up otherwise freeable slabs. | 289 | * which could lock up otherwise freeable slabs. |
279 | */ | 290 | */ |
280 | #define REAPTIMEOUT_CPUC (2*HZ) | 291 | #define REAPTIMEOUT_AC (2*HZ) |
281 | #define REAPTIMEOUT_LIST3 (4*HZ) | 292 | #define REAPTIMEOUT_NODE (4*HZ) |
282 | 293 | ||
283 | #if STATS | 294 | #if STATS |
284 | #define STATS_INC_ACTIVE(x) ((x)->num_active++) | 295 | #define STATS_INC_ACTIVE(x) ((x)->num_active++) |
@@ -565,9 +576,31 @@ static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) | |||
565 | return cachep->array[smp_processor_id()]; | 576 | return cachep->array[smp_processor_id()]; |
566 | } | 577 | } |
567 | 578 | ||
568 | static size_t slab_mgmt_size(size_t nr_objs, size_t align) | 579 | static int calculate_nr_objs(size_t slab_size, size_t buffer_size, |
580 | size_t idx_size, size_t align) | ||
569 | { | 581 | { |
570 | return ALIGN(nr_objs * sizeof(unsigned int), align); | 582 | int nr_objs; |
583 | size_t freelist_size; | ||
584 | |||
585 | /* | ||
586 | * Ignore padding for the initial guess. The padding | ||
587 | * is at most @align-1 bytes, and @buffer_size is at | ||
588 | * least @align. In the worst case, this result will | ||
589 | * be one greater than the number of objects that fit | ||
590 | * into the memory allocation when taking the padding | ||
591 | * into account. | ||
592 | */ | ||
593 | nr_objs = slab_size / (buffer_size + idx_size); | ||
594 | |||
595 | /* | ||
596 | * This calculated number will be either the right | ||
597 | * amount, or one greater than what we want. | ||
598 | */ | ||
599 | freelist_size = slab_size - nr_objs * buffer_size; | ||
600 | if (freelist_size < ALIGN(nr_objs * idx_size, align)) | ||
601 | nr_objs--; | ||
602 | |||
603 | return nr_objs; | ||
571 | } | 604 | } |
572 | 605 | ||
573 | /* | 606 | /* |
@@ -600,25 +633,9 @@ static void cache_estimate(unsigned long gfporder, size_t buffer_size, | |||
600 | nr_objs = slab_size / buffer_size; | 633 | nr_objs = slab_size / buffer_size; |
601 | 634 | ||
602 | } else { | 635 | } else { |
603 | /* | 636 | nr_objs = calculate_nr_objs(slab_size, buffer_size, |
604 | * Ignore padding for the initial guess. The padding | 637 | sizeof(freelist_idx_t), align); |
605 | * is at most @align-1 bytes, and @buffer_size is at | 638 | mgmt_size = ALIGN(nr_objs * sizeof(freelist_idx_t), align); |
606 | * least @align. In the worst case, this result will | ||
607 | * be one greater than the number of objects that fit | ||
608 | * into the memory allocation when taking the padding | ||
609 | * into account. | ||
610 | */ | ||
611 | nr_objs = (slab_size) / (buffer_size + sizeof(unsigned int)); | ||
612 | |||
613 | /* | ||
614 | * This calculated number will be either the right | ||
615 | * amount, or one greater than what we want. | ||
616 | */ | ||
617 | if (slab_mgmt_size(nr_objs, align) + nr_objs*buffer_size | ||
618 | > slab_size) | ||
619 | nr_objs--; | ||
620 | |||
621 | mgmt_size = slab_mgmt_size(nr_objs, align); | ||
622 | } | 639 | } |
623 | *num = nr_objs; | 640 | *num = nr_objs; |
624 | *left_over = slab_size - nr_objs*buffer_size - mgmt_size; | 641 | *left_over = slab_size - nr_objs*buffer_size - mgmt_size; |
@@ -1067,7 +1084,7 @@ static int init_cache_node_node(int node) | |||
1067 | 1084 | ||
1068 | list_for_each_entry(cachep, &slab_caches, list) { | 1085 | list_for_each_entry(cachep, &slab_caches, list) { |
1069 | /* | 1086 | /* |
1070 | * Set up the size64 kmemlist for cpu before we can | 1087 | * Set up the kmem_cache_node for cpu before we can |
1071 | * begin anything. Make sure some other cpu on this | 1088 | * begin anything. Make sure some other cpu on this |
1072 | * node has not already allocated this | 1089 | * node has not already allocated this |
1073 | */ | 1090 | */ |
@@ -1076,12 +1093,12 @@ static int init_cache_node_node(int node) | |||
1076 | if (!n) | 1093 | if (!n) |
1077 | return -ENOMEM; | 1094 | return -ENOMEM; |
1078 | kmem_cache_node_init(n); | 1095 | kmem_cache_node_init(n); |
1079 | n->next_reap = jiffies + REAPTIMEOUT_LIST3 + | 1096 | n->next_reap = jiffies + REAPTIMEOUT_NODE + |
1080 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; | 1097 | ((unsigned long)cachep) % REAPTIMEOUT_NODE; |
1081 | 1098 | ||
1082 | /* | 1099 | /* |
1083 | * The l3s don't come and go as CPUs come and | 1100 | * The kmem_cache_nodes don't come and go as CPUs |
1084 | * go. slab_mutex is sufficient | 1101 | * come and go. slab_mutex is sufficient |
1085 | * protection here. | 1102 | * protection here. |
1086 | */ | 1103 | */ |
1087 | cachep->node[node] = n; | 1104 | cachep->node[node] = n; |
@@ -1406,8 +1423,8 @@ static void __init set_up_node(struct kmem_cache *cachep, int index) | |||
1406 | for_each_online_node(node) { | 1423 | for_each_online_node(node) { |
1407 | cachep->node[node] = &init_kmem_cache_node[index + node]; | 1424 | cachep->node[node] = &init_kmem_cache_node[index + node]; |
1408 | cachep->node[node]->next_reap = jiffies + | 1425 | cachep->node[node]->next_reap = jiffies + |
1409 | REAPTIMEOUT_LIST3 + | 1426 | REAPTIMEOUT_NODE + |
1410 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; | 1427 | ((unsigned long)cachep) % REAPTIMEOUT_NODE; |
1411 | } | 1428 | } |
1412 | } | 1429 | } |
1413 | 1430 | ||
@@ -2010,6 +2027,10 @@ static size_t calculate_slab_order(struct kmem_cache *cachep, | |||
2010 | if (!num) | 2027 | if (!num) |
2011 | continue; | 2028 | continue; |
2012 | 2029 | ||
2030 | /* Can't handle number of objects more than SLAB_OBJ_MAX_NUM */ | ||
2031 | if (num > SLAB_OBJ_MAX_NUM) | ||
2032 | break; | ||
2033 | |||
2013 | if (flags & CFLGS_OFF_SLAB) { | 2034 | if (flags & CFLGS_OFF_SLAB) { |
2014 | /* | 2035 | /* |
2015 | * Max number of objs-per-slab for caches which | 2036 | * Max number of objs-per-slab for caches which |
@@ -2017,7 +2038,7 @@ static size_t calculate_slab_order(struct kmem_cache *cachep, | |||
2017 | * looping condition in cache_grow(). | 2038 | * looping condition in cache_grow(). |
2018 | */ | 2039 | */ |
2019 | offslab_limit = size; | 2040 | offslab_limit = size; |
2020 | offslab_limit /= sizeof(unsigned int); | 2041 | offslab_limit /= sizeof(freelist_idx_t); |
2021 | 2042 | ||
2022 | if (num > offslab_limit) | 2043 | if (num > offslab_limit) |
2023 | break; | 2044 | break; |
@@ -2103,8 +2124,8 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp) | |||
2103 | } | 2124 | } |
2104 | } | 2125 | } |
2105 | cachep->node[numa_mem_id()]->next_reap = | 2126 | cachep->node[numa_mem_id()]->next_reap = |
2106 | jiffies + REAPTIMEOUT_LIST3 + | 2127 | jiffies + REAPTIMEOUT_NODE + |
2107 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; | 2128 | ((unsigned long)cachep) % REAPTIMEOUT_NODE; |
2108 | 2129 | ||
2109 | cpu_cache_get(cachep)->avail = 0; | 2130 | cpu_cache_get(cachep)->avail = 0; |
2110 | cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES; | 2131 | cpu_cache_get(cachep)->limit = BOOT_CPUCACHE_ENTRIES; |
@@ -2243,7 +2264,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | |||
2243 | * it too early on. Always use on-slab management when | 2264 | * it too early on. Always use on-slab management when |
2244 | * SLAB_NOLEAKTRACE to avoid recursive calls into kmemleak) | 2265 | * SLAB_NOLEAKTRACE to avoid recursive calls into kmemleak) |
2245 | */ | 2266 | */ |
2246 | if ((size >= (PAGE_SIZE >> 3)) && !slab_early_init && | 2267 | if ((size >= (PAGE_SIZE >> 5)) && !slab_early_init && |
2247 | !(flags & SLAB_NOLEAKTRACE)) | 2268 | !(flags & SLAB_NOLEAKTRACE)) |
2248 | /* | 2269 | /* |
2249 | * Size is large, assume best to place the slab management obj | 2270 | * Size is large, assume best to place the slab management obj |
@@ -2252,6 +2273,12 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | |||
2252 | flags |= CFLGS_OFF_SLAB; | 2273 | flags |= CFLGS_OFF_SLAB; |
2253 | 2274 | ||
2254 | size = ALIGN(size, cachep->align); | 2275 | size = ALIGN(size, cachep->align); |
2276 | /* | ||
2277 | * We should restrict the number of objects in a slab to implement | ||
2278 | * byte sized index. Refer to the comment on the SLAB_OBJ_MIN_SIZE definition. | ||
2279 | */ | ||
2280 | if (FREELIST_BYTE_INDEX && size < SLAB_OBJ_MIN_SIZE) | ||
2281 | size = ALIGN(SLAB_OBJ_MIN_SIZE, cachep->align); | ||
2255 | 2282 | ||
2256 | left_over = calculate_slab_order(cachep, size, cachep->align, flags); | 2283 | left_over = calculate_slab_order(cachep, size, cachep->align, flags); |
2257 | 2284 | ||
@@ -2259,7 +2286,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | |||
2259 | return -E2BIG; | 2286 | return -E2BIG; |
2260 | 2287 | ||
2261 | freelist_size = | 2288 | freelist_size = |
2262 | ALIGN(cachep->num * sizeof(unsigned int), cachep->align); | 2289 | ALIGN(cachep->num * sizeof(freelist_idx_t), cachep->align); |
2263 | 2290 | ||
2264 | /* | 2291 | /* |
2265 | * If the slab has been placed off-slab, and we have enough space then | 2292 | * If the slab has been placed off-slab, and we have enough space then |
@@ -2272,7 +2299,7 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | |||
2272 | 2299 | ||
2273 | if (flags & CFLGS_OFF_SLAB) { | 2300 | if (flags & CFLGS_OFF_SLAB) { |
2274 | /* really off slab. No need for manual alignment */ | 2301 | /* really off slab. No need for manual alignment */ |
2275 | freelist_size = cachep->num * sizeof(unsigned int); | 2302 | freelist_size = cachep->num * sizeof(freelist_idx_t); |
2276 | 2303 | ||
2277 | #ifdef CONFIG_PAGE_POISONING | 2304 | #ifdef CONFIG_PAGE_POISONING |
2278 | /* If we're going to use the generic kernel_map_pages() | 2305 | /* If we're going to use the generic kernel_map_pages() |
@@ -2300,10 +2327,10 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags) | |||
2300 | if (flags & CFLGS_OFF_SLAB) { | 2327 | if (flags & CFLGS_OFF_SLAB) { |
2301 | cachep->freelist_cache = kmalloc_slab(freelist_size, 0u); | 2328 | cachep->freelist_cache = kmalloc_slab(freelist_size, 0u); |
2302 | /* | 2329 | /* |
2303 | * This is a possibility for one of the malloc_sizes caches. | 2330 | * This is a possibility for one of the kmalloc_{dma,}_caches. |
2304 | * But since we go off slab only for object size greater than | 2331 | * But since we go off slab only for object size greater than |
2305 | * PAGE_SIZE/8, and malloc_sizes gets created in ascending order, | 2332 | * PAGE_SIZE/32, and kmalloc_{dma,}_caches get created |
2306 | * this should not happen at all. | 2333 | * in ascending order, this should not happen at all. |
2307 | * But leave a BUG_ON for some lucky dude. | 2334 | * But leave a BUG_ON for some lucky dude. |
2308 | */ | 2335 | */ |
2309 | BUG_ON(ZERO_OR_NULL_PTR(cachep->freelist_cache)); | 2336 | BUG_ON(ZERO_OR_NULL_PTR(cachep->freelist_cache)); |
@@ -2511,14 +2538,17 @@ int __kmem_cache_shutdown(struct kmem_cache *cachep) | |||
2511 | 2538 | ||
2512 | /* | 2539 | /* |
2513 | * Get the memory for a slab management obj. | 2540 | * Get the memory for a slab management obj. |
2514 | * For a slab cache when the slab descriptor is off-slab, slab descriptors | 2541 | * |
2515 | * always come from malloc_sizes caches. The slab descriptor cannot | 2542 | * For a slab cache when the slab descriptor is off-slab, the |
2516 | * come from the same cache which is getting created because, | 2543 | * slab descriptor can't come from the same cache which is being created. |
2517 | * when we are searching for an appropriate cache for these | 2544 | * Because if it is the case, that means we defer the creation of |
2518 | * descriptors in kmem_cache_create, we search through the malloc_sizes array. | 2545 | * the kmalloc_{dma,}_cache of size sizeof(slab descriptor) to this point. |
2519 | * If we are creating a malloc_sizes cache here it would not be visible to | 2546 | * And we eventually call down to __kmem_cache_create(), which |
2520 | * kmem_find_general_cachep till the initialization is complete. | 2547 | * in turn looks up in the kmalloc_{dma,}_caches for the desired-size one. |
2521 | * Hence we cannot have freelist_cache same as the original cache. | 2548 | * This is a "chicken-and-egg" problem. |
2549 | * | ||
2550 | * So the off-slab slab descriptor shall come from the kmalloc_{dma,}_caches, | ||
2551 | * which are all initialized during kmem_cache_init(). | ||
2522 | */ | 2552 | */ |
2523 | static void *alloc_slabmgmt(struct kmem_cache *cachep, | 2553 | static void *alloc_slabmgmt(struct kmem_cache *cachep, |
2524 | struct page *page, int colour_off, | 2554 | struct page *page, int colour_off, |
@@ -2542,9 +2572,15 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep, | |||
2542 | return freelist; | 2572 | return freelist; |
2543 | } | 2573 | } |
2544 | 2574 | ||
2545 | static inline unsigned int *slab_freelist(struct page *page) | 2575 | static inline freelist_idx_t get_free_obj(struct page *page, unsigned char idx) |
2546 | { | 2576 | { |
2547 | return (unsigned int *)(page->freelist); | 2577 | return ((freelist_idx_t *)page->freelist)[idx]; |
2578 | } | ||
2579 | |||
2580 | static inline void set_free_obj(struct page *page, | ||
2581 | unsigned char idx, freelist_idx_t val) | ||
2582 | { | ||
2583 | ((freelist_idx_t *)(page->freelist))[idx] = val; | ||
2548 | } | 2584 | } |
2549 | 2585 | ||
2550 | static void cache_init_objs(struct kmem_cache *cachep, | 2586 | static void cache_init_objs(struct kmem_cache *cachep, |
@@ -2589,7 +2625,7 @@ static void cache_init_objs(struct kmem_cache *cachep, | |||
2589 | if (cachep->ctor) | 2625 | if (cachep->ctor) |
2590 | cachep->ctor(objp); | 2626 | cachep->ctor(objp); |
2591 | #endif | 2627 | #endif |
2592 | slab_freelist(page)[i] = i; | 2628 | set_free_obj(page, i, i); |
2593 | } | 2629 | } |
2594 | } | 2630 | } |
2595 | 2631 | ||
@@ -2608,7 +2644,7 @@ static void *slab_get_obj(struct kmem_cache *cachep, struct page *page, | |||
2608 | { | 2644 | { |
2609 | void *objp; | 2645 | void *objp; |
2610 | 2646 | ||
2611 | objp = index_to_obj(cachep, page, slab_freelist(page)[page->active]); | 2647 | objp = index_to_obj(cachep, page, get_free_obj(page, page->active)); |
2612 | page->active++; | 2648 | page->active++; |
2613 | #if DEBUG | 2649 | #if DEBUG |
2614 | WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid); | 2650 | WARN_ON(page_to_nid(virt_to_page(objp)) != nodeid); |
@@ -2629,7 +2665,7 @@ static void slab_put_obj(struct kmem_cache *cachep, struct page *page, | |||
2629 | 2665 | ||
2630 | /* Verify double free bug */ | 2666 | /* Verify double free bug */ |
2631 | for (i = page->active; i < cachep->num; i++) { | 2667 | for (i = page->active; i < cachep->num; i++) { |
2632 | if (slab_freelist(page)[i] == objnr) { | 2668 | if (get_free_obj(page, i) == objnr) { |
2633 | printk(KERN_ERR "slab: double free detected in cache " | 2669 | printk(KERN_ERR "slab: double free detected in cache " |
2634 | "'%s', objp %p\n", cachep->name, objp); | 2670 | "'%s', objp %p\n", cachep->name, objp); |
2635 | BUG(); | 2671 | BUG(); |
@@ -2637,7 +2673,7 @@ static void slab_put_obj(struct kmem_cache *cachep, struct page *page, | |||
2637 | } | 2673 | } |
2638 | #endif | 2674 | #endif |
2639 | page->active--; | 2675 | page->active--; |
2640 | slab_freelist(page)[page->active] = objnr; | 2676 | set_free_obj(page, page->active, objnr); |
2641 | } | 2677 | } |
2642 | 2678 | ||
2643 | /* | 2679 | /* |
@@ -2886,9 +2922,9 @@ retry: | |||
2886 | /* move slabp to correct slabp list: */ | 2922 | /* move slabp to correct slabp list: */ |
2887 | list_del(&page->lru); | 2923 | list_del(&page->lru); |
2888 | if (page->active == cachep->num) | 2924 | if (page->active == cachep->num) |
2889 | list_add(&page->list, &n->slabs_full); | 2925 | list_add(&page->lru, &n->slabs_full); |
2890 | else | 2926 | else |
2891 | list_add(&page->list, &n->slabs_partial); | 2927 | list_add(&page->lru, &n->slabs_partial); |
2892 | } | 2928 | } |
2893 | 2929 | ||
2894 | must_grow: | 2930 | must_grow: |
@@ -3245,11 +3281,11 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, | |||
3245 | kmemleak_alloc_recursive(ptr, cachep->object_size, 1, cachep->flags, | 3281 | kmemleak_alloc_recursive(ptr, cachep->object_size, 1, cachep->flags, |
3246 | flags); | 3282 | flags); |
3247 | 3283 | ||
3248 | if (likely(ptr)) | 3284 | if (likely(ptr)) { |
3249 | kmemcheck_slab_alloc(cachep, flags, ptr, cachep->object_size); | 3285 | kmemcheck_slab_alloc(cachep, flags, ptr, cachep->object_size); |
3250 | 3286 | if (unlikely(flags & __GFP_ZERO)) | |
3251 | if (unlikely((flags & __GFP_ZERO) && ptr)) | 3287 | memset(ptr, 0, cachep->object_size); |
3252 | memset(ptr, 0, cachep->object_size); | 3288 | } |
3253 | 3289 | ||
3254 | return ptr; | 3290 | return ptr; |
3255 | } | 3291 | } |
@@ -3310,17 +3346,17 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller) | |||
3310 | flags); | 3346 | flags); |
3311 | prefetchw(objp); | 3347 | prefetchw(objp); |
3312 | 3348 | ||
3313 | if (likely(objp)) | 3349 | if (likely(objp)) { |
3314 | kmemcheck_slab_alloc(cachep, flags, objp, cachep->object_size); | 3350 | kmemcheck_slab_alloc(cachep, flags, objp, cachep->object_size); |
3315 | 3351 | if (unlikely(flags & __GFP_ZERO)) | |
3316 | if (unlikely((flags & __GFP_ZERO) && objp)) | 3352 | memset(objp, 0, cachep->object_size); |
3317 | memset(objp, 0, cachep->object_size); | 3353 | } |
3318 | 3354 | ||
3319 | return objp; | 3355 | return objp; |
3320 | } | 3356 | } |
3321 | 3357 | ||
3322 | /* | 3358 | /* |
3323 | * Caller needs to acquire correct kmem_list's list_lock | 3359 | * Caller needs to acquire correct kmem_cache_node's list_lock |
3324 | */ | 3360 | */ |
3325 | static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, | 3361 | static void free_block(struct kmem_cache *cachep, void **objpp, int nr_objects, |
3326 | int node) | 3362 | int node) |
@@ -3574,11 +3610,6 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags, | |||
3574 | struct kmem_cache *cachep; | 3610 | struct kmem_cache *cachep; |
3575 | void *ret; | 3611 | void *ret; |
3576 | 3612 | ||
3577 | /* If you want to save a few bytes .text space: replace | ||
3578 | * __ with kmem_. | ||
3579 | * Then kmalloc uses the uninlined functions instead of the inline | ||
3580 | * functions. | ||
3581 | */ | ||
3582 | cachep = kmalloc_slab(size, flags); | 3613 | cachep = kmalloc_slab(size, flags); |
3583 | if (unlikely(ZERO_OR_NULL_PTR(cachep))) | 3614 | if (unlikely(ZERO_OR_NULL_PTR(cachep))) |
3584 | return cachep; | 3615 | return cachep; |
@@ -3670,7 +3701,7 @@ EXPORT_SYMBOL(kfree); | |||
3670 | /* | 3701 | /* |
3671 | * This initializes kmem_cache_node or resizes various caches for all nodes. | 3702 | * This initializes kmem_cache_node or resizes various caches for all nodes. |
3672 | */ | 3703 | */ |
3673 | static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) | 3704 | static int alloc_kmem_cache_node(struct kmem_cache *cachep, gfp_t gfp) |
3674 | { | 3705 | { |
3675 | int node; | 3706 | int node; |
3676 | struct kmem_cache_node *n; | 3707 | struct kmem_cache_node *n; |
@@ -3726,8 +3757,8 @@ static int alloc_kmemlist(struct kmem_cache *cachep, gfp_t gfp) | |||
3726 | } | 3757 | } |
3727 | 3758 | ||
3728 | kmem_cache_node_init(n); | 3759 | kmem_cache_node_init(n); |
3729 | n->next_reap = jiffies + REAPTIMEOUT_LIST3 + | 3760 | n->next_reap = jiffies + REAPTIMEOUT_NODE + |
3730 | ((unsigned long)cachep) % REAPTIMEOUT_LIST3; | 3761 | ((unsigned long)cachep) % REAPTIMEOUT_NODE; |
3731 | n->shared = new_shared; | 3762 | n->shared = new_shared; |
3732 | n->alien = new_alien; | 3763 | n->alien = new_alien; |
3733 | n->free_limit = (1 + nr_cpus_node(node)) * | 3764 | n->free_limit = (1 + nr_cpus_node(node)) * |
@@ -3813,7 +3844,7 @@ static int __do_tune_cpucache(struct kmem_cache *cachep, int limit, | |||
3813 | kfree(ccold); | 3844 | kfree(ccold); |
3814 | } | 3845 | } |
3815 | kfree(new); | 3846 | kfree(new); |
3816 | return alloc_kmemlist(cachep, gfp); | 3847 | return alloc_kmem_cache_node(cachep, gfp); |
3817 | } | 3848 | } |
3818 | 3849 | ||
3819 | static int do_tune_cpucache(struct kmem_cache *cachep, int limit, | 3850 | static int do_tune_cpucache(struct kmem_cache *cachep, int limit, |
@@ -3982,7 +4013,7 @@ static void cache_reap(struct work_struct *w) | |||
3982 | if (time_after(n->next_reap, jiffies)) | 4013 | if (time_after(n->next_reap, jiffies)) |
3983 | goto next; | 4014 | goto next; |
3984 | 4015 | ||
3985 | n->next_reap = jiffies + REAPTIMEOUT_LIST3; | 4016 | n->next_reap = jiffies + REAPTIMEOUT_NODE; |
3986 | 4017 | ||
3987 | drain_array(searchp, n, n->shared, 0, node); | 4018 | drain_array(searchp, n, n->shared, 0, node); |
3988 | 4019 | ||
@@ -4003,7 +4034,7 @@ next: | |||
4003 | next_reap_node(); | 4034 | next_reap_node(); |
4004 | out: | 4035 | out: |
4005 | /* Set up the next iteration */ | 4036 | /* Set up the next iteration */ |
4006 | schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_CPUC)); | 4037 | schedule_delayed_work(work, round_jiffies_relative(REAPTIMEOUT_AC)); |
4007 | } | 4038 | } |
4008 | 4039 | ||
4009 | #ifdef CONFIG_SLABINFO | 4040 | #ifdef CONFIG_SLABINFO |
@@ -4210,7 +4241,7 @@ static void handle_slab(unsigned long *n, struct kmem_cache *c, | |||
4210 | 4241 | ||
4211 | for (j = page->active; j < c->num; j++) { | 4242 | for (j = page->active; j < c->num; j++) { |
4212 | /* Skip freed item */ | 4243 | /* Skip freed item */ |
4213 | if (slab_freelist(page)[j] == i) { | 4244 | if (get_free_obj(page, j) == i) { |
4214 | active = false; | 4245 | active = false; |
4215 | break; | 4246 | break; |
4216 | } | 4247 | } |
@@ -111,13 +111,13 @@ static inline int slob_page_free(struct page *sp) | |||
111 | 111 | ||
112 | static void set_slob_page_free(struct page *sp, struct list_head *list) | 112 | static void set_slob_page_free(struct page *sp, struct list_head *list) |
113 | { | 113 | { |
114 | list_add(&sp->list, list); | 114 | list_add(&sp->lru, list); |
115 | __SetPageSlobFree(sp); | 115 | __SetPageSlobFree(sp); |
116 | } | 116 | } |
117 | 117 | ||
118 | static inline void clear_slob_page_free(struct page *sp) | 118 | static inline void clear_slob_page_free(struct page *sp) |
119 | { | 119 | { |
120 | list_del(&sp->list); | 120 | list_del(&sp->lru); |
121 | __ClearPageSlobFree(sp); | 121 | __ClearPageSlobFree(sp); |
122 | } | 122 | } |
123 | 123 | ||
@@ -282,7 +282,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node) | |||
282 | 282 | ||
283 | spin_lock_irqsave(&slob_lock, flags); | 283 | spin_lock_irqsave(&slob_lock, flags); |
284 | /* Iterate through each partially free page, try to find room */ | 284 | /* Iterate through each partially free page, try to find room */ |
285 | list_for_each_entry(sp, slob_list, list) { | 285 | list_for_each_entry(sp, slob_list, lru) { |
286 | #ifdef CONFIG_NUMA | 286 | #ifdef CONFIG_NUMA |
287 | /* | 287 | /* |
288 | * If there's a node specification, search for a partial | 288 | * If there's a node specification, search for a partial |
@@ -296,7 +296,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node) | |||
296 | continue; | 296 | continue; |
297 | 297 | ||
298 | /* Attempt to alloc */ | 298 | /* Attempt to alloc */ |
299 | prev = sp->list.prev; | 299 | prev = sp->lru.prev; |
300 | b = slob_page_alloc(sp, size, align); | 300 | b = slob_page_alloc(sp, size, align); |
301 | if (!b) | 301 | if (!b) |
302 | continue; | 302 | continue; |
@@ -322,7 +322,7 @@ static void *slob_alloc(size_t size, gfp_t gfp, int align, int node) | |||
322 | spin_lock_irqsave(&slob_lock, flags); | 322 | spin_lock_irqsave(&slob_lock, flags); |
323 | sp->units = SLOB_UNITS(PAGE_SIZE); | 323 | sp->units = SLOB_UNITS(PAGE_SIZE); |
324 | sp->freelist = b; | 324 | sp->freelist = b; |
325 | INIT_LIST_HEAD(&sp->list); | 325 | INIT_LIST_HEAD(&sp->lru); |
326 | set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE)); | 326 | set_slob(b, SLOB_UNITS(PAGE_SIZE), b + SLOB_UNITS(PAGE_SIZE)); |
327 | set_slob_page_free(sp, slob_list); | 327 | set_slob_page_free(sp, slob_list); |
328 | b = slob_page_alloc(sp, size, align); | 328 | b = slob_page_alloc(sp, size, align); |
@@ -1352,11 +1352,12 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
1352 | page = alloc_slab_page(alloc_gfp, node, oo); | 1352 | page = alloc_slab_page(alloc_gfp, node, oo); |
1353 | if (unlikely(!page)) { | 1353 | if (unlikely(!page)) { |
1354 | oo = s->min; | 1354 | oo = s->min; |
1355 | alloc_gfp = flags; | ||
1355 | /* | 1356 | /* |
1356 | * Allocation may have failed due to fragmentation. | 1357 | * Allocation may have failed due to fragmentation. |
1357 | * Try a lower order alloc if possible | 1358 | * Try a lower order alloc if possible |
1358 | */ | 1359 | */ |
1359 | page = alloc_slab_page(flags, node, oo); | 1360 | page = alloc_slab_page(alloc_gfp, node, oo); |
1360 | 1361 | ||
1361 | if (page) | 1362 | if (page) |
1362 | stat(s, ORDER_FALLBACK); | 1363 | stat(s, ORDER_FALLBACK); |
@@ -1366,7 +1367,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node) | |||
1366 | && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) { | 1367 | && !(s->flags & (SLAB_NOTRACK | DEBUG_DEFAULT_FLAGS))) { |
1367 | int pages = 1 << oo_order(oo); | 1368 | int pages = 1 << oo_order(oo); |
1368 | 1369 | ||
1369 | kmemcheck_alloc_shadow(page, oo_order(oo), flags, node); | 1370 | kmemcheck_alloc_shadow(page, oo_order(oo), alloc_gfp, node); |
1370 | 1371 | ||
1371 | /* | 1372 | /* |
1372 | * Objects from caches that have a constructor don't get | 1373 | * Objects from caches that have a constructor don't get |