diff options
author | Christoph Lameter <cl@linux.com> | 2010-08-20 13:37:15 -0400 |
---|---|---|
committer | Pekka Enberg <penberg@kernel.org> | 2010-10-02 03:24:27 -0400 |
commit | 51df1142816e469173889fb6d6dc810be9b9e022 (patch) | |
tree | e2827e87486675b514c68f06d67ac5980cd6ceb1 /mm | |
parent | 6c182dc0de26ef97efb6a97a8deab074833764e7 (diff) |
slub: Dynamically size kmalloc cache allocations
kmalloc caches are statically defined and may take up a lot of space just
because the size of the node array has to be dimensioned for the largest
node count supported.
This patch makes the size of the kmem_cache structure dynamic throughout by
creating a kmem_cache slab cache for the kmem_cache objects. The bootstrap
occurs by allocating the initial one or two kmem_cache objects from the
page allocator.
C2->C3
- Fix various issues indicated by David
- Make create_kmalloc_cache return a kmem_cache * pointer.
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
Signed-off-by: Pekka Enberg <penberg@kernel.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/slub.c | 191 |
1 files changed, 138 insertions, 53 deletions
@@ -168,7 +168,6 @@ static inline int kmem_cache_debug(struct kmem_cache *s) | |||
168 | 168 | ||
169 | /* Internal SLUB flags */ | 169 | /* Internal SLUB flags */ |
170 | #define __OBJECT_POISON 0x80000000UL /* Poison object */ | 170 | #define __OBJECT_POISON 0x80000000UL /* Poison object */ |
171 | #define __SYSFS_ADD_DEFERRED 0x40000000UL /* Not yet visible via sysfs */ | ||
172 | 171 | ||
173 | static int kmem_size = sizeof(struct kmem_cache); | 172 | static int kmem_size = sizeof(struct kmem_cache); |
174 | 173 | ||
@@ -178,7 +177,7 @@ static struct notifier_block slab_notifier; | |||
178 | 177 | ||
179 | static enum { | 178 | static enum { |
180 | DOWN, /* No slab functionality available */ | 179 | DOWN, /* No slab functionality available */ |
181 | PARTIAL, /* kmem_cache_open() works but kmalloc does not */ | 180 | PARTIAL, /* Kmem_cache_node works */ |
182 | UP, /* Everything works but does not show up in sysfs */ | 181 | UP, /* Everything works but does not show up in sysfs */ |
183 | SYSFS /* Sysfs up */ | 182 | SYSFS /* Sysfs up */ |
184 | } slab_state = DOWN; | 183 | } slab_state = DOWN; |
@@ -2073,6 +2072,8 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s) | |||
2073 | } | 2072 | } |
2074 | 2073 | ||
2075 | #ifdef CONFIG_NUMA | 2074 | #ifdef CONFIG_NUMA |
2075 | static struct kmem_cache *kmem_cache_node; | ||
2076 | |||
2076 | /* | 2077 | /* |
2077 | * No kmalloc_node yet so do it by hand. We know that this is the first | 2078 | * No kmalloc_node yet so do it by hand. We know that this is the first |
2078 | * slab on the node for this slabcache. There are no concurrent accesses | 2079 | * slab on the node for this slabcache. There are no concurrent accesses |
@@ -2088,9 +2089,9 @@ static void early_kmem_cache_node_alloc(int node) | |||
2088 | struct kmem_cache_node *n; | 2089 | struct kmem_cache_node *n; |
2089 | unsigned long flags; | 2090 | unsigned long flags; |
2090 | 2091 | ||
2091 | BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node)); | 2092 | BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node)); |
2092 | 2093 | ||
2093 | page = new_slab(kmalloc_caches, GFP_NOWAIT, node); | 2094 | page = new_slab(kmem_cache_node, GFP_NOWAIT, node); |
2094 | 2095 | ||
2095 | BUG_ON(!page); | 2096 | BUG_ON(!page); |
2096 | if (page_to_nid(page) != node) { | 2097 | if (page_to_nid(page) != node) { |
@@ -2102,15 +2103,15 @@ static void early_kmem_cache_node_alloc(int node) | |||
2102 | 2103 | ||
2103 | n = page->freelist; | 2104 | n = page->freelist; |
2104 | BUG_ON(!n); | 2105 | BUG_ON(!n); |
2105 | page->freelist = get_freepointer(kmalloc_caches, n); | 2106 | page->freelist = get_freepointer(kmem_cache_node, n); |
2106 | page->inuse++; | 2107 | page->inuse++; |
2107 | kmalloc_caches->node[node] = n; | 2108 | kmem_cache_node->node[node] = n; |
2108 | #ifdef CONFIG_SLUB_DEBUG | 2109 | #ifdef CONFIG_SLUB_DEBUG |
2109 | init_object(kmalloc_caches, n, 1); | 2110 | init_object(kmem_cache_node, n, 1); |
2110 | init_tracking(kmalloc_caches, n); | 2111 | init_tracking(kmem_cache_node, n); |
2111 | #endif | 2112 | #endif |
2112 | init_kmem_cache_node(n, kmalloc_caches); | 2113 | init_kmem_cache_node(n, kmem_cache_node); |
2113 | inc_slabs_node(kmalloc_caches, node, page->objects); | 2114 | inc_slabs_node(kmem_cache_node, node, page->objects); |
2114 | 2115 | ||
2115 | /* | 2116 | /* |
2116 | * lockdep requires consistent irq usage for each lock | 2117 | * lockdep requires consistent irq usage for each lock |
@@ -2128,8 +2129,10 @@ static void free_kmem_cache_nodes(struct kmem_cache *s) | |||
2128 | 2129 | ||
2129 | for_each_node_state(node, N_NORMAL_MEMORY) { | 2130 | for_each_node_state(node, N_NORMAL_MEMORY) { |
2130 | struct kmem_cache_node *n = s->node[node]; | 2131 | struct kmem_cache_node *n = s->node[node]; |
2132 | |||
2131 | if (n) | 2133 | if (n) |
2132 | kmem_cache_free(kmalloc_caches, n); | 2134 | kmem_cache_free(kmem_cache_node, n); |
2135 | |||
2133 | s->node[node] = NULL; | 2136 | s->node[node] = NULL; |
2134 | } | 2137 | } |
2135 | } | 2138 | } |
@@ -2145,7 +2148,7 @@ static int init_kmem_cache_nodes(struct kmem_cache *s) | |||
2145 | early_kmem_cache_node_alloc(node); | 2148 | early_kmem_cache_node_alloc(node); |
2146 | continue; | 2149 | continue; |
2147 | } | 2150 | } |
2148 | n = kmem_cache_alloc_node(kmalloc_caches, | 2151 | n = kmem_cache_alloc_node(kmem_cache_node, |
2149 | GFP_KERNEL, node); | 2152 | GFP_KERNEL, node); |
2150 | 2153 | ||
2151 | if (!n) { | 2154 | if (!n) { |
@@ -2498,11 +2501,13 @@ EXPORT_SYMBOL(kmem_cache_destroy); | |||
2498 | * Kmalloc subsystem | 2501 | * Kmalloc subsystem |
2499 | *******************************************************************/ | 2502 | *******************************************************************/ |
2500 | 2503 | ||
2501 | struct kmem_cache kmalloc_caches[KMALLOC_CACHES] __cacheline_aligned; | 2504 | struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT]; |
2502 | EXPORT_SYMBOL(kmalloc_caches); | 2505 | EXPORT_SYMBOL(kmalloc_caches); |
2503 | 2506 | ||
2507 | static struct kmem_cache *kmem_cache; | ||
2508 | |||
2504 | #ifdef CONFIG_ZONE_DMA | 2509 | #ifdef CONFIG_ZONE_DMA |
2505 | static struct kmem_cache kmalloc_dma_caches[SLUB_PAGE_SHIFT]; | 2510 | static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT]; |
2506 | #endif | 2511 | #endif |
2507 | 2512 | ||
2508 | static int __init setup_slub_min_order(char *str) | 2513 | static int __init setup_slub_min_order(char *str) |
@@ -2541,9 +2546,13 @@ static int __init setup_slub_nomerge(char *str) | |||
2541 | 2546 | ||
2542 | __setup("slub_nomerge", setup_slub_nomerge); | 2547 | __setup("slub_nomerge", setup_slub_nomerge); |
2543 | 2548 | ||
2544 | static void create_kmalloc_cache(struct kmem_cache *s, | 2549 | static struct kmem_cache *__init create_kmalloc_cache(const char *name, |
2545 | const char *name, int size, unsigned int flags) | 2550 | int size, unsigned int flags) |
2546 | { | 2551 | { |
2552 | struct kmem_cache *s; | ||
2553 | |||
2554 | s = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); | ||
2555 | |||
2547 | /* | 2556 | /* |
2548 | * This function is called with IRQs disabled during early-boot on | 2557 | * This function is called with IRQs disabled during early-boot on |
2549 | * single CPU so there's no need to take slub_lock here. | 2558 | * single CPU so there's no need to take slub_lock here. |
@@ -2553,12 +2562,11 @@ static void create_kmalloc_cache(struct kmem_cache *s, | |||
2553 | goto panic; | 2562 | goto panic; |
2554 | 2563 | ||
2555 | list_add(&s->list, &slab_caches); | 2564 | list_add(&s->list, &slab_caches); |
2556 | 2565 | return s; | |
2557 | if (!sysfs_slab_add(s)) | ||
2558 | return; | ||
2559 | 2566 | ||
2560 | panic: | 2567 | panic: |
2561 | panic("Creation of kmalloc slab %s size=%d failed.\n", name, size); | 2568 | panic("Creation of kmalloc slab %s size=%d failed.\n", name, size); |
2569 | return NULL; | ||
2562 | } | 2570 | } |
2563 | 2571 | ||
2564 | /* | 2572 | /* |
@@ -2613,10 +2621,10 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags) | |||
2613 | 2621 | ||
2614 | #ifdef CONFIG_ZONE_DMA | 2622 | #ifdef CONFIG_ZONE_DMA |
2615 | if (unlikely((flags & SLUB_DMA))) | 2623 | if (unlikely((flags & SLUB_DMA))) |
2616 | return &kmalloc_dma_caches[index]; | 2624 | return kmalloc_dma_caches[index]; |
2617 | 2625 | ||
2618 | #endif | 2626 | #endif |
2619 | return &kmalloc_caches[index]; | 2627 | return kmalloc_caches[index]; |
2620 | } | 2628 | } |
2621 | 2629 | ||
2622 | void *__kmalloc(size_t size, gfp_t flags) | 2630 | void *__kmalloc(size_t size, gfp_t flags) |
@@ -2940,46 +2948,113 @@ static int slab_memory_callback(struct notifier_block *self, | |||
2940 | * Basic setup of slabs | 2948 | * Basic setup of slabs |
2941 | *******************************************************************/ | 2949 | *******************************************************************/ |
2942 | 2950 | ||
2951 | /* | ||
2952 | * Used for early kmem_cache structures that were allocated using | ||
2953 | * the page allocator | ||
2954 | */ | ||
2955 | |||
2956 | static void __init kmem_cache_bootstrap_fixup(struct kmem_cache *s) | ||
2957 | { | ||
2958 | int node; | ||
2959 | |||
2960 | list_add(&s->list, &slab_caches); | ||
2961 | s->refcount = -1; | ||
2962 | |||
2963 | for_each_node_state(node, N_NORMAL_MEMORY) { | ||
2964 | struct kmem_cache_node *n = get_node(s, node); | ||
2965 | struct page *p; | ||
2966 | |||
2967 | if (n) { | ||
2968 | list_for_each_entry(p, &n->partial, lru) | ||
2969 | p->slab = s; | ||
2970 | |||
2971 | #ifdef CONFIG_SLAB_DEBUG | ||
2972 | list_for_each_entry(p, &n->full, lru) | ||
2973 | p->slab = s; | ||
2974 | #endif | ||
2975 | } | ||
2976 | } | ||
2977 | } | ||
2978 | |||
2943 | void __init kmem_cache_init(void) | 2979 | void __init kmem_cache_init(void) |
2944 | { | 2980 | { |
2945 | int i; | 2981 | int i; |
2946 | int caches = 0; | 2982 | int caches = 0; |
2983 | struct kmem_cache *temp_kmem_cache; | ||
2984 | int order; | ||
2947 | 2985 | ||
2948 | #ifdef CONFIG_NUMA | 2986 | #ifdef CONFIG_NUMA |
2987 | struct kmem_cache *temp_kmem_cache_node; | ||
2988 | unsigned long kmalloc_size; | ||
2989 | |||
2990 | kmem_size = offsetof(struct kmem_cache, node) + | ||
2991 | nr_node_ids * sizeof(struct kmem_cache_node *); | ||
2992 | |||
2993 | /* Allocate two kmem_caches from the page allocator */ | ||
2994 | kmalloc_size = ALIGN(kmem_size, cache_line_size()); | ||
2995 | order = get_order(2 * kmalloc_size); | ||
2996 | kmem_cache = (void *)__get_free_pages(GFP_NOWAIT, order); | ||
2997 | |||
2949 | /* | 2998 | /* |
2950 | * Must first have the slab cache available for the allocations of the | 2999 | * Must first have the slab cache available for the allocations of the |
2951 | * struct kmem_cache_node's. There is special bootstrap code in | 3000 | * struct kmem_cache_node's. There is special bootstrap code in |
2952 | * kmem_cache_open for slab_state == DOWN. | 3001 | * kmem_cache_open for slab_state == DOWN. |
2953 | */ | 3002 | */ |
2954 | create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node", | 3003 | kmem_cache_node = (void *)kmem_cache + kmalloc_size; |
2955 | sizeof(struct kmem_cache_node), 0); | 3004 | |
2956 | kmalloc_caches[0].refcount = -1; | 3005 | kmem_cache_open(kmem_cache_node, "kmem_cache_node", |
2957 | caches++; | 3006 | sizeof(struct kmem_cache_node), |
3007 | 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); | ||
2958 | 3008 | ||
2959 | hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); | 3009 | hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); |
3010 | #else | ||
3011 | /* Allocate a single kmem_cache from the page allocator */ | ||
3012 | kmem_size = sizeof(struct kmem_cache); | ||
3013 | order = get_order(kmem_size); | ||
3014 | kmem_cache = (void *)__get_free_pages(GFP_NOWAIT, order); | ||
2960 | #endif | 3015 | #endif |
2961 | 3016 | ||
2962 | /* Able to allocate the per node structures */ | 3017 | /* Able to allocate the per node structures */ |
2963 | slab_state = PARTIAL; | 3018 | slab_state = PARTIAL; |
2964 | 3019 | ||
2965 | /* Caches that are not of the two-to-the-power-of size */ | 3020 | temp_kmem_cache = kmem_cache; |
2966 | if (KMALLOC_MIN_SIZE <= 32) { | 3021 | kmem_cache_open(kmem_cache, "kmem_cache", kmem_size, |
2967 | create_kmalloc_cache(&kmalloc_caches[1], | 3022 | 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); |
2968 | "kmalloc-96", 96, 0); | 3023 | kmem_cache = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); |
2969 | caches++; | 3024 | memcpy(kmem_cache, temp_kmem_cache, kmem_size); |
2970 | } | ||
2971 | if (KMALLOC_MIN_SIZE <= 64) { | ||
2972 | create_kmalloc_cache(&kmalloc_caches[2], | ||
2973 | "kmalloc-192", 192, 0); | ||
2974 | caches++; | ||
2975 | } | ||
2976 | 3025 | ||
2977 | for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { | 3026 | #ifdef CONFIG_NUMA |
2978 | create_kmalloc_cache(&kmalloc_caches[i], | 3027 | /* |
2979 | "kmalloc", 1 << i, 0); | 3028 | * Allocate kmem_cache_node properly from the kmem_cache slab. |
2980 | caches++; | 3029 | * kmem_cache_node is separately allocated so no need to |
2981 | } | 3030 | * update any list pointers. |
3031 | */ | ||
3032 | temp_kmem_cache_node = kmem_cache_node; | ||
2982 | 3033 | ||
3034 | kmem_cache_node = kmem_cache_alloc(kmem_cache, GFP_NOWAIT); | ||
3035 | memcpy(kmem_cache_node, temp_kmem_cache_node, kmem_size); | ||
3036 | |||
3037 | kmem_cache_bootstrap_fixup(kmem_cache_node); | ||
3038 | |||
3039 | caches++; | ||
3040 | #else | ||
3041 | /* | ||
3042 | * kmem_cache has kmem_cache_node embedded and we moved it! | ||
3043 | * Update the list heads | ||
3044 | */ | ||
3045 | INIT_LIST_HEAD(&kmem_cache->local_node.partial); | ||
3046 | list_splice(&temp_kmem_cache->local_node.partial, &kmem_cache->local_node.partial); | ||
3047 | #ifdef CONFIG_SLUB_DEBUG | ||
3048 | INIT_LIST_HEAD(&kmem_cache->local_node.full); | ||
3049 | list_splice(&temp_kmem_cache->local_node.full, &kmem_cache->local_node.full); | ||
3050 | #endif | ||
3051 | #endif | ||
3052 | kmem_cache_bootstrap_fixup(kmem_cache); | ||
3053 | caches++; | ||
3054 | /* Free temporary boot structure */ | ||
3055 | free_pages((unsigned long)temp_kmem_cache, order); | ||
3056 | |||
3057 | /* Now we can use the kmem_cache to allocate kmalloc slabs */ | ||
2983 | 3058 | ||
2984 | /* | 3059 | /* |
2985 | * Patch up the size_index table if we have strange large alignment | 3060 | * Patch up the size_index table if we have strange large alignment |
@@ -3019,6 +3094,22 @@ void __init kmem_cache_init(void) | |||
3019 | size_index[size_index_elem(i)] = 8; | 3094 | size_index[size_index_elem(i)] = 8; |
3020 | } | 3095 | } |
3021 | 3096 | ||
3097 | /* Caches that are not of the two-to-the-power-of size */ | ||
3098 | if (KMALLOC_MIN_SIZE <= 32) { | ||
3099 | kmalloc_caches[1] = create_kmalloc_cache("kmalloc-96", 96, 0); | ||
3100 | caches++; | ||
3101 | } | ||
3102 | |||
3103 | if (KMALLOC_MIN_SIZE <= 64) { | ||
3104 | kmalloc_caches[2] = create_kmalloc_cache("kmalloc-192", 192, 0); | ||
3105 | caches++; | ||
3106 | } | ||
3107 | |||
3108 | for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { | ||
3109 | kmalloc_caches[i] = create_kmalloc_cache("kmalloc", 1 << i, 0); | ||
3110 | caches++; | ||
3111 | } | ||
3112 | |||
3022 | slab_state = UP; | 3113 | slab_state = UP; |
3023 | 3114 | ||
3024 | /* Provide the correct kmalloc names now that the caches are up */ | 3115 | /* Provide the correct kmalloc names now that the caches are up */ |
@@ -3026,30 +3117,24 @@ void __init kmem_cache_init(void) | |||
3026 | char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i); | 3117 | char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i); |
3027 | 3118 | ||
3028 | BUG_ON(!s); | 3119 | BUG_ON(!s); |
3029 | kmalloc_caches[i].name = s; | 3120 | kmalloc_caches[i]->name = s; |
3030 | } | 3121 | } |
3031 | 3122 | ||
3032 | #ifdef CONFIG_SMP | 3123 | #ifdef CONFIG_SMP |
3033 | register_cpu_notifier(&slab_notifier); | 3124 | register_cpu_notifier(&slab_notifier); |
3034 | #endif | 3125 | #endif |
3035 | #ifdef CONFIG_NUMA | ||
3036 | kmem_size = offsetof(struct kmem_cache, node) + | ||
3037 | nr_node_ids * sizeof(struct kmem_cache_node *); | ||
3038 | #else | ||
3039 | kmem_size = sizeof(struct kmem_cache); | ||
3040 | #endif | ||
3041 | 3126 | ||
3042 | #ifdef CONFIG_ZONE_DMA | 3127 | #ifdef CONFIG_ZONE_DMA |
3043 | for (i = 1; i < SLUB_PAGE_SHIFT; i++) { | 3128 | for (i = 0; i < SLUB_PAGE_SHIFT; i++) { |
3044 | struct kmem_cache *s = &kmalloc_caches[i]; | 3129 | struct kmem_cache *s = kmalloc_caches[i]; |
3045 | 3130 | ||
3046 | if (s->size) { | 3131 | if (s && s->size) { |
3047 | char *name = kasprintf(GFP_NOWAIT, | 3132 | char *name = kasprintf(GFP_NOWAIT, |
3048 | "dma-kmalloc-%d", s->objsize); | 3133 | "dma-kmalloc-%d", s->objsize); |
3049 | 3134 | ||
3050 | BUG_ON(!name); | 3135 | BUG_ON(!name); |
3051 | create_kmalloc_cache(&kmalloc_dma_caches[i], | 3136 | kmalloc_dma_caches[i] = create_kmalloc_cache(name, |
3052 | name, s->objsize, SLAB_CACHE_DMA); | 3137 | s->objsize, SLAB_CACHE_DMA); |
3053 | } | 3138 | } |
3054 | } | 3139 | } |
3055 | #endif | 3140 | #endif |