aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorChristoph Lameter <cl@linux.com>2010-08-20 13:37:15 -0400
committerPekka Enberg <penberg@kernel.org>2010-10-02 03:24:27 -0400
commit51df1142816e469173889fb6d6dc810be9b9e022 (patch)
treee2827e87486675b514c68f06d67ac5980cd6ceb1 /mm
parent6c182dc0de26ef97efb6a97a8deab074833764e7 (diff)
slub: Dynamically size kmalloc cache allocations
kmalloc caches are statically defined and may take up a lot of space just because the size of the node array has to be dimensioned for the largest node count supported. This patch makes the size of the kmem_cache structure dynamic throughout by creating a kmem_cache slab cache for the kmem_cache objects. The bootstrap occurs by allocating the initial one or two kmem_cache objects from the page allocator. C2->C3 - Fix various issues indicated by David - Make create_kmalloc_cache return a kmem_cache * pointer. Acked-by: David Rientjes <rientjes@google.com> Signed-off-by: Christoph Lameter <cl@linux-foundation.org> Signed-off-by: Pekka Enberg <penberg@kernel.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/slub.c191
1 files changed, 138 insertions, 53 deletions
diff --git a/mm/slub.c b/mm/slub.c
index e8c117595367..94fee96da0d2 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -168,7 +168,6 @@ static inline int kmem_cache_debug(struct kmem_cache *s)
168 168
169/* Internal SLUB flags */ 169/* Internal SLUB flags */
170#define __OBJECT_POISON 0x80000000UL /* Poison object */ 170#define __OBJECT_POISON 0x80000000UL /* Poison object */
171#define __SYSFS_ADD_DEFERRED 0x40000000UL /* Not yet visible via sysfs */
172 171
173static int kmem_size = sizeof(struct kmem_cache); 172static int kmem_size = sizeof(struct kmem_cache);
174 173
@@ -178,7 +177,7 @@ static struct notifier_block slab_notifier;
178 177
179static enum { 178static enum {
180 DOWN, /* No slab functionality available */ 179 DOWN, /* No slab functionality available */
181 PARTIAL, /* kmem_cache_open() works but kmalloc does not */ 180 PARTIAL, /* Kmem_cache_node works */
182 UP, /* Everything works but does not show up in sysfs */ 181 UP, /* Everything works but does not show up in sysfs */
183 SYSFS /* Sysfs up */ 182 SYSFS /* Sysfs up */
184} slab_state = DOWN; 183} slab_state = DOWN;
@@ -2073,6 +2072,8 @@ static inline int alloc_kmem_cache_cpus(struct kmem_cache *s)
2073} 2072}
2074 2073
2075#ifdef CONFIG_NUMA 2074#ifdef CONFIG_NUMA
2075static struct kmem_cache *kmem_cache_node;
2076
2076/* 2077/*
2077 * No kmalloc_node yet so do it by hand. We know that this is the first 2078 * No kmalloc_node yet so do it by hand. We know that this is the first
2078 * slab on the node for this slabcache. There are no concurrent accesses 2079 * slab on the node for this slabcache. There are no concurrent accesses
@@ -2088,9 +2089,9 @@ static void early_kmem_cache_node_alloc(int node)
2088 struct kmem_cache_node *n; 2089 struct kmem_cache_node *n;
2089 unsigned long flags; 2090 unsigned long flags;
2090 2091
2091 BUG_ON(kmalloc_caches->size < sizeof(struct kmem_cache_node)); 2092 BUG_ON(kmem_cache_node->size < sizeof(struct kmem_cache_node));
2092 2093
2093 page = new_slab(kmalloc_caches, GFP_NOWAIT, node); 2094 page = new_slab(kmem_cache_node, GFP_NOWAIT, node);
2094 2095
2095 BUG_ON(!page); 2096 BUG_ON(!page);
2096 if (page_to_nid(page) != node) { 2097 if (page_to_nid(page) != node) {
@@ -2102,15 +2103,15 @@ static void early_kmem_cache_node_alloc(int node)
2102 2103
2103 n = page->freelist; 2104 n = page->freelist;
2104 BUG_ON(!n); 2105 BUG_ON(!n);
2105 page->freelist = get_freepointer(kmalloc_caches, n); 2106 page->freelist = get_freepointer(kmem_cache_node, n);
2106 page->inuse++; 2107 page->inuse++;
2107 kmalloc_caches->node[node] = n; 2108 kmem_cache_node->node[node] = n;
2108#ifdef CONFIG_SLUB_DEBUG 2109#ifdef CONFIG_SLUB_DEBUG
2109 init_object(kmalloc_caches, n, 1); 2110 init_object(kmem_cache_node, n, 1);
2110 init_tracking(kmalloc_caches, n); 2111 init_tracking(kmem_cache_node, n);
2111#endif 2112#endif
2112 init_kmem_cache_node(n, kmalloc_caches); 2113 init_kmem_cache_node(n, kmem_cache_node);
2113 inc_slabs_node(kmalloc_caches, node, page->objects); 2114 inc_slabs_node(kmem_cache_node, node, page->objects);
2114 2115
2115 /* 2116 /*
2116 * lockdep requires consistent irq usage for each lock 2117 * lockdep requires consistent irq usage for each lock
@@ -2128,8 +2129,10 @@ static void free_kmem_cache_nodes(struct kmem_cache *s)
2128 2129
2129 for_each_node_state(node, N_NORMAL_MEMORY) { 2130 for_each_node_state(node, N_NORMAL_MEMORY) {
2130 struct kmem_cache_node *n = s->node[node]; 2131 struct kmem_cache_node *n = s->node[node];
2132
2131 if (n) 2133 if (n)
2132 kmem_cache_free(kmalloc_caches, n); 2134 kmem_cache_free(kmem_cache_node, n);
2135
2133 s->node[node] = NULL; 2136 s->node[node] = NULL;
2134 } 2137 }
2135} 2138}
@@ -2145,7 +2148,7 @@ static int init_kmem_cache_nodes(struct kmem_cache *s)
2145 early_kmem_cache_node_alloc(node); 2148 early_kmem_cache_node_alloc(node);
2146 continue; 2149 continue;
2147 } 2150 }
2148 n = kmem_cache_alloc_node(kmalloc_caches, 2151 n = kmem_cache_alloc_node(kmem_cache_node,
2149 GFP_KERNEL, node); 2152 GFP_KERNEL, node);
2150 2153
2151 if (!n) { 2154 if (!n) {
@@ -2498,11 +2501,13 @@ EXPORT_SYMBOL(kmem_cache_destroy);
2498 * Kmalloc subsystem 2501 * Kmalloc subsystem
2499 *******************************************************************/ 2502 *******************************************************************/
2500 2503
2501struct kmem_cache kmalloc_caches[KMALLOC_CACHES] __cacheline_aligned; 2504struct kmem_cache *kmalloc_caches[SLUB_PAGE_SHIFT];
2502EXPORT_SYMBOL(kmalloc_caches); 2505EXPORT_SYMBOL(kmalloc_caches);
2503 2506
2507static struct kmem_cache *kmem_cache;
2508
2504#ifdef CONFIG_ZONE_DMA 2509#ifdef CONFIG_ZONE_DMA
2505static struct kmem_cache kmalloc_dma_caches[SLUB_PAGE_SHIFT]; 2510static struct kmem_cache *kmalloc_dma_caches[SLUB_PAGE_SHIFT];
2506#endif 2511#endif
2507 2512
2508static int __init setup_slub_min_order(char *str) 2513static int __init setup_slub_min_order(char *str)
@@ -2541,9 +2546,13 @@ static int __init setup_slub_nomerge(char *str)
2541 2546
2542__setup("slub_nomerge", setup_slub_nomerge); 2547__setup("slub_nomerge", setup_slub_nomerge);
2543 2548
2544static void create_kmalloc_cache(struct kmem_cache *s, 2549static struct kmem_cache *__init create_kmalloc_cache(const char *name,
2545 const char *name, int size, unsigned int flags) 2550 int size, unsigned int flags)
2546{ 2551{
2552 struct kmem_cache *s;
2553
2554 s = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
2555
2547 /* 2556 /*
2548 * This function is called with IRQs disabled during early-boot on 2557 * This function is called with IRQs disabled during early-boot on
2549 * single CPU so there's no need to take slub_lock here. 2558 * single CPU so there's no need to take slub_lock here.
@@ -2553,12 +2562,11 @@ static void create_kmalloc_cache(struct kmem_cache *s,
2553 goto panic; 2562 goto panic;
2554 2563
2555 list_add(&s->list, &slab_caches); 2564 list_add(&s->list, &slab_caches);
2556 2565 return s;
2557 if (!sysfs_slab_add(s))
2558 return;
2559 2566
2560panic: 2567panic:
2561 panic("Creation of kmalloc slab %s size=%d failed.\n", name, size); 2568 panic("Creation of kmalloc slab %s size=%d failed.\n", name, size);
2569 return NULL;
2562} 2570}
2563 2571
2564/* 2572/*
@@ -2613,10 +2621,10 @@ static struct kmem_cache *get_slab(size_t size, gfp_t flags)
2613 2621
2614#ifdef CONFIG_ZONE_DMA 2622#ifdef CONFIG_ZONE_DMA
2615 if (unlikely((flags & SLUB_DMA))) 2623 if (unlikely((flags & SLUB_DMA)))
2616 return &kmalloc_dma_caches[index]; 2624 return kmalloc_dma_caches[index];
2617 2625
2618#endif 2626#endif
2619 return &kmalloc_caches[index]; 2627 return kmalloc_caches[index];
2620} 2628}
2621 2629
2622void *__kmalloc(size_t size, gfp_t flags) 2630void *__kmalloc(size_t size, gfp_t flags)
@@ -2940,46 +2948,113 @@ static int slab_memory_callback(struct notifier_block *self,
2940 * Basic setup of slabs 2948 * Basic setup of slabs
2941 *******************************************************************/ 2949 *******************************************************************/
2942 2950
2951/*
2952 * Used for early kmem_cache structures that were allocated using
2953 * the page allocator
2954 */
2955
2956static void __init kmem_cache_bootstrap_fixup(struct kmem_cache *s)
2957{
2958 int node;
2959
2960 list_add(&s->list, &slab_caches);
2961 s->refcount = -1;
2962
2963 for_each_node_state(node, N_NORMAL_MEMORY) {
2964 struct kmem_cache_node *n = get_node(s, node);
2965 struct page *p;
2966
2967 if (n) {
2968 list_for_each_entry(p, &n->partial, lru)
2969 p->slab = s;
2970
2971#ifdef CONFIG_SLAB_DEBUG
2972 list_for_each_entry(p, &n->full, lru)
2973 p->slab = s;
2974#endif
2975 }
2976 }
2977}
2978
2943void __init kmem_cache_init(void) 2979void __init kmem_cache_init(void)
2944{ 2980{
2945 int i; 2981 int i;
2946 int caches = 0; 2982 int caches = 0;
2983 struct kmem_cache *temp_kmem_cache;
2984 int order;
2947 2985
2948#ifdef CONFIG_NUMA 2986#ifdef CONFIG_NUMA
2987 struct kmem_cache *temp_kmem_cache_node;
2988 unsigned long kmalloc_size;
2989
2990 kmem_size = offsetof(struct kmem_cache, node) +
2991 nr_node_ids * sizeof(struct kmem_cache_node *);
2992
2993 /* Allocate two kmem_caches from the page allocator */
2994 kmalloc_size = ALIGN(kmem_size, cache_line_size());
2995 order = get_order(2 * kmalloc_size);
2996 kmem_cache = (void *)__get_free_pages(GFP_NOWAIT, order);
2997
2949 /* 2998 /*
2950 * Must first have the slab cache available for the allocations of the 2999 * Must first have the slab cache available for the allocations of the
2951 * struct kmem_cache_node's. There is special bootstrap code in 3000 * struct kmem_cache_node's. There is special bootstrap code in
2952 * kmem_cache_open for slab_state == DOWN. 3001 * kmem_cache_open for slab_state == DOWN.
2953 */ 3002 */
2954 create_kmalloc_cache(&kmalloc_caches[0], "kmem_cache_node", 3003 kmem_cache_node = (void *)kmem_cache + kmalloc_size;
2955 sizeof(struct kmem_cache_node), 0); 3004
2956 kmalloc_caches[0].refcount = -1; 3005 kmem_cache_open(kmem_cache_node, "kmem_cache_node",
2957 caches++; 3006 sizeof(struct kmem_cache_node),
3007 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
2958 3008
2959 hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI); 3009 hotplug_memory_notifier(slab_memory_callback, SLAB_CALLBACK_PRI);
3010#else
3011 /* Allocate a single kmem_cache from the page allocator */
3012 kmem_size = sizeof(struct kmem_cache);
3013 order = get_order(kmem_size);
3014 kmem_cache = (void *)__get_free_pages(GFP_NOWAIT, order);
2960#endif 3015#endif
2961 3016
2962 /* Able to allocate the per node structures */ 3017 /* Able to allocate the per node structures */
2963 slab_state = PARTIAL; 3018 slab_state = PARTIAL;
2964 3019
2965 /* Caches that are not of the two-to-the-power-of size */ 3020 temp_kmem_cache = kmem_cache;
2966 if (KMALLOC_MIN_SIZE <= 32) { 3021 kmem_cache_open(kmem_cache, "kmem_cache", kmem_size,
2967 create_kmalloc_cache(&kmalloc_caches[1], 3022 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
2968 "kmalloc-96", 96, 0); 3023 kmem_cache = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
2969 caches++; 3024 memcpy(kmem_cache, temp_kmem_cache, kmem_size);
2970 }
2971 if (KMALLOC_MIN_SIZE <= 64) {
2972 create_kmalloc_cache(&kmalloc_caches[2],
2973 "kmalloc-192", 192, 0);
2974 caches++;
2975 }
2976 3025
2977 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) { 3026#ifdef CONFIG_NUMA
2978 create_kmalloc_cache(&kmalloc_caches[i], 3027 /*
2979 "kmalloc", 1 << i, 0); 3028 * Allocate kmem_cache_node properly from the kmem_cache slab.
2980 caches++; 3029 * kmem_cache_node is separately allocated so no need to
2981 } 3030 * update any list pointers.
3031 */
3032 temp_kmem_cache_node = kmem_cache_node;
2982 3033
3034 kmem_cache_node = kmem_cache_alloc(kmem_cache, GFP_NOWAIT);
3035 memcpy(kmem_cache_node, temp_kmem_cache_node, kmem_size);
3036
3037 kmem_cache_bootstrap_fixup(kmem_cache_node);
3038
3039 caches++;
3040#else
3041 /*
3042 * kmem_cache has kmem_cache_node embedded and we moved it!
3043 * Update the list heads
3044 */
3045 INIT_LIST_HEAD(&kmem_cache->local_node.partial);
3046 list_splice(&temp_kmem_cache->local_node.partial, &kmem_cache->local_node.partial);
3047#ifdef CONFIG_SLUB_DEBUG
3048 INIT_LIST_HEAD(&kmem_cache->local_node.full);
3049 list_splice(&temp_kmem_cache->local_node.full, &kmem_cache->local_node.full);
3050#endif
3051#endif
3052 kmem_cache_bootstrap_fixup(kmem_cache);
3053 caches++;
3054 /* Free temporary boot structure */
3055 free_pages((unsigned long)temp_kmem_cache, order);
3056
3057 /* Now we can use the kmem_cache to allocate kmalloc slabs */
2983 3058
2984 /* 3059 /*
2985 * Patch up the size_index table if we have strange large alignment 3060 * Patch up the size_index table if we have strange large alignment
@@ -3019,6 +3094,22 @@ void __init kmem_cache_init(void)
3019 size_index[size_index_elem(i)] = 8; 3094 size_index[size_index_elem(i)] = 8;
3020 } 3095 }
3021 3096
3097 /* Caches that are not of the two-to-the-power-of size */
3098 if (KMALLOC_MIN_SIZE <= 32) {
3099 kmalloc_caches[1] = create_kmalloc_cache("kmalloc-96", 96, 0);
3100 caches++;
3101 }
3102
3103 if (KMALLOC_MIN_SIZE <= 64) {
3104 kmalloc_caches[2] = create_kmalloc_cache("kmalloc-192", 192, 0);
3105 caches++;
3106 }
3107
3108 for (i = KMALLOC_SHIFT_LOW; i < SLUB_PAGE_SHIFT; i++) {
3109 kmalloc_caches[i] = create_kmalloc_cache("kmalloc", 1 << i, 0);
3110 caches++;
3111 }
3112
3022 slab_state = UP; 3113 slab_state = UP;
3023 3114
3024 /* Provide the correct kmalloc names now that the caches are up */ 3115 /* Provide the correct kmalloc names now that the caches are up */
@@ -3026,30 +3117,24 @@ void __init kmem_cache_init(void)
3026 char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i); 3117 char *s = kasprintf(GFP_NOWAIT, "kmalloc-%d", 1 << i);
3027 3118
3028 BUG_ON(!s); 3119 BUG_ON(!s);
3029 kmalloc_caches[i].name = s; 3120 kmalloc_caches[i]->name = s;
3030 } 3121 }
3031 3122
3032#ifdef CONFIG_SMP 3123#ifdef CONFIG_SMP
3033 register_cpu_notifier(&slab_notifier); 3124 register_cpu_notifier(&slab_notifier);
3034#endif 3125#endif
3035#ifdef CONFIG_NUMA
3036 kmem_size = offsetof(struct kmem_cache, node) +
3037 nr_node_ids * sizeof(struct kmem_cache_node *);
3038#else
3039 kmem_size = sizeof(struct kmem_cache);
3040#endif
3041 3126
3042#ifdef CONFIG_ZONE_DMA 3127#ifdef CONFIG_ZONE_DMA
3043 for (i = 1; i < SLUB_PAGE_SHIFT; i++) { 3128 for (i = 0; i < SLUB_PAGE_SHIFT; i++) {
3044 struct kmem_cache *s = &kmalloc_caches[i]; 3129 struct kmem_cache *s = kmalloc_caches[i];
3045 3130
3046 if (s->size) { 3131 if (s && s->size) {
3047 char *name = kasprintf(GFP_NOWAIT, 3132 char *name = kasprintf(GFP_NOWAIT,
3048 "dma-kmalloc-%d", s->objsize); 3133 "dma-kmalloc-%d", s->objsize);
3049 3134
3050 BUG_ON(!name); 3135 BUG_ON(!name);
3051 create_kmalloc_cache(&kmalloc_dma_caches[i], 3136 kmalloc_dma_caches[i] = create_kmalloc_cache(name,
3052 name, s->objsize, SLAB_CACHE_DMA); 3137 s->objsize, SLAB_CACHE_DMA);
3053 } 3138 }
3054 } 3139 }
3055#endif 3140#endif