diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2011-07-20 13:04:23 -0400 |
---|---|---|
committer | Pekka Enberg <penberg@kernel.org> | 2011-07-20 13:27:56 -0400 |
commit | b56efcf0a45aa7fc32de90d5f9838541082fbc19 (patch) | |
tree | e1de7ba007c79042d30454c11b78aa942e2666b3 | |
parent | c225150b86fef9f7663219b6e9f7606ea1607312 (diff) |
slab: shrink sizeof(struct kmem_cache)
Reduce high-order allocations for some setups.
(With NR_CPUS=4096, we need 64KB per kmem_cache struct.)
We now allocate exactly the needed size (using nr_cpu_ids and nr_node_ids).
This also makes the code a bit smaller on x86_64, since some field offsets
now fall below the 127 limit:
Before patch :
# size mm/slab.o
text data bss dec hex filename
22605 361665 32 384302 5dd2e mm/slab.o
After patch :
# size mm/slab.o
text data bss dec hex filename
22349 353473 8224 384046 5dc2e mm/slab.o
CC: Andrew Morton <akpm@linux-foundation.org>
Reported-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Acked-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Pekka Enberg <penberg@kernel.org>
-rw-r--r-- | include/linux/slab_def.h | 26 | ||||
-rw-r--r-- | mm/slab.c | 10 |
2 files changed, 19 insertions, 17 deletions
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index d7f63112f63c..d00e0bacda93 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h | |||
@@ -24,21 +24,19 @@ | |||
24 | */ | 24 | */ |
25 | 25 | ||
26 | struct kmem_cache { | 26 | struct kmem_cache { |
27 | /* 1) per-cpu data, touched during every alloc/free */ | 27 | /* 1) Cache tunables. Protected by cache_chain_mutex */ |
28 | struct array_cache *array[NR_CPUS]; | ||
29 | /* 2) Cache tunables. Protected by cache_chain_mutex */ | ||
30 | unsigned int batchcount; | 28 | unsigned int batchcount; |
31 | unsigned int limit; | 29 | unsigned int limit; |
32 | unsigned int shared; | 30 | unsigned int shared; |
33 | 31 | ||
34 | unsigned int buffer_size; | 32 | unsigned int buffer_size; |
35 | u32 reciprocal_buffer_size; | 33 | u32 reciprocal_buffer_size; |
36 | /* 3) touched by every alloc & free from the backend */ | 34 | /* 2) touched by every alloc & free from the backend */ |
37 | 35 | ||
38 | unsigned int flags; /* constant flags */ | 36 | unsigned int flags; /* constant flags */ |
39 | unsigned int num; /* # of objs per slab */ | 37 | unsigned int num; /* # of objs per slab */ |
40 | 38 | ||
41 | /* 4) cache_grow/shrink */ | 39 | /* 3) cache_grow/shrink */ |
42 | /* order of pgs per slab (2^n) */ | 40 | /* order of pgs per slab (2^n) */ |
43 | unsigned int gfporder; | 41 | unsigned int gfporder; |
44 | 42 | ||
@@ -54,11 +52,11 @@ struct kmem_cache { | |||
54 | /* constructor func */ | 52 | /* constructor func */ |
55 | void (*ctor)(void *obj); | 53 | void (*ctor)(void *obj); |
56 | 54 | ||
57 | /* 5) cache creation/removal */ | 55 | /* 4) cache creation/removal */ |
58 | const char *name; | 56 | const char *name; |
59 | struct list_head next; | 57 | struct list_head next; |
60 | 58 | ||
61 | /* 6) statistics */ | 59 | /* 5) statistics */ |
62 | #ifdef CONFIG_DEBUG_SLAB | 60 | #ifdef CONFIG_DEBUG_SLAB |
63 | unsigned long num_active; | 61 | unsigned long num_active; |
64 | unsigned long num_allocations; | 62 | unsigned long num_allocations; |
@@ -85,16 +83,18 @@ struct kmem_cache { | |||
85 | int obj_size; | 83 | int obj_size; |
86 | #endif /* CONFIG_DEBUG_SLAB */ | 84 | #endif /* CONFIG_DEBUG_SLAB */ |
87 | 85 | ||
86 | /* 6) per-cpu/per-node data, touched during every alloc/free */ | ||
88 | /* | 87 | /* |
89 | * We put nodelists[] at the end of kmem_cache, because we want to size | 88 | * We put array[] at the end of kmem_cache, because we want to size |
90 | * this array to nr_node_ids slots instead of MAX_NUMNODES | 89 | * this array to nr_cpu_ids slots instead of NR_CPUS |
91 | * (see kmem_cache_init()) | 90 | * (see kmem_cache_init()) |
92 | * We still use [MAX_NUMNODES] and not [1] or [0] because cache_cache | 91 | * We still use [NR_CPUS] and not [1] or [0] because cache_cache |
93 | * is statically defined, so we reserve the max number of nodes. | 92 | * is statically defined, so we reserve the max number of cpus. |
94 | */ | 93 | */ |
95 | struct kmem_list3 *nodelists[MAX_NUMNODES]; | 94 | struct kmem_list3 **nodelists; |
95 | struct array_cache *array[NR_CPUS]; | ||
96 | /* | 96 | /* |
97 | * Do not add fields after nodelists[] | 97 | * Do not add fields after array[] |
98 | */ | 98 | */ |
99 | }; | 99 | }; |
100 | 100 | ||
@@ -574,7 +574,9 @@ static struct arraycache_init initarray_generic = | |||
574 | { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; | 574 | { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} }; |
575 | 575 | ||
576 | /* internal cache of cache description objs */ | 576 | /* internal cache of cache description objs */ |
577 | static struct kmem_list3 *cache_cache_nodelists[MAX_NUMNODES]; | ||
577 | static struct kmem_cache cache_cache = { | 578 | static struct kmem_cache cache_cache = { |
579 | .nodelists = cache_cache_nodelists, | ||
578 | .batchcount = 1, | 580 | .batchcount = 1, |
579 | .limit = BOOT_CPUCACHE_ENTRIES, | 581 | .limit = BOOT_CPUCACHE_ENTRIES, |
580 | .shared = 1, | 582 | .shared = 1, |
@@ -1492,11 +1494,10 @@ void __init kmem_cache_init(void) | |||
1492 | cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node]; | 1494 | cache_cache.nodelists[node] = &initkmem_list3[CACHE_CACHE + node]; |
1493 | 1495 | ||
1494 | /* | 1496 | /* |
1495 | * struct kmem_cache size depends on nr_node_ids, which | 1497 | * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids |
1496 | * can be less than MAX_NUMNODES. | ||
1497 | */ | 1498 | */ |
1498 | cache_cache.buffer_size = offsetof(struct kmem_cache, nodelists) + | 1499 | cache_cache.buffer_size = offsetof(struct kmem_cache, array[nr_cpu_ids]) + |
1499 | nr_node_ids * sizeof(struct kmem_list3 *); | 1500 | nr_node_ids * sizeof(struct kmem_list3 *); |
1500 | #if DEBUG | 1501 | #if DEBUG |
1501 | cache_cache.obj_size = cache_cache.buffer_size; | 1502 | cache_cache.obj_size = cache_cache.buffer_size; |
1502 | #endif | 1503 | #endif |
@@ -2308,6 +2309,7 @@ kmem_cache_create (const char *name, size_t size, size_t align, | |||
2308 | if (!cachep) | 2309 | if (!cachep) |
2309 | goto oops; | 2310 | goto oops; |
2310 | 2311 | ||
2312 | cachep->nodelists = (struct kmem_list3 **)&cachep->array[nr_cpu_ids]; | ||
2311 | #if DEBUG | 2313 | #if DEBUG |
2312 | cachep->obj_size = size; | 2314 | cachep->obj_size = size; |
2313 | 2315 | ||