author    Christoph Lameter <clameter@sgi.com>    2008-04-14 12:11:31 -0400
committer Pekka Enberg <penberg@cs.helsinki.fi>   2008-04-27 11:28:17 -0400
commit    834f3d119234b35a1985a2449831d99356637937
tree      3106946ecbe174935daa7ac2ff4c7806cc3004b8
parent    224a88be40c45c0da5bdc45a8118004a37c60e8a
slub: Add kmem_cache_order_objects struct
Pack the order and the number of objects into a single word.
This saves some memory in the kmem_cache structure and, more importantly,
allows us to fetch both values atomically.
Later patches make the slab order runtime configurable, so the order and the
object count must be fetched together in order to properly allocate a slab and
initialize its objects; reading them as two separate fields could yield a
mismatched pair. Avoid that race by fetching both values in one word.
[penberg@cs.helsinki.fi: fix memset() page order in new_slab()]
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Signed-off-by: Pekka Enberg <penberg@cs.helsinki.fi>
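
For illustration only (not part of the patch): the packing scheme keeps the
allocation order in the upper 16 bits of one unsigned long and the number of
objects per slab in the lower 16 bits, so both values come from a single
word-sized read. Below is a minimal user-space sketch of the helpers; the
4 KiB PAGE_SIZE and the 192-byte example object are assumptions made purely
for demonstration, and the authoritative definitions are the oo_make(),
oo_order() and oo_objects() added to mm/slub.c in the diff that follows.

    #include <stdio.h>

    #define PAGE_SIZE 4096UL	/* example value; the kernel uses its own PAGE_SIZE */

    struct kmem_cache_order_objects {
    	unsigned long x;	/* order in bits 16 and up, object count in bits 0-15 */
    };

    /* Pack an order and the number of objects a slab of that order holds. */
    static inline struct kmem_cache_order_objects oo_make(int order,
    						unsigned long size)
    {
    	struct kmem_cache_order_objects x = {
    		(order << 16) + (PAGE_SIZE << order) / size
    	};
    	return x;
    }

    static inline int oo_order(struct kmem_cache_order_objects x)
    {
    	return x.x >> 16;
    }

    static inline int oo_objects(struct kmem_cache_order_objects x)
    {
    	return x.x & ((1 << 16) - 1);
    }

    int main(void)
    {
    	/* hypothetical cache: 192-byte objects in an order-1 (two page) slab */
    	struct kmem_cache_order_objects oo = oo_make(1, 192);

    	printf("order=%d objects=%d\n", oo_order(oo), oo_objects(oo));
    	return 0;
    }

With these example numbers the sketch prints order=1 objects=42: two 4096-byte
pages hold 42 objects of 192 bytes, and both the order and the count decode
from the same word, which is what lets the slab code read them atomically.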
 include/linux/slub_def.h | 12
 mm/slub.c                | 76
 2 files changed, 61 insertions(+), 27 deletions(-)
diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h
index 79d59c937fac..4131e5fbd18b 100644
--- a/include/linux/slub_def.h
+++ b/include/linux/slub_def.h
@@ -53,6 +53,15 @@ struct kmem_cache_node {
 };
 
 /*
+ * Word size structure that can be atomically updated or read and that
+ * contains both the order and the number of objects that a slab of the
+ * given order would contain.
+ */
+struct kmem_cache_order_objects {
+	unsigned long x;
+};
+
+/*
  * Slab cache management.
  */
 struct kmem_cache {
@@ -61,7 +70,7 @@ struct kmem_cache {
 	int size;		/* The size of an object including meta data */
 	int objsize;		/* The size of an object without meta data */
 	int offset;		/* Free pointer offset. */
-	int order;		/* Current preferred allocation order */
+	struct kmem_cache_order_objects oo;
 
 	/*
 	 * Avoid an extra cache line for UP, SMP and for the node local to
@@ -70,7 +79,6 @@ struct kmem_cache {
 	struct kmem_cache_node local_node;
 
 	/* Allocation and freeing of slabs */
-	int objects;		/* Number of objects in slab */
 	gfp_t allocflags;	/* gfp flags to use on each alloc */
 	int refcount;		/* Refcount for slab cache destroy */
 	void (*ctor)(struct kmem_cache *, void *);
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -341,6 +341,26 @@ static inline int slab_index(void *p, struct kmem_cache *s, void *addr)
 	return (p - addr) / s->size;
 }
 
+static inline struct kmem_cache_order_objects oo_make(int order,
+						unsigned long size)
+{
+	struct kmem_cache_order_objects x = {
+		(order << 16) + (PAGE_SIZE << order) / size
+	};
+
+	return x;
+}
+
+static inline int oo_order(struct kmem_cache_order_objects x)
+{
+	return x.x >> 16;
+}
+
+static inline int oo_objects(struct kmem_cache_order_objects x)
+{
+	return x.x & ((1 << 16) - 1);
+}
+
 #ifdef CONFIG_SLUB_DEBUG
 /*
  * Debug settings:
@@ -665,7 +685,7 @@ static int slab_pad_check(struct kmem_cache *s, struct page *page)
 		return 1;
 
 	start = page_address(page);
-	length = (PAGE_SIZE << s->order);
+	length = (PAGE_SIZE << compound_order(page));
 	end = start + length;
 	remainder = length % s->size;
 	if (!remainder)
@@ -1090,19 +1110,21 @@ static inline void dec_slabs_node(struct kmem_cache *s, int node) {}
 static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 {
 	struct page *page;
-	int pages = 1 << s->order;
+	struct kmem_cache_order_objects oo = s->oo;
+	int order = oo_order(oo);
+	int pages = 1 << order;
 
 	flags |= s->allocflags;
 
 	if (node == -1)
-		page = alloc_pages(flags, s->order);
+		page = alloc_pages(flags, order);
 	else
-		page = alloc_pages_node(node, flags, s->order);
+		page = alloc_pages_node(node, flags, order);
 
 	if (!page)
 		return NULL;
 
-	page->objects = s->objects;
+	page->objects = oo_objects(oo);
 	mod_zone_page_state(page_zone(page),
 		(s->flags & SLAB_RECLAIM_ACCOUNT) ?
 		NR_SLAB_RECLAIMABLE : NR_SLAB_UNRECLAIMABLE,
@@ -1143,7 +1165,7 @@ static struct page *new_slab(struct kmem_cache *s, gfp_t flags, int node)
 	start = page_address(page);
 
 	if (unlikely(s->flags & SLAB_POISON))
-		memset(start, POISON_INUSE, PAGE_SIZE << s->order);
+		memset(start, POISON_INUSE, PAGE_SIZE << compound_order(page));
 
 	last = start;
 	for_each_object(p, s, start, page->objects) {
@@ -1162,7 +1184,8 @@ out:
 
 static void __free_slab(struct kmem_cache *s, struct page *page)
 {
-	int pages = 1 << s->order;
+	int order = compound_order(page);
+	int pages = 1 << order;
 
 	if (unlikely(SlabDebug(page))) {
 		void *p;
@@ -1181,7 +1204,7 @@ static void __free_slab(struct kmem_cache *s, struct page *page)
 
 	__ClearPageSlab(page);
 	reset_page_mapcount(page);
-	__free_pages(page, s->order);
+	__free_pages(page, order);
 }
 
 static void rcu_free_slab(struct rcu_head *h)
@@ -2202,6 +2225,7 @@ static int calculate_sizes(struct kmem_cache *s)
 	unsigned long flags = s->flags;
 	unsigned long size = s->objsize;
 	unsigned long align = s->align;
+	int order;
 
 	/*
 	 * Round up object size to the next word boundary. We can only
@@ -2294,17 +2318,17 @@ static int calculate_sizes(struct kmem_cache *s)
 		 * page allocator order 0 allocs so take a reasonably large
 		 * order that will allows us a good number of objects.
 		 */
-		s->order = max(slub_max_order, PAGE_ALLOC_COSTLY_ORDER);
+		order = max(slub_max_order, PAGE_ALLOC_COSTLY_ORDER);
 		s->flags |= __PAGE_ALLOC_FALLBACK;
 		s->allocflags |= __GFP_NOWARN;
 	} else
-		s->order = calculate_order(size);
+		order = calculate_order(size);
 
-	if (s->order < 0)
+	if (order < 0)
 		return 0;
 
 	s->allocflags = 0;
-	if (s->order)
+	if (order)
 		s->allocflags |= __GFP_COMP;
 
 	if (s->flags & SLAB_CACHE_DMA)
@@ -2316,9 +2340,9 @@ static int calculate_sizes(struct kmem_cache *s)
 	/*
 	 * Determine the number of objects per slab
 	 */
-	s->objects = (PAGE_SIZE << s->order) / size;
+	s->oo = oo_make(order, size);
 
-	return !!s->objects;
+	return !!oo_objects(s->oo);
 
 }
 
@@ -2351,7 +2375,7 @@ error:
 	if (flags & SLAB_PANIC)
 		panic("Cannot create slab %s size=%lu realsize=%u "
 			"order=%u offset=%u flags=%lx\n",
-			s->name, (unsigned long)size, s->size, s->order,
+			s->name, (unsigned long)size, s->size, oo_order(s->oo),
 			s->offset, flags);
 	return 0;
 }
@@ -2789,8 +2813,9 @@ int kmem_cache_shrink(struct kmem_cache *s)
 	struct kmem_cache_node *n;
 	struct page *page;
 	struct page *t;
+	int objects = oo_objects(s->oo);
 	struct list_head *slabs_by_inuse =
-		kmalloc(sizeof(struct list_head) * s->objects, GFP_KERNEL);
+		kmalloc(sizeof(struct list_head) * objects, GFP_KERNEL);
 	unsigned long flags;
 
 	if (!slabs_by_inuse)
@@ -2803,7 +2828,7 @@ int kmem_cache_shrink(struct kmem_cache *s)
 		if (!n->nr_partial)
 			continue;
 
-		for (i = 0; i < s->objects; i++)
+		for (i = 0; i < objects; i++)
 			INIT_LIST_HEAD(slabs_by_inuse + i);
 
 		spin_lock_irqsave(&n->list_lock, flags);
@@ -2835,7 +2860,7 @@ int kmem_cache_shrink(struct kmem_cache *s)
 		 * Rebuild the partial list with the slabs filled up most
 		 * first and the least used slabs at the end.
 		 */
-		for (i = s->objects - 1; i >= 0; i--)
+		for (i = objects - 1; i >= 0; i--)
 			list_splice(slabs_by_inuse + i, n->partial.prev);
 
 		spin_unlock_irqrestore(&n->list_lock, flags);
@@ -3351,7 +3376,7 @@ static long validate_slab_cache(struct kmem_cache *s)
 {
 	int node;
 	unsigned long count = 0;
-	unsigned long *map = kmalloc(BITS_TO_LONGS(s->objects) *
+	unsigned long *map = kmalloc(BITS_TO_LONGS(oo_objects(s->oo)) *
 				sizeof(unsigned long), GFP_KERNEL);
 
 	if (!map)
@@ -3719,7 +3744,7 @@ static ssize_t show_slab_objects(struct kmem_cache *s,
 					- n->nr_partial;
 
 			if (flags & SO_OBJECTS)
-				x = full_slabs * s->objects;
+				x = full_slabs * oo_objects(s->oo);
 			else
 				x = full_slabs;
 			total += x;
@@ -3798,13 +3823,13 @@ SLAB_ATTR_RO(object_size);
 
 static ssize_t objs_per_slab_show(struct kmem_cache *s, char *buf)
 {
-	return sprintf(buf, "%d\n", s->objects);
+	return sprintf(buf, "%d\n", oo_objects(s->oo));
 }
 SLAB_ATTR_RO(objs_per_slab);
 
 static ssize_t order_show(struct kmem_cache *s, char *buf)
 {
-	return sprintf(buf, "%d\n", s->order);
+	return sprintf(buf, "%d\n", oo_order(s->oo));
 }
 SLAB_ATTR_RO(order);
 
@@ -4451,11 +4476,12 @@ static int s_show(struct seq_file *m, void *p)
 		nr_inuse += count_partial(n);
 	}
 
-	nr_objs = nr_slabs * s->objects;
-	nr_inuse += (nr_slabs - nr_partials) * s->objects;
+	nr_objs = nr_slabs * oo_objects(s->oo);
+	nr_inuse += (nr_slabs - nr_partials) * oo_objects(s->oo);
 
 	seq_printf(m, "%-17s %6lu %6lu %6u %4u %4d", s->name, nr_inuse,
-		   nr_objs, s->size, s->objects, (1 << s->order));
+		   nr_objs, s->size, oo_objects(s->oo),
+		   (1 << oo_order(s->oo)));
 	seq_printf(m, " : tunables %4u %4u %4u", 0, 0, 0);
 	seq_printf(m, " : slabdata %6lu %6lu %6lu", nr_slabs, nr_slabs,
 		   0UL);