 -rw-r--r--  include/linux/reciprocal_div.h | 32 ++++++++++++++++++++++++++++++++
 -rw-r--r--  lib/Makefile                   |  2 +-
 -rw-r--r--  lib/reciprocal_div.c           |  9 +++++++++
 -rw-r--r--  mm/slab.c                      | 18 +++++++++++++++---
 4 files changed, 57 insertions(+), 4 deletions(-)
diff --git a/include/linux/reciprocal_div.h b/include/linux/reciprocal_div.h
new file mode 100644
index 000000000000..f9c90b33285b
--- /dev/null
+++ b/include/linux/reciprocal_div.h
@@ -0,0 +1,32 @@
+#ifndef _LINUX_RECIPROCAL_DIV_H
+#define _LINUX_RECIPROCAL_DIV_H
+
+#include <linux/types.h>
+
+/*
+ * This file describes reciprocal division.
+ *
+ * This optimizes the (A/B) problem, when A and B are two u32
+ * and B is a known value (but not known at compile time).
+ *
+ * The math principle used is:
+ *   Let RECIPROCAL_VALUE(B) be (((1LL << 32) + (B - 1)) / B)
+ *   Then A / B = (u32)(((u64)(A) * (R)) >> 32)
+ *
+ * This replaces a divide by a multiply (and a shift), and
+ * is generally less expensive in CPU cycles.
+ */
+
+/*
+ * Computes the reciprocal value (R) for the value B of the divisor.
+ * Should be called once and cached, not before each reciprocal_divide(),
+ * or else the result is slower than a normal divide.
+ */
+extern u32 reciprocal_value(u32 B);
+
+
+static inline u32 reciprocal_divide(u32 A, u32 R)
+{
+        return (u32)(((u64)A * R) >> 32);
+}
+#endif
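As a worked example of the principle above (the numbers are illustrative, not taken from the patch): for B = 192, RECIPROCAL_VALUE(B) = ceil(2^32 / 192) = 22369622. Dividing A = 1920 then becomes (1920 * 22369622) >> 32 = floor(42949674240 / 2^32) = 10, which matches 1920 / 192 exactly.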
diff --git a/lib/Makefile b/lib/Makefile
index 2d6106af53cd..c9ec8f11e833 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -5,7 +5,7 @@
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
          bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \
          idr.o div64.o int_sqrt.o bitmap.o extable.o prio_tree.o \
-         sha1.o irq_regs.o
+         sha1.o irq_regs.o reciprocal_div.o
 
 lib-$(CONFIG_MMU) += ioremap.o
 lib-$(CONFIG_SMP) += cpumask.o
diff --git a/lib/reciprocal_div.c b/lib/reciprocal_div.c
new file mode 100644
index 000000000000..6a3bd48fa2a0
--- /dev/null
+++ b/lib/reciprocal_div.c
@@ -0,0 +1,9 @@
+#include <asm/div64.h>
+#include <linux/reciprocal_div.h>
+
+u32 reciprocal_value(u32 k)
+{
+        u64 val = (1LL << 32) + (k - 1);
+        do_div(val, k);
+        return (u32)val;
+}
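A minimal userspace sketch of the pair in action (assumptions: uint32_t/uint64_t stand in for u32/u64, and a plain 64-bit divide stands in for do_div(); none of this is kernel code). It caches R once per divisor, as the header comment requires, and checks the multiply+shift against a real divide on exact multiples of B -- the only numerators slab produces, since an object's offset is always idx * buffer_size:

#include <stdint.h>
#include <stdio.h>

static uint32_t reciprocal_value(uint32_t k)
{
        uint64_t val = ((uint64_t)1 << 32) + (k - 1);
        return (uint32_t)(val / k);     /* userspace stand-in for do_div() */
}

static inline uint32_t reciprocal_divide(uint32_t a, uint32_t r)
{
        return (uint32_t)(((uint64_t)a * r) >> 32);
}

int main(void)
{
        uint32_t b, n;

        /* start at 2: for k = 1 the reciprocal is 2^32, which does not
         * fit in a u32; slab buffer sizes are always larger than 1 */
        for (b = 2; b <= 4096; b++) {
                uint32_t r = reciprocal_value(b);       /* computed once, cached */

                for (n = 0; n < 1000; n++) {
                        uint32_t a = n * b;             /* exact multiple, as in slab */

                        if (reciprocal_divide(a, r) != a / b) {
                                printf("mismatch: %u / %u\n", a, b);
                                return 1;
                        }
                }
        }
        printf("multiply+shift matched '/' for all tested values\n");
        return 0;
}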
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -109,6 +109,7 @@
 #include <linux/mutex.h>
 #include <linux/fault-inject.h>
 #include <linux/rtmutex.h>
+#include <linux/reciprocal_div.h>
 
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
@@ -386,6 +387,7 @@ struct kmem_cache {
         unsigned int shared;
 
         unsigned int buffer_size;
+        u32 reciprocal_buffer_size;
 /* 3) touched by every alloc & free from the backend */
         struct kmem_list3 *nodelists[MAX_NUMNODES];
 
@@ -627,10 +629,17 @@ static inline void *index_to_obj(struct kmem_cache *cache, struct slab *slab,
         return slab->s_mem + cache->buffer_size * idx;
 }
 
-static inline unsigned int obj_to_index(struct kmem_cache *cache,
-                                        struct slab *slab, void *obj)
+/*
+ * We want to avoid an expensive divide: (offset / cache->buffer_size)
+ * Using the fact that buffer_size is a constant for a particular cache,
+ * we can replace (offset / cache->buffer_size) by
+ * reciprocal_divide(offset, cache->reciprocal_buffer_size)
+ */
+static inline unsigned int obj_to_index(const struct kmem_cache *cache,
+                                        const struct slab *slab, void *obj)
 {
-        return (unsigned)(obj - slab->s_mem) / cache->buffer_size;
+        u32 offset = (obj - slab->s_mem);
+        return reciprocal_divide(offset, cache->reciprocal_buffer_size);
 }
 
 /*
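To make the equivalence concrete, a standalone sketch of the changed obj_to_index() under stated assumptions (struct mock_cache, the malloc'd s_mem block, and buffer_size = 176 are hypothetical stand-ins, not slab internals). The old divide and the new reciprocal multiply must yield the same index, because index_to_obj() only ever places objects at s_mem + buffer_size * idx, so every offset is an exact multiple of buffer_size:

#include <assert.h>
#include <stdint.h>
#include <stdlib.h>

struct mock_cache {
        unsigned int buffer_size;
        uint32_t reciprocal_buffer_size;
};

static uint32_t reciprocal_value(uint32_t k)
{
        return (uint32_t)((((uint64_t)1 << 32) + (k - 1)) / k);
}

static inline uint32_t reciprocal_divide(uint32_t a, uint32_t r)
{
        return (uint32_t)(((uint64_t)a * r) >> 32);
}

int main(void)
{
        struct mock_cache cache = { .buffer_size = 176 };
        char *s_mem = malloc(176 * 64);         /* mock slab memory */
        unsigned int idx;

        cache.reciprocal_buffer_size = reciprocal_value(cache.buffer_size);

        for (idx = 0; idx < 64; idx++) {
                void *obj = s_mem + cache.buffer_size * idx;    /* as index_to_obj() */
                uint32_t offset = (uint32_t)((char *)obj - s_mem);

                /* old divide and new multiply+shift must agree */
                assert(offset / cache.buffer_size ==
                       reciprocal_divide(offset, cache.reciprocal_buffer_size));
        }
        free(s_mem);
        return 0;
}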
@@ -1427,6 +1436,8 @@ void __init kmem_cache_init(void)
 
         cache_cache.buffer_size = ALIGN(cache_cache.buffer_size,
                                         cache_line_size());
+        cache_cache.reciprocal_buffer_size =
+                reciprocal_value(cache_cache.buffer_size);
 
         for (order = 0; order < MAX_ORDER; order++) {
                 cache_estimate(order, cache_cache.buffer_size,
@@ -2313,6 +2324,7 @@ kmem_cache_create (const char *name, size_t size, size_t align,
         if (flags & SLAB_CACHE_DMA)
                 cachep->gfpflags |= GFP_DMA;
         cachep->buffer_size = size;
+        cachep->reciprocal_buffer_size = reciprocal_value(size);
 
         if (flags & CFLGS_OFF_SLAB) {
                 cachep->slabp_cache = kmem_find_general_cachep(slab_size, 0u);