diff options
Diffstat (limited to 'include/linux')
| -rw-r--r-- | include/linux/flex_array.h | 3 | ||||
| -rw-r--r-- | include/linux/reciprocal_div.h | 39 | ||||
| -rw-r--r-- | include/linux/slab_def.h | 4 |
3 files changed, 26 insertions, 20 deletions
diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h
index 6843cf193a44..b6efb0c64408 100644
--- a/include/linux/flex_array.h
+++ b/include/linux/flex_array.h
| @@ -2,6 +2,7 @@ | |||
| 2 | #define _FLEX_ARRAY_H | 2 | #define _FLEX_ARRAY_H |
| 3 | 3 | ||
| 4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
| 5 | #include <linux/reciprocal_div.h> | ||
| 5 | #include <asm/page.h> | 6 | #include <asm/page.h> |
| 6 | 7 | ||
| 7 | #define FLEX_ARRAY_PART_SIZE PAGE_SIZE | 8 | #define FLEX_ARRAY_PART_SIZE PAGE_SIZE |
| @@ -22,7 +23,7 @@ struct flex_array { | |||
| 22 | int element_size; | 23 | int element_size; |
| 23 | int total_nr_elements; | 24 | int total_nr_elements; |
| 24 | int elems_per_part; | 25 | int elems_per_part; |
| 25 | u32 reciprocal_elems; | 26 | struct reciprocal_value reciprocal_elems; |
| 26 | struct flex_array_part *parts[]; | 27 | struct flex_array_part *parts[]; |
| 27 | }; | 28 | }; |
| 28 | /* | 29 | /* |
diff --git a/include/linux/reciprocal_div.h b/include/linux/reciprocal_div.h
index f9c90b33285b..8c5a3fb6c6c5 100644
--- a/include/linux/reciprocal_div.h
+++ b/include/linux/reciprocal_div.h
| @@ -4,29 +4,32 @@ | |||
| 4 | #include <linux/types.h> | 4 | #include <linux/types.h> |
| 5 | 5 | ||
| 6 | /* | 6 | /* |
| 7 | * This file describes reciprocical division. | 7 | * This algorithm is based on the paper "Division by Invariant |
| 8 | * Integers Using Multiplication" by Torbjörn Granlund and Peter | ||
| 9 | * L. Montgomery. | ||
| 8 | * | 10 | * |
| 9 | * This optimizes the (A/B) problem, when A and B are two u32 | 11 | * The assembler implementation from Agner Fog, which this code is |
| 10 | * and B is a known value (but not known at compile time) | 12 | * based on, can be found here: |
| 13 | * http://www.agner.org/optimize/asmlib.zip | ||
| 11 | * | 14 | * |
| 12 | * The math principle used is : | 15 | * This optimization for A/B is helpful if the divisor B is mostly |
| 13 | * Let RECIPROCAL_VALUE(B) be (((1LL << 32) + (B - 1))/ B) | 16 | * runtime invariant. The reciprocal of B is calculated in the |
| 14 | * Then A / B = (u32)(((u64)(A) * (R)) >> 32) | 17 | * slow-path with reciprocal_value(). The fast-path can then just use |
| 15 | * | 18 | * a much faster multiplication operation with a variable dividend A |
| 16 | * This replaces a divide by a multiply (and a shift), and | 19 | * to calculate the division A/B. |
| 17 | * is generally less expensive in CPU cycles. | ||
| 18 | */ | 20 | */ |
| 19 | 21 | ||
| 20 | /* | 22 | struct reciprocal_value { |
| 21 | * Computes the reciprocal value (R) for the value B of the divisor. | 23 | u32 m; |
| 22 | * Should not be called before each reciprocal_divide(), | 24 | u8 sh1, sh2; |
| 23 | * or else the performance is slower than a normal divide. | 25 | }; |
| 24 | */ | ||
| 25 | extern u32 reciprocal_value(u32 B); | ||
| 26 | 26 | ||
| 27 | struct reciprocal_value reciprocal_value(u32 d); | ||
| 27 | 28 | ||
| 28 | static inline u32 reciprocal_divide(u32 A, u32 R) | 29 | static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R) |
| 29 | { | 30 | { |
| 30 | return (u32)(((u64)A * R) >> 32); | 31 | u32 t = (u32)(((u64)a * R.m) >> 32); |
| 32 | return (t + ((a - t) >> R.sh1)) >> R.sh2; | ||
| 31 | } | 33 | } |
| 32 | #endif | 34 | |
| 35 | #endif /* _LINUX_RECIPROCAL_DIV_H */ | ||
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 09bfffb08a56..96e8abae19a9 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
| @@ -1,6 +1,8 @@ | |||
| 1 | #ifndef _LINUX_SLAB_DEF_H | 1 | #ifndef _LINUX_SLAB_DEF_H |
| 2 | #define _LINUX_SLAB_DEF_H | 2 | #define _LINUX_SLAB_DEF_H |
| 3 | 3 | ||
| 4 | #include <linux/reciprocal_div.h> | ||
| 5 | |||
| 4 | /* | 6 | /* |
| 5 | * Definitions unique to the original Linux SLAB allocator. | 7 | * Definitions unique to the original Linux SLAB allocator. |
| 6 | */ | 8 | */ |
| @@ -12,7 +14,7 @@ struct kmem_cache { | |||
| 12 | unsigned int shared; | 14 | unsigned int shared; |
| 13 | 15 | ||
| 14 | unsigned int size; | 16 | unsigned int size; |
| 15 | u32 reciprocal_buffer_size; | 17 | struct reciprocal_value reciprocal_buffer_size; |
| 16 | /* 2) touched by every alloc & free from the backend */ | 18 | /* 2) touched by every alloc & free from the backend */ |
| 17 | 19 | ||
| 18 | unsigned int flags; /* constant flags */ | 20 | unsigned int flags; /* constant flags */ |
