3 files changed, 26 insertions, 20 deletions
diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h
index 6843cf193a44..b6efb0c64408 100644
--- a/include/linux/flex_array.h
+++ b/include/linux/flex_array.h
@@ -2,6 +2,7 @@
 #define _FLEX_ARRAY_H
 #include <linux/types.h>
+#include <linux/reciprocal_div.h>
 #include <asm/page.h>
 #define FLEX_ARRAY_PART_SIZE PAGE_SIZE
@@ -22,7 +23,7 @@ struct flex_array {
                        int element_size;
                        int total_nr_elements;
                        int elems_per_part;
-                        u32 reciprocal_elems;
+                        struct reciprocal_value reciprocal_elems;
                        struct flex_array_part *parts[];
                };
                /*
diff --git a/include/linux/reciprocal_div.h b/include/linux/reciprocal_div.h
index f9c90b33285b..8c5a3fb6c6c5 100644
--- a/include/linux/reciprocal_div.h
+++ b/include/linux/reciprocal_div.h
@@ -4,29 +4,32 @@
 #include <linux/types.h>
 /*
- * This file describes reciprocical division.
+ * This algorithm is based on the paper "Division by Invariant
+ * Integers Using Multiplication" by Torbjörn Granlund and Peter
+ * L. Montgomery.
 *
- * This optimizes the (A/B) problem, when A and B are two u32
+ * The assembler implementation from Agner Fog, which this code is
- * and B is a known value (but not known at compile time)
+ * based on, can be found here:
+ * http://www.agner.org/optimize/asmlib.zip
 *
- * The math principle used is :
+ * This optimization for A/B is helpful if the divisor B is mostly
- *   Let RECIPROCAL_VALUE(B) be (((1LL << 32) + (B - 1))/ B)
+ * runtime invariant. The reciprocal of B is calculated in the
- *   Then A / B = (u32)(((u64)(A) * (R)) >> 32)
+ * slow-path with reciprocal_value(). The fast-path can then just use
- *
+ * a much faster multiplication operation with a variable dividend A
- * This replaces a divide by a multiply (and a shift), and
+ * to calculate the division A/B.
- * is generally less expensive in CPU cycles.
 */
-/*
+struct reciprocal_value {
- * Computes the reciprocal value (R) for the value B of the divisor.
+        u32 m;
- * Should not be called before each reciprocal_divide(),
+        u8 sh1, sh2;
- * or else the performance is slower than a normal divide.
+};
- */
-extern u32 reciprocal_value(u32 B);
+struct reciprocal_value reciprocal_value(u32 d);
-static inline u32 reciprocal_divide(u32 A, u32 R)
+static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R)
 {
-        return (u32)(((u64)A * R) >> 32);
+        u32 t = (u32)(((u64)a * R.m) >> 32);
+        return (t + ((a - t) >> R.sh1)) >> R.sh2;
 }
-#endif
+#endif /* _LINUX_RECIPROCAL_DIV_H */
diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 09bfffb08a56..96e8abae19a9 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -1,6 +1,8 @@
 #ifndef _LINUX_SLAB_DEF_H
 #define _LINUX_SLAB_DEF_H
+#include <linux/reciprocal_div.h>
 /*
 * Definitions unique to the original Linux SLAB allocator.
 */
@@ -12,7 +14,7 @@ struct kmem_cache {
        unsigned int shared;
        unsigned int size;
-        u32 reciprocal_buffer_size;
+        struct reciprocal_value reciprocal_buffer_size;
 /* 2) touched by every alloc & free from the backend */
        unsigned int flags;             /* constant flags */

diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h
index 6843cf193a44..b6efb0c64408 100644
--- a/include/linux/flex_array.h
+++ b/include/linux/flex_array.h
@@ -2,6 +2,7 @@
2	#define _FLEX_ARRAY_H	2	#define _FLEX_ARRAY_H
3		3
4	#include <linux/types.h>	4	#include <linux/types.h>
		5	#include <linux/reciprocal_div.h>
5	#include <asm/page.h>	6	#include <asm/page.h>
6		7
7	#define FLEX_ARRAY_PART_SIZE PAGE_SIZE	8	#define FLEX_ARRAY_PART_SIZE PAGE_SIZE
@@ -22,7 +23,7 @@ struct flex_array {
22	int element_size;	23	int element_size;
23	int total_nr_elements;	24	int total_nr_elements;
24	int elems_per_part;	25	int elems_per_part;
25	u32 reciprocal_elems;	26	struct reciprocal_value reciprocal_elems;
26	struct flex_array_part *parts[];	27	struct flex_array_part *parts[];
27	};	28	};
28	/*	29	/*


diff --git a/include/linux/reciprocal_div.h b/include/linux/reciprocal_div.h
index f9c90b33285b..8c5a3fb6c6c5 100644
--- a/include/linux/reciprocal_div.h
+++ b/include/linux/reciprocal_div.h
@@ -4,29 +4,32 @@
4	#include <linux/types.h>	4	#include <linux/types.h>
5		5
6	/*	6	/*
7	* This file describes reciprocical division.	7	* This algorithm is based on the paper "Division by Invariant
		8	* Integers Using Multiplication" by Torbjörn Granlund and Peter
		9	* L. Montgomery.
8	*	10	*
9	* This optimizes the (A/B) problem, when A and B are two u32	11	* The assembler implementation from Agner Fog, which this code is
10	* and B is a known value (but not known at compile time)	12	* based on, can be found here:
		13	* http://www.agner.org/optimize/asmlib.zip
11	*	14	*
12	* The math principle used is :	15	* This optimization for A/B is helpful if the divisor B is mostly
13	* Let RECIPROCAL_VALUE(B) be (((1LL << 32) + (B - 1))/ B)	16	* runtime invariant. The reciprocal of B is calculated in the
14	* Then A / B = (u32)(((u64)(A) * (R)) >> 32)	17	* slow-path with reciprocal_value(). The fast-path can then just use
15	*	18	* a much faster multiplication operation with a variable dividend A
16	* This replaces a divide by a multiply (and a shift), and	19	* to calculate the division A/B.
17	* is generally less expensive in CPU cycles.
18	*/	20	*/
19		21
20	/*	22	struct reciprocal_value {
21	* Computes the reciprocal value (R) for the value B of the divisor.	23	u32 m;
22	* Should not be called before each reciprocal_divide(),	24	u8 sh1, sh2;
23	* or else the performance is slower than a normal divide.	25	};
24	*/
25	extern u32 reciprocal_value(u32 B);
26		26
		27	struct reciprocal_value reciprocal_value(u32 d);
27		28
28	static inline u32 reciprocal_divide(u32 A, u32 R)	29	static inline u32 reciprocal_divide(u32 a, struct reciprocal_value R)
29	{	30	{
30	return (u32)(((u64)A * R) >> 32);	31	u32 t = (u32)(((u64)a * R.m) >> 32);
		32	return (t + ((a - t) >> R.sh1)) >> R.sh2;
31	}	33	}
32	#endif	34
		35	#endif /* _LINUX_RECIPROCAL_DIV_H */


diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h
index 09bfffb08a56..96e8abae19a9 100644
--- a/include/linux/slab_def.h
+++ b/include/linux/slab_def.h
@@ -1,6 +1,8 @@
1	#ifndef _LINUX_SLAB_DEF_H	1	#ifndef _LINUX_SLAB_DEF_H
2	#define _LINUX_SLAB_DEF_H	2	#define _LINUX_SLAB_DEF_H
3		3
		4	#include <linux/reciprocal_div.h>
		5
4	/*	6	/*
5	* Definitions unique to the original Linux SLAB allocator.	7	* Definitions unique to the original Linux SLAB allocator.
6	*/	8	*/
@@ -12,7 +14,7 @@ struct kmem_cache {
12	unsigned int shared;	14	unsigned int shared;
13		15
14	unsigned int size;	16	unsigned int size;
15	u32 reciprocal_buffer_size;	17	struct reciprocal_value reciprocal_buffer_size;
16	/* 2) touched by every alloc & free from the backend */	18	/* 2) touched by every alloc & free from the backend */
17		19
18	unsigned int flags; /* constant flags */	20	unsigned int flags; /* constant flags */