[TCP]: cubic optimization

Use willy's work in optimizing cube root by having table for small values. Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org> Signed-off-by: David S. Miller <davem@davemloft.net>
author: Stephen Hemminger <shemminger@linux-foundation.org> 2007-03-22 15:10:58 -0400
committer: David S. Miller <davem@sunset.davemloft.net> 2007-04-26 01:27:19 -0400
commit: 7e58886b45bc4a309aeaa8178ef89ff767daaf7f (patch)
tree: cd375c5d4a6ec5584531992f8bdbcae775659ed6
parent: 22b9a0a3a49ab1a856e0853b3f3dd2abd156bd7c (diff)
1 files changed, 39 insertions, 11 deletions
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 0e6cdfeb207a..15c580375002 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -91,23 +91,51 @@ static void bictcp_init(struct sock *sk)
                tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
 }
-/*
+/* calculate the cubic root of x using a table lookup followed by one
- * calculate the cubic root of x using Newton-Raphson
+ * Newton-Raphson iteration.
+ * Avg err ~= 0.195%
 */
 static u32 cubic_root(u64 a)
 {
-        u32 x;
+        u32 x, b, shift;
+        /*
-        /* Initial estimate is based on:
+         * cbrt(x) MSB values for x MSB values in [0..63].
-         * cbrt(x) = exp(log(x) / 3)
+         * Precomputed then refined by hand - Willy Tarreau
+         *
+         * For x in [0..63],
+         *   v = cbrt(x << 18) - 1
+         *   cbrt(x) = (v[x] + 10) >> 6
         */
-        x = 1u << (fls64(a)/3);
+        static const u8 v[] = {
+                /* 0x00 */    0,   54,   54,   54,  118,  118,  118,  118,
+                /* 0x08 */  123,  129,  134,  138,  143,  147,  151,  156,
+                /* 0x10 */  157,  161,  164,  168,  170,  173,  176,  179,
+                /* 0x18 */  181,  185,  187,  190,  192,  194,  197,  199,
+                /* 0x20 */  200,  202,  204,  206,  209,  211,  213,  215,
+                /* 0x28 */  217,  219,  221,  222,  224,  225,  227,  229,
+                /* 0x30 */  231,  232,  234,  236,  237,  239,  240,  242,
+                /* 0x38 */  244,  245,  246,  248,  250,  251,  252,  254,
+        };
+        b = fls64(a);
+        if (b < 7) {
+                /* a in [0..63] */
+                return ((u32)v[(u32)a] + 35) >> 6;
+        }
+        b = ((b * 84) >> 8) - 1;
+        shift = (a >> (b * 3));
-        /* converges to 32 bits in 3 iterations */
+        x = ((u32)(((u32)v[shift] + 10) << b)) >> 6;
-        x = (2 * x + (u32)div64_64(a, (u64)x*(u64)x)) / 3;
-        x = (2 * x + (u32)div64_64(a, (u64)x*(u64)x)) / 3;
-        x = (2 * x + (u32)div64_64(a, (u64)x*(u64)x)) / 3;
+        /*
+         * Newton-Raphson iteration
+         *                         2
+         * x    = ( 2 * x  +  a / x  ) / 3
+         *  k+1          k         k
+         */
+        x = (2 * x + (u32)div64_64(a, (u64)x * (u64)(x - 1)));
+        x = ((x * 341) >> 10);
        return x;
 }
author	Stephen Hemminger <shemminger@linux-foundation.org>	2007-03-22 15:10:58 -0400
committer	David S. Miller <davem@sunset.davemloft.net>	2007-04-26 01:27:19 -0400
commit	7e58886b45bc4a309aeaa8178ef89ff767daaf7f (patch)
tree	cd375c5d4a6ec5584531992f8bdbcae775659ed6
parent	22b9a0a3a49ab1a856e0853b3f3dd2abd156bd7c (diff)

diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 0e6cdfeb207a..15c580375002 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c
@@ -91,23 +91,51 @@ static void bictcp_init(struct sock *sk)
91	tcp_sk(sk)->snd_ssthresh = initial_ssthresh;	91	tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
92	}	92	}
93		93
94	/*	94	/* calculate the cubic root of x using a table lookup followed by one
95	* calculate the cubic root of x using Newton-Raphson	95	* Newton-Raphson iteration.
		96	* Avg err ~= 0.195%
96	*/	97	*/
97	static u32 cubic_root(u64 a)	98	static u32 cubic_root(u64 a)
98	{	99	{
99	u32 x;	100	u32 x, b, shift;
100		101	/*
101	/* Initial estimate is based on:	102	* cbrt(x) MSB values for x MSB values in [0..63].
102	* cbrt(x) = exp(log(x) / 3)	103	* Precomputed then refined by hand - Willy Tarreau
		104	*
		105	* For x in [0..63],
		106	* v = cbrt(x << 18) - 1
		107	* cbrt(x) = (v[x] + 10) >> 6
103	*/	108	*/
104	x = 1u << (fls64(a)/3);	109	static const u8 v[] = {
		110	/* 0x00 */ 0, 54, 54, 54, 118, 118, 118, 118,
		111	/* 0x08 */ 123, 129, 134, 138, 143, 147, 151, 156,
		112	/* 0x10 */ 157, 161, 164, 168, 170, 173, 176, 179,
		113	/* 0x18 */ 181, 185, 187, 190, 192, 194, 197, 199,
		114	/* 0x20 */ 200, 202, 204, 206, 209, 211, 213, 215,
		115	/* 0x28 */ 217, 219, 221, 222, 224, 225, 227, 229,
		116	/* 0x30 */ 231, 232, 234, 236, 237, 239, 240, 242,
		117	/* 0x38 */ 244, 245, 246, 248, 250, 251, 252, 254,
		118	};
		119
		120	b = fls64(a);
		121	if (b < 7) {
		122	/* a in [0..63] */
		123	return ((u32)v[(u32)a] + 35) >> 6;
		124	}
		125
		126	b = ((b * 84) >> 8) - 1;
		127	shift = (a >> (b * 3));
105		128
106	/* converges to 32 bits in 3 iterations */	129	x = ((u32)(((u32)v[shift] + 10) << b)) >> 6;
107	x = (2 * x + (u32)div64_64(a, (u64)x*(u64)x)) / 3;
108	x = (2 * x + (u32)div64_64(a, (u64)x*(u64)x)) / 3;
109	x = (2 * x + (u32)div64_64(a, (u64)x*(u64)x)) / 3;
110		130
		131	/*
		132	* Newton-Raphson iteration
		133	* 2
		134	* x = ( 2 * x + a / x ) / 3
		135	* k+1 k k
		136	*/
		137	x = (2 * x + (u32)div64_64(a, (u64)x * (u64)(x - 1)));
		138	x = ((x * 341) >> 10);
111	return x;	139	return x;
112	}	140	}
113		141