aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorStephen Hemminger <shemminger@linux-foundation.org>2007-03-22 15:10:58 -0400
committerDavid S. Miller <davem@sunset.davemloft.net>2007-04-26 01:27:19 -0400
commit7e58886b45bc4a309aeaa8178ef89ff767daaf7f (patch)
treecd375c5d4a6ec5584531992f8bdbcae775659ed6
parent22b9a0a3a49ab1a856e0853b3f3dd2abd156bd7c (diff)
[TCP]: cubic optimization
Use willy's work in optimizing cube root by having table for small values. Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/ipv4/tcp_cubic.c50
1 files changed, 39 insertions, 11 deletions
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 0e6cdfeb207a..15c580375002 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -91,23 +91,51 @@ static void bictcp_init(struct sock *sk)
91 tcp_sk(sk)->snd_ssthresh = initial_ssthresh; 91 tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
92} 92}
93 93
94/* 94/* calculate the cubic root of x using a table lookup followed by one
95 * calculate the cubic root of x using Newton-Raphson 95 * Newton-Raphson iteration.
96 * Avg err ~= 0.195%
96 */ 97 */
97static u32 cubic_root(u64 a) 98static u32 cubic_root(u64 a)
98{ 99{
99 u32 x; 100 u32 x, b, shift;
100 101 /*
101 /* Initial estimate is based on: 102 * cbrt(x) MSB values for x MSB values in [0..63].
102 * cbrt(x) = exp(log(x) / 3) 103 * Precomputed then refined by hand - Willy Tarreau
104 *
105 * For x in [0..63],
106 * v = cbrt(x << 18) - 1
107 * cbrt(x) = (v[x] + 10) >> 6
103 */ 108 */
104 x = 1u << (fls64(a)/3); 109 static const u8 v[] = {
110 /* 0x00 */ 0, 54, 54, 54, 118, 118, 118, 118,
111 /* 0x08 */ 123, 129, 134, 138, 143, 147, 151, 156,
112 /* 0x10 */ 157, 161, 164, 168, 170, 173, 176, 179,
113 /* 0x18 */ 181, 185, 187, 190, 192, 194, 197, 199,
114 /* 0x20 */ 200, 202, 204, 206, 209, 211, 213, 215,
115 /* 0x28 */ 217, 219, 221, 222, 224, 225, 227, 229,
116 /* 0x30 */ 231, 232, 234, 236, 237, 239, 240, 242,
117 /* 0x38 */ 244, 245, 246, 248, 250, 251, 252, 254,
118 };
119
120 b = fls64(a);
121 if (b < 7) {
122 /* a in [0..63] */
123 return ((u32)v[(u32)a] + 35) >> 6;
124 }
125
126 b = ((b * 84) >> 8) - 1;
127 shift = (a >> (b * 3));
105 128
106 /* converges to 32 bits in 3 iterations */ 129 x = ((u32)(((u32)v[shift] + 10) << b)) >> 6;
107 x = (2 * x + (u32)div64_64(a, (u64)x*(u64)x)) / 3;
108 x = (2 * x + (u32)div64_64(a, (u64)x*(u64)x)) / 3;
109 x = (2 * x + (u32)div64_64(a, (u64)x*(u64)x)) / 3;
110 130
131 /*
132 * Newton-Raphson iteration
133 * 2
134 * x = ( 2 * x + a / x ) / 3
135 * k+1 k k
136 */
137 x = (2 * x + (u32)div64_64(a, (u64)x * (u64)(x - 1)));
138 x = ((x * 341) >> 10);
111 return x; 139 return x;
112} 140}
113 141