diff options
author | Stephen Hemminger <shemminger@linux-foundation.org> | 2007-03-22 15:10:58 -0400 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2007-04-26 01:27:19 -0400 |
commit | 7e58886b45bc4a309aeaa8178ef89ff767daaf7f (patch) | |
tree | cd375c5d4a6ec5584531992f8bdbcae775659ed6 | |
parent | 22b9a0a3a49ab1a856e0853b3f3dd2abd156bd7c (diff) |
[TCP]: cubic optimization
Use willy's work in optimizing cube root by having table for small values.
Signed-off-by: Stephen Hemminger <shemminger@linux-foundation.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | net/ipv4/tcp_cubic.c | 50 |
1 files changed, 39 insertions, 11 deletions
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 0e6cdfeb207a..15c580375002 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c | |||
@@ -91,23 +91,51 @@ static void bictcp_init(struct sock *sk) | |||
91 | tcp_sk(sk)->snd_ssthresh = initial_ssthresh; | 91 | tcp_sk(sk)->snd_ssthresh = initial_ssthresh; |
92 | } | 92 | } |
93 | 93 | ||
94 | /* | 94 | /* calculate the cubic root of x using a table lookup followed by one |
95 | * calculate the cubic root of x using Newton-Raphson | 95 | * Newton-Raphson iteration. |
96 | * Avg err ~= 0.195% | ||
96 | */ | 97 | */ |
97 | static u32 cubic_root(u64 a) | 98 | static u32 cubic_root(u64 a) |
98 | { | 99 | { |
99 | u32 x; | 100 | u32 x, b, shift; |
100 | 101 | /* | |
101 | /* Initial estimate is based on: | 102 | * cbrt(x) MSB values for x MSB values in [0..63]. |
102 | * cbrt(x) = exp(log(x) / 3) | 103 | * Precomputed then refined by hand - Willy Tarreau |
104 | * | ||
105 | * For x in [0..63], | ||
106 | * v = cbrt(x << 18) - 1 | ||
107 | * cbrt(x) = (v[x] + 10) >> 6 | ||
103 | */ | 108 | */ |
104 | x = 1u << (fls64(a)/3); | 109 | static const u8 v[] = { |
110 | /* 0x00 */ 0, 54, 54, 54, 118, 118, 118, 118, | ||
111 | /* 0x08 */ 123, 129, 134, 138, 143, 147, 151, 156, | ||
112 | /* 0x10 */ 157, 161, 164, 168, 170, 173, 176, 179, | ||
113 | /* 0x18 */ 181, 185, 187, 190, 192, 194, 197, 199, | ||
114 | /* 0x20 */ 200, 202, 204, 206, 209, 211, 213, 215, | ||
115 | /* 0x28 */ 217, 219, 221, 222, 224, 225, 227, 229, | ||
116 | /* 0x30 */ 231, 232, 234, 236, 237, 239, 240, 242, | ||
117 | /* 0x38 */ 244, 245, 246, 248, 250, 251, 252, 254, | ||
118 | }; | ||
119 | |||
120 | b = fls64(a); | ||
121 | if (b < 7) { | ||
122 | /* a in [0..63] */ | ||
123 | return ((u32)v[(u32)a] + 35) >> 6; | ||
124 | } | ||
125 | |||
126 | b = ((b * 84) >> 8) - 1; | ||
127 | shift = (a >> (b * 3)); | ||
105 | 128 | ||
106 | /* converges to 32 bits in 3 iterations */ | 129 | x = ((u32)(((u32)v[shift] + 10) << b)) >> 6; |
107 | x = (2 * x + (u32)div64_64(a, (u64)x*(u64)x)) / 3; | ||
108 | x = (2 * x + (u32)div64_64(a, (u64)x*(u64)x)) / 3; | ||
109 | x = (2 * x + (u32)div64_64(a, (u64)x*(u64)x)) / 3; | ||
110 | 130 | ||
131 | /* | ||
132 | * Newton-Raphson iteration | ||
133 | * 2 | ||
134 | * x = ( 2 * x + a / x ) / 3 | ||
135 | * k+1 k k | ||
136 | */ | ||
137 | x = (2 * x + (u32)div64_64(a, (u64)x * (u64)(x - 1))); | ||
138 | x = ((x * 341) >> 10); | ||
111 | return x; | 139 | return x; |
112 | } | 140 | } |
113 | 141 | ||