diff options
author | Stephen Hemminger <shemminger@osdl.org> | 2005-12-21 22:32:36 -0500 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2006-01-03 16:11:09 -0500 |
commit | 9eb2d627190a8afe4b9276b24615a9559504fa60 (patch) | |
tree | 39479e9d761e53462bd05f979d0ced132bbfcb0a /net/ipv4 | |
parent | 89b3d9aaf46791177c5a5fa07a3ed38a035b5ef5 (diff) |
[TCP] cubic: use Newton-Raphson
Replace cube root algorithim with a faster version using Newton-Raphson.
Surprisingly, doing the scaled div64_64 is faster than a true 64 bit
division on 64 bit CPU's.
Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/tcp_cubic.c | 93 |
1 files changed, 39 insertions, 54 deletions
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 44fd40891acd..31a4986dfbf7 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c | |||
@@ -52,6 +52,7 @@ MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_ | |||
52 | module_param(tcp_friendliness, int, 0644); | 52 | module_param(tcp_friendliness, int, 0644); |
53 | MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness"); | 53 | MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness"); |
54 | 54 | ||
55 | #include <asm/div64.h> | ||
55 | 56 | ||
56 | /* BIC TCP Parameters */ | 57 | /* BIC TCP Parameters */ |
57 | struct bictcp { | 58 | struct bictcp { |
@@ -93,67 +94,51 @@ static void bictcp_init(struct sock *sk) | |||
93 | tcp_sk(sk)->snd_ssthresh = initial_ssthresh; | 94 | tcp_sk(sk)->snd_ssthresh = initial_ssthresh; |
94 | } | 95 | } |
95 | 96 | ||
96 | /* 65536 times the cubic root */ | 97 | /* 64bit divisor, dividend and result. dynamic precision */ |
97 | static const u64 cubic_table[8] | 98 | static inline u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor) |
98 | = {0, 65536, 82570, 94519, 104030, 112063, 119087, 125367}; | 99 | { |
100 | u_int32_t d = divisor; | ||
101 | |||
102 | if (divisor > 0xffffffffULL) { | ||
103 | unsigned int shift = fls(divisor >> 32); | ||
104 | |||
105 | d = divisor >> shift; | ||
106 | dividend >>= shift; | ||
107 | } | ||
108 | |||
109 | /* avoid 64 bit division if possible */ | ||
110 | if (dividend >> 32) | ||
111 | do_div(dividend, d); | ||
112 | else | ||
113 | dividend = (uint32_t) dividend / d; | ||
114 | |||
115 | return dividend; | ||
116 | } | ||
99 | 117 | ||
100 | /* | 118 | /* |
101 | * calculate the cubic root of x | 119 | * calculate the cubic root of x using Newton-Raphson |
102 | * the basic idea is that x can be expressed as i*8^j | ||
103 | * so cubic_root(x) = cubic_root(i)*2^j | ||
104 | * in the following code, x is i, and y is 2^j | ||
105 | * because of integer calculation, there are errors in calculation | ||
106 | * so finally use binary search to find out the exact solution | ||
107 | */ | 120 | */ |
108 | static u32 cubic_root(u64 x) | 121 | static u32 cubic_root(u64 a) |
109 | { | 122 | { |
110 | u64 y, app, target, start, end, mid, start_diff, end_diff; | 123 | u32 x, x1; |
111 | |||
112 | if (x == 0) | ||
113 | return 0; | ||
114 | 124 | ||
115 | target = x; | 125 | /* Initial estimate is based on: |
116 | 126 | * cbrt(x) = exp(log(x) / 3) | |
117 | /* first estimate lower and upper bound */ | 127 | */ |
118 | y = 1; | 128 | x = 1u << (fls64(a)/3); |
119 | while (x >= 8){ | ||
120 | x = (x >> 3); | ||
121 | y = (y << 1); | ||
122 | } | ||
123 | start = (y*cubic_table[x])>>16; | ||
124 | if (x==7) | ||
125 | end = (y<<1); | ||
126 | else | ||
127 | end = (y*cubic_table[x+1]+65535)>>16; | ||
128 | |||
129 | /* binary search for more accurate one */ | ||
130 | while (start < end-1) { | ||
131 | mid = (start+end) >> 1; | ||
132 | app = mid*mid*mid; | ||
133 | if (app < target) | ||
134 | start = mid; | ||
135 | else if (app > target) | ||
136 | end = mid; | ||
137 | else | ||
138 | return mid; | ||
139 | } | ||
140 | 129 | ||
141 | /* find the most accurate one from start and end */ | 130 | /* |
142 | app = start*start*start; | 131 | * Iteration based on: |
143 | if (app < target) | 132 | * 2 |
144 | start_diff = target - app; | 133 | * x = ( 2 * x + a / x ) / 3 |
145 | else | 134 | * k+1 k k |
146 | start_diff = app - target; | 135 | */ |
147 | app = end*end*end; | 136 | do { |
148 | if (app < target) | 137 | x1 = x; |
149 | end_diff = target - app; | 138 | x = (2 * x + (uint32_t) div64_64(a, x*x)) / 3; |
150 | else | 139 | } while (abs(x1 - x) > 1); |
151 | end_diff = app - target; | ||
152 | 140 | ||
153 | if (start_diff < end_diff) | 141 | return x; |
154 | return (u32)start; | ||
155 | else | ||
156 | return (u32)end; | ||
157 | } | 142 | } |
158 | 143 | ||
159 | /* | 144 | /* |