aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_cubic.c
diff options
context:
space:
mode:
authorStephen Hemminger <shemminger@osdl.org>2005-12-21 22:32:36 -0500
committerDavid S. Miller <davem@sunset.davemloft.net>2006-01-03 16:11:09 -0500
commit9eb2d627190a8afe4b9276b24615a9559504fa60 (patch)
tree39479e9d761e53462bd05f979d0ced132bbfcb0a /net/ipv4/tcp_cubic.c
parent89b3d9aaf46791177c5a5fa07a3ed38a035b5ef5 (diff)
[TCP] cubic: use Newton-Raphson
Replace cube root algorithim with a faster version using Newton-Raphson. Surprisingly, doing the scaled div64_64 is faster than a true 64 bit division on 64 bit CPU's. Signed-off-by: Stephen Hemminger <shemminger@osdl.org> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_cubic.c')
-rw-r--r--net/ipv4/tcp_cubic.c93
1 files changed, 39 insertions, 54 deletions
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 44fd40891acd..31a4986dfbf7 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -52,6 +52,7 @@ MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_
52module_param(tcp_friendliness, int, 0644); 52module_param(tcp_friendliness, int, 0644);
53MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness"); 53MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
54 54
55#include <asm/div64.h>
55 56
56/* BIC TCP Parameters */ 57/* BIC TCP Parameters */
57struct bictcp { 58struct bictcp {
@@ -93,67 +94,51 @@ static void bictcp_init(struct sock *sk)
93 tcp_sk(sk)->snd_ssthresh = initial_ssthresh; 94 tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
94} 95}
95 96
96/* 65536 times the cubic root */ 97/* 64bit divisor, dividend and result. dynamic precision */
97static const u64 cubic_table[8] 98static inline u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
98 = {0, 65536, 82570, 94519, 104030, 112063, 119087, 125367}; 99{
100 u_int32_t d = divisor;
101
102 if (divisor > 0xffffffffULL) {
103 unsigned int shift = fls(divisor >> 32);
104
105 d = divisor >> shift;
106 dividend >>= shift;
107 }
108
109 /* avoid 64 bit division if possible */
110 if (dividend >> 32)
111 do_div(dividend, d);
112 else
113 dividend = (uint32_t) dividend / d;
114
115 return dividend;
116}
99 117
100/* 118/*
101 * calculate the cubic root of x 119 * calculate the cubic root of x using Newton-Raphson
102 * the basic idea is that x can be expressed as i*8^j
103 * so cubic_root(x) = cubic_root(i)*2^j
104 * in the following code, x is i, and y is 2^j
105 * because of integer calculation, there are errors in calculation
106 * so finally use binary search to find out the exact solution
107 */ 120 */
108static u32 cubic_root(u64 x) 121static u32 cubic_root(u64 a)
109{ 122{
110 u64 y, app, target, start, end, mid, start_diff, end_diff; 123 u32 x, x1;
111
112 if (x == 0)
113 return 0;
114 124
115 target = x; 125 /* Initial estimate is based on:
116 126 * cbrt(x) = exp(log(x) / 3)
117 /* first estimate lower and upper bound */ 127 */
118 y = 1; 128 x = 1u << (fls64(a)/3);
119 while (x >= 8){
120 x = (x >> 3);
121 y = (y << 1);
122 }
123 start = (y*cubic_table[x])>>16;
124 if (x==7)
125 end = (y<<1);
126 else
127 end = (y*cubic_table[x+1]+65535)>>16;
128
129 /* binary search for more accurate one */
130 while (start < end-1) {
131 mid = (start+end) >> 1;
132 app = mid*mid*mid;
133 if (app < target)
134 start = mid;
135 else if (app > target)
136 end = mid;
137 else
138 return mid;
139 }
140 129
141 /* find the most accurate one from start and end */ 130 /*
142 app = start*start*start; 131 * Iteration based on:
143 if (app < target) 132 * 2
144 start_diff = target - app; 133 * x = ( 2 * x + a / x ) / 3
145 else 134 * k+1 k k
146 start_diff = app - target; 135 */
147 app = end*end*end; 136 do {
148 if (app < target) 137 x1 = x;
149 end_diff = target - app; 138 x = (2 * x + (uint32_t) div64_64(a, x*x)) / 3;
150 else 139 } while (abs(x1 - x) > 1);
151 end_diff = app - target;
152 140
153 if (start_diff < end_diff) 141 return x;
154 return (u32)start;
155 else
156 return (u32)end;
157} 142}
158 143
159/* 144/*