1 files changed, 39 insertions, 54 deletions
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 44fd40891acd..31a4986dfbf7 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -52,6 +52,7 @@ MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_
 module_param(tcp_friendliness, int, 0644);
 MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
+#include <asm/div64.h>
 /* BIC TCP Parameters */
 struct bictcp {
@@ -93,67 +94,51 @@ static void bictcp_init(struct sock *sk)
                tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
 }
-/* 65536 times the cubic root */
+/* 64bit divisor, dividend and result. dynamic precision */
-static const u64 cubic_table[8]
+static inline u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
-        = {0, 65536, 82570, 94519, 104030, 112063, 119087, 125367};
+{
+        u_int32_t d = divisor;
+        if (divisor > 0xffffffffULL) {
+                unsigned int shift = fls(divisor >> 32);
+                d = divisor >> shift;
+                dividend >>= shift;
+        }
+        /* avoid 64 bit division if possible */
+        if (dividend >> 32)
+                do_div(dividend, d);
+        else
+                dividend = (uint32_t) dividend / d;
+        return dividend;
+}
 /*
- * calculate the cubic root of x
+ * calculate the cubic root of x using Newton-Raphson
- * the basic idea is that x can be expressed as i*8^j
- * so cubic_root(x) = cubic_root(i)*2^j
- *  in the following code, x is i, and y is 2^j
- *  because of integer calculation, there are errors in calculation
- *  so finally use binary search to find out the exact solution
 */
-static u32 cubic_root(u64 x)
+static u32 cubic_root(u64 a)
 {
-        u64 y, app, target, start, end, mid, start_diff, end_diff;
+        u32 x, x1;
-        if (x == 0)
-                return 0;
-        target = x;
+        /* Initial estimate is based on:
+         * cbrt(x) = exp(log(x) / 3)
-        /* first estimate lower and upper bound */
+         */
-        y = 1;
+        x = 1u << (fls64(a)/3);
-        while (x >= 8){
-                x = (x >> 3);
-                y = (y << 1);
-        }
-        start = (y*cubic_table[x])>>16;
-        if (x==7)
-                end = (y<<1);
-        else
-                end = (y*cubic_table[x+1]+65535)>>16;
-        /* binary search for more accurate one */
-        while (start < end-1) {
-                mid = (start+end) >> 1;
-                app = mid*mid*mid;
-                if (app < target)
-                        start = mid;
-                else if (app > target)
-                        end = mid;
-                else
-                        return mid;
-        }
-        /* find the most accurate one from start and end */
+        /*
-        app = start*start*start;
+         * Iteration based on:
-        if (app < target)
+         *                         2
-                start_diff = target - app;
+         * x    = ( 2 * x  +  a / x  ) / 3
-        else
+         *  k+1          k         k
-                start_diff = app - target;
+         */
-        app = end*end*end;
+        do {
-        if (app < target)
+                x1 = x;
-                end_diff = target - app;
+                x = (2 * x + (uint32_t) div64_64(a, x*x)) / 3;
-        else
+        } while (abs(x1 - x) > 1);
-                end_diff = app - target;
-        if (start_diff < end_diff)
+        return x;
-                return (u32)start;
-        else
-                return (u32)end;
 }
 /*

diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 44fd40891acd..31a4986dfbf7 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c
@@ -52,6 +52,7 @@ MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_
52	module_param(tcp_friendliness, int, 0644);	52	module_param(tcp_friendliness, int, 0644);
53	MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");	53	MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
54		54
		55	#include <asm/div64.h>
55		56
56	/* BIC TCP Parameters */	57	/* BIC TCP Parameters */
57	struct bictcp {	58	struct bictcp {
@@ -93,67 +94,51 @@ static void bictcp_init(struct sock *sk)
93	tcp_sk(sk)->snd_ssthresh = initial_ssthresh;	94	tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
94	}	95	}
95		96
96	/* 65536 times the cubic root */	97	/* 64bit divisor, dividend and result. dynamic precision */
97	static const u64 cubic_table[8]	98	static inline u_int64_t div64_64(u_int64_t dividend, u_int64_t divisor)
98	= {0, 65536, 82570, 94519, 104030, 112063, 119087, 125367};	99	{
		100	u_int32_t d = divisor;
		101
		102	if (divisor > 0xffffffffULL) {
		103	unsigned int shift = fls(divisor >> 32);
		104
		105	d = divisor >> shift;
		106	dividend >>= shift;
		107	}
		108
		109	/* avoid 64 bit division if possible */
		110	if (dividend >> 32)
		111	do_div(dividend, d);
		112	else
		113	dividend = (uint32_t) dividend / d;
		114
		115	return dividend;
		116	}
99		117
100	/*	118	/*
101	* calculate the cubic root of x	119	* calculate the cubic root of x using Newton-Raphson
102	* the basic idea is that x can be expressed as i*8^j
103	* so cubic_root(x) = cubic_root(i)*2^j
104	* in the following code, x is i, and y is 2^j
105	* because of integer calculation, there are errors in calculation
106	* so finally use binary search to find out the exact solution
107	*/	120	*/
108	static u32 cubic_root(u64 x)	121	static u32 cubic_root(u64 a)
109	{	122	{
110	u64 y, app, target, start, end, mid, start_diff, end_diff;	123	u32 x, x1;
111
112	if (x == 0)
113	return 0;
114		124
115	target = x;	125	/* Initial estimate is based on:
116		126	* cbrt(x) = exp(log(x) / 3)
117	/* first estimate lower and upper bound */	127	*/
118	y = 1;	128	x = 1u << (fls64(a)/3);
119	while (x >= 8){
120	x = (x >> 3);
121	y = (y << 1);
122	}
123	start = (y*cubic_table[x])>>16;
124	if (x==7)
125	end = (y<<1);
126	else
127	end = (y*cubic_table[x+1]+65535)>>16;
128
129	/* binary search for more accurate one */
130	while (start < end-1) {
131	mid = (start+end) >> 1;
132	app = midmidmid;
133	if (app < target)
134	start = mid;
135	else if (app > target)
136	end = mid;
137	else
138	return mid;
139	}
140		129
141	/* find the most accurate one from start and end */	130	/*
142	app = startstartstart;	131	* Iteration based on:
143	if (app < target)	132	* 2
144	start_diff = target - app;	133	* x = ( 2 * x + a / x ) / 3
145	else	134	* k+1 k k
146	start_diff = app - target;	135	*/
147	app = endendend;	136	do {
148	if (app < target)	137	x1 = x;
149	end_diff = target - app;	138	x = (2 * x + (uint32_t) div64_64(a, x*x)) / 3;
150	else	139	} while (abs(x1 - x) > 1);
151	end_diff = app - target;
152		140
153	if (start_diff < end_diff)	141	return x;
154	return (u32)start;
155	else
156	return (u32)end;
157	}	142	}
158		143
159	/*	144	/*