1 files changed, 57 insertions, 76 deletions
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index bb5dc4bfb6b6..44fd40891acd 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -16,7 +16,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <net/tcp.h>
+#include <asm/div64.h>
 #define BICTCP_BETA_SCALE    1024       /* Scale factor beta calculation
                                         * max_cwnd = snd_cwnd * beta
@@ -34,15 +34,20 @@ static int initial_ssthresh = 100;
 static int bic_scale = 41;
 static int tcp_friendliness = 1;
+static u32 cube_rtt_scale;
+static u32 beta_scale;
+static u64 cube_factor;
+/* Note parameters that are used for precomputing scale factors are read-only */
 module_param(fast_convergence, int, 0644);
 MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence");
 module_param(max_increment, int, 0644);
 MODULE_PARM_DESC(max_increment, "Limit on increment allowed during binary search");
-module_param(beta, int, 0644);
+module_param(beta, int, 0444);
 MODULE_PARM_DESC(beta, "beta for multiplicative increase");
 module_param(initial_ssthresh, int, 0644);
 MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");
-module_param(bic_scale, int, 0644);
+module_param(bic_scale, int, 0444);
 MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)");
 module_param(tcp_friendliness, int, 0644);
 MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
@@ -151,65 +156,13 @@ static u32 cubic_root(u64 x)
                return (u32)end;
 }
-static inline u32 bictcp_K(u32 dist, u32 srtt)
-{
-        u64 d64;
-        u32 d32;
-        u32 count;
-        u32 result;
-        /* calculate the "K" for (wmax-cwnd) = c/rtt * K^3
-           so K = cubic_root( (wmax-cwnd)*rtt/c )
-           the unit of K is bictcp_HZ=2^10, not HZ
-           c = bic_scale >> 10
-           rtt = (tp->srtt >> 3 ) / HZ
-           the following code has been designed and tested for
-           cwnd < 1 million packets
-           RTT < 100 seconds
-           HZ < 1,000,00  (corresponding to 10 nano-second)
-        */
-        /* 1/c * 2^2*bictcp_HZ */
-        d32 = (1 << (10+2*BICTCP_HZ)) / bic_scale;
-        d64 = (__u64)d32;
-        /* srtt * 2^count / HZ
-           1) to get a better accuracy of the following d32,
-           the larger the "count", the better the accuracy
-           2) and avoid overflow of the following d64
-           the larger the "count", the high possibility of overflow
-           3) so find a "count" between bictcp_hz-3 and bictcp_hz
-           "count" may be less than bictcp_HZ,
-           then d64 becomes 0. that is OK
-        */
-        d32 = srtt;
-        count = 0;
-        while (((d32 & 0x80000000)==0) && (count < BICTCP_HZ)){
-                d32 = d32 << 1;
-                count++;
-        }
-        d32 = d32 / HZ;
-        /* (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3*bictcp_HZ)  */
-        d64 = (d64 * dist * d32) >> (count+3-BICTCP_HZ);
-        /* cubic root */
-        d64 = cubic_root(d64);
-        result = (u32)d64;
-        return result;
-}
 /*
 * Compute congestion window to use.
 */
 static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
 {
-        u64 d64;
+        u64 offs;
-        u32 d32, t, srtt, bic_target, min_cnt, max_cnt;
+        u32 delta, t, bic_target, min_cnt, max_cnt;
        ca->ack_cnt++;  /* count the number of ACKs */
@@ -220,8 +173,6 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
        ca->last_cwnd = cwnd;
        ca->last_time = tcp_time_stamp;
-        srtt = (HZ << 3)/10;    /* use real time-based growth function */
        if (ca->epoch_start == 0) {
                ca->epoch_start = tcp_time_stamp;       /* record the beginning of an epoch */
                ca->ack_cnt = 1;                        /* start counting */
@@ -231,7 +182,11 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
                        ca->bic_K = 0;
                        ca->bic_origin_point = cwnd;
                } else {
-                        ca->bic_K = bictcp_K(ca->last_max_cwnd-cwnd, srtt);
+                        /* Compute new K based on
+                         * (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3*bictcp_HZ)
+                         */
+                        ca->bic_K = cubic_root(cube_factor
+                                               * (ca->last_max_cwnd - cwnd));
                        ca->bic_origin_point = ca->last_max_cwnd;
                }
        }
@@ -239,9 +194,9 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
        /* cubic function - calc*/
        /* calculate c * time^3 / rtt,
         *  while considering overflow in calculation of time^3
-         * (so time^3 is done by using d64)
+         * (so time^3 is done by using 64 bit)
         * and without the support of division of 64bit numbers
-         * (so all divisions are done by using d32)
+         * (so all divisions are done by using 32 bit)
         *  also NOTE the unit of those veriables
         *        time  = (t - K) / 2^bictcp_HZ
         *        c = bic_scale >> 10
@@ -255,18 +210,16 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
             << BICTCP_HZ) / HZ;
        if (t < ca->bic_K)              /* t - K */
-                d32 = ca->bic_K - t;
+                offs = ca->bic_K - t;
        else
-                d32 = t - ca->bic_K;
+                offs = t - ca->bic_K;
-        d64 = (u64)d32;
+        /* c/rtt * (t-K)^3 */
-        d32 = (bic_scale << 3) * HZ / srtt;                     /* 1024*c/rtt */
+        delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ);
-        d64 = (d32 * d64 * d64 * d64) >> (10+3*BICTCP_HZ);      /* c/rtt * (t-K)^3 */
-        d32 = (u32)d64;
        if (t < ca->bic_K)                                      /* below origin*/
-                bic_target = ca->bic_origin_point - d32;
+                bic_target = ca->bic_origin_point - delta;
        else                                                    /* above origin*/
-                bic_target = ca->bic_origin_point + d32;
+                bic_target = ca->bic_origin_point + delta;
        /* cubic function - calc bictcp_cnt*/
        if (bic_target > cwnd) {
@@ -288,16 +241,16 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
        /* TCP Friendly */
        if (tcp_friendliness) {
-                u32 scale = 8*(BICTCP_BETA_SCALE+beta)/3/(BICTCP_BETA_SCALE-beta);
+                u32 scale = beta_scale;
-                d32 = (cwnd * scale) >> 3;
+                delta = (cwnd * scale) >> 3;
-                while (ca->ack_cnt > d32) {             /* update tcp cwnd */
+                while (ca->ack_cnt > delta) {           /* update tcp cwnd */
-                        ca->ack_cnt -= d32;
+                        ca->ack_cnt -= delta;
                        ca->tcp_cwnd++;
                }
                if (ca->tcp_cwnd > cwnd){       /* if bic is slower than tcp */
-                        d32 = ca->tcp_cwnd - cwnd;
+                        delta = ca->tcp_cwnd - cwnd;
-                        max_cnt = cwnd / d32;
+                        max_cnt = cwnd / delta;
                        if (ca->cnt > max_cnt)
                                ca->cnt = max_cnt;
                }
@@ -428,6 +381,34 @@ static struct tcp_congestion_ops cubictcp = {
 static int __init cubictcp_register(void)
 {
        BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
+        /* Precompute a bunch of the scaling factors that are used per-packet
+         * based on SRTT of 100ms
+         */
+        beta_scale = 8*(BICTCP_BETA_SCALE+beta)/ 3 / (BICTCP_BETA_SCALE - beta);
+        cube_rtt_scale = (bic_scale << 3) / 10; /* 1024*c/rtt */
+        /* calculate the "K" for (wmax-cwnd) = c/rtt * K^3
+         *  so K = cubic_root( (wmax-cwnd)*rtt/c )
+         * the unit of K is bictcp_HZ=2^10, not HZ
+         *
+         *  c = bic_scale >> 10
+         *  rtt = 100ms
+         *
+         * the following code has been designed and tested for
+         * cwnd < 1 million packets
+         * RTT < 100 seconds
+         * HZ < 1,000,00  (corresponding to 10 nano-second)
+         */
+        /* 1/c * 2^2*bictcp_HZ * srtt */
+        cube_factor = 1ull << (10+3*BICTCP_HZ); /* 2^40 */
+        /* divide by bic_scale and by constant Srtt (100ms) */
+        do_div(cube_factor, bic_scale * 10);
        return tcp_register_congestion_control(&cubictcp);
 }

diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index bb5dc4bfb6b6..44fd40891acd 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c
@@ -16,7 +16,7 @@
16	#include <linux/mm.h>	16	#include <linux/mm.h>
17	#include <linux/module.h>	17	#include <linux/module.h>
18	#include <net/tcp.h>	18	#include <net/tcp.h>
19		19	#include <asm/div64.h>
20		20
21	#define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation	21	#define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation
22	* max_cwnd = snd_cwnd * beta	22	* max_cwnd = snd_cwnd * beta
@@ -34,15 +34,20 @@ static int initial_ssthresh = 100;
34	static int bic_scale = 41;	34	static int bic_scale = 41;
35	static int tcp_friendliness = 1;	35	static int tcp_friendliness = 1;
36		36
		37	static u32 cube_rtt_scale;
		38	static u32 beta_scale;
		39	static u64 cube_factor;
		40
		41	/* Note parameters that are used for precomputing scale factors are read-only */
37	module_param(fast_convergence, int, 0644);	42	module_param(fast_convergence, int, 0644);
38	MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence");	43	MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence");
39	module_param(max_increment, int, 0644);	44	module_param(max_increment, int, 0644);
40	MODULE_PARM_DESC(max_increment, "Limit on increment allowed during binary search");	45	MODULE_PARM_DESC(max_increment, "Limit on increment allowed during binary search");
41	module_param(beta, int, 0644);	46	module_param(beta, int, 0444);
42	MODULE_PARM_DESC(beta, "beta for multiplicative increase");	47	MODULE_PARM_DESC(beta, "beta for multiplicative increase");
43	module_param(initial_ssthresh, int, 0644);	48	module_param(initial_ssthresh, int, 0644);
44	MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");	49	MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");
45	module_param(bic_scale, int, 0644);	50	module_param(bic_scale, int, 0444);
46	MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)");	51	MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)");
47	module_param(tcp_friendliness, int, 0644);	52	module_param(tcp_friendliness, int, 0644);
48	MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");	53	MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
@@ -151,65 +156,13 @@ static u32 cubic_root(u64 x)
151	return (u32)end;	156	return (u32)end;
152	}	157	}
153		158
154	static inline u32 bictcp_K(u32 dist, u32 srtt)
155	{
156	u64 d64;
157	u32 d32;
158	u32 count;
159	u32 result;
160
161	/* calculate the "K" for (wmax-cwnd) = c/rtt * K^3
162	so K = cubic_root( (wmax-cwnd)*rtt/c )
163	the unit of K is bictcp_HZ=2^10, not HZ
164
165	c = bic_scale >> 10
166	rtt = (tp->srtt >> 3 ) / HZ
167
168	the following code has been designed and tested for
169	cwnd < 1 million packets
170	RTT < 100 seconds
171	HZ < 1,000,00 (corresponding to 10 nano-second)
172
173	*/
174
175	/* 1/c * 2^2bictcp_HZ /
176	d32 = (1 << (10+2*BICTCP_HZ)) / bic_scale;
177	d64 = (__u64)d32;
178
179	/* srtt * 2^count / HZ
180	1) to get a better accuracy of the following d32,
181	the larger the "count", the better the accuracy
182	2) and avoid overflow of the following d64
183	the larger the "count", the high possibility of overflow
184	3) so find a "count" between bictcp_hz-3 and bictcp_hz
185	"count" may be less than bictcp_HZ,
186	then d64 becomes 0. that is OK
187	*/
188	d32 = srtt;
189	count = 0;
190	while (((d32 & 0x80000000)==0) && (count < BICTCP_HZ)){
191	d32 = d32 << 1;
192	count++;
193	}
194	d32 = d32 / HZ;
195
196	/* (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3bictcp_HZ) /
197	d64 = (d64 * dist * d32) >> (count+3-BICTCP_HZ);
198
199	/* cubic root */
200	d64 = cubic_root(d64);
201
202	result = (u32)d64;
203	return result;
204	}
205
206	/*	159	/*
207	* Compute congestion window to use.	160	* Compute congestion window to use.
208	*/	161	*/
209	static inline void bictcp_update(struct bictcp *ca, u32 cwnd)	162	static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
210	{	163	{
211	u64 d64;	164	u64 offs;
212	u32 d32, t, srtt, bic_target, min_cnt, max_cnt;	165	u32 delta, t, bic_target, min_cnt, max_cnt;
213		166
214	ca->ack_cnt++; /* count the number of ACKs */	167	ca->ack_cnt++; /* count the number of ACKs */
215		168
@@ -220,8 +173,6 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
220	ca->last_cwnd = cwnd;	173	ca->last_cwnd = cwnd;
221	ca->last_time = tcp_time_stamp;	174	ca->last_time = tcp_time_stamp;
222		175
223	srtt = (HZ << 3)/10; /* use real time-based growth function */
224
225	if (ca->epoch_start == 0) {	176	if (ca->epoch_start == 0) {
226	ca->epoch_start = tcp_time_stamp; /* record the beginning of an epoch */	177	ca->epoch_start = tcp_time_stamp; /* record the beginning of an epoch */
227	ca->ack_cnt = 1; /* start counting */	178	ca->ack_cnt = 1; /* start counting */
@@ -231,7 +182,11 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
231	ca->bic_K = 0;	182	ca->bic_K = 0;
232	ca->bic_origin_point = cwnd;	183	ca->bic_origin_point = cwnd;
233	} else {	184	} else {
234	ca->bic_K = bictcp_K(ca->last_max_cwnd-cwnd, srtt);	185	/* Compute new K based on
		186	* (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3*bictcp_HZ)
		187	*/
		188	ca->bic_K = cubic_root(cube_factor
		189	* (ca->last_max_cwnd - cwnd));
235	ca->bic_origin_point = ca->last_max_cwnd;	190	ca->bic_origin_point = ca->last_max_cwnd;
236	}	191	}
237	}	192	}
@@ -239,9 +194,9 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
239	/* cubic function - calc*/	194	/* cubic function - calc*/
240	/* calculate c * time^3 / rtt,	195	/* calculate c * time^3 / rtt,
241	* while considering overflow in calculation of time^3	196	* while considering overflow in calculation of time^3
242	* (so time^3 is done by using d64)	197	* (so time^3 is done by using 64 bit)
243	* and without the support of division of 64bit numbers	198	* and without the support of division of 64bit numbers
244	* (so all divisions are done by using d32)	199	* (so all divisions are done by using 32 bit)
245	* also NOTE the unit of those veriables	200	* also NOTE the unit of those veriables
246	* time = (t - K) / 2^bictcp_HZ	201	* time = (t - K) / 2^bictcp_HZ
247	* c = bic_scale >> 10	202	* c = bic_scale >> 10
@@ -255,18 +210,16 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
255	<< BICTCP_HZ) / HZ;	210	<< BICTCP_HZ) / HZ;
256		211
257	if (t < ca->bic_K) /* t - K */	212	if (t < ca->bic_K) /* t - K */
258	d32 = ca->bic_K - t;	213	offs = ca->bic_K - t;
259	else	214	else
260	d32 = t - ca->bic_K;	215	offs = t - ca->bic_K;
261		216
262	d64 = (u64)d32;	217	/* c/rtt * (t-K)^3 */
263	d32 = (bic_scale << 3) * HZ / srtt; /* 1024c/rtt /	218	delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ);
264	d64 = (d32 * d64 * d64 * d64) >> (10+3BICTCP_HZ); / c/rtt * (t-K)^3 */
265	d32 = (u32)d64;
266	if (t < ca->bic_K) /* below origin*/	219	if (t < ca->bic_K) /* below origin*/
267	bic_target = ca->bic_origin_point - d32;	220	bic_target = ca->bic_origin_point - delta;
268	else /* above origin*/	221	else /* above origin*/
269	bic_target = ca->bic_origin_point + d32;	222	bic_target = ca->bic_origin_point + delta;
270		223
271	/* cubic function - calc bictcp_cnt*/	224	/* cubic function - calc bictcp_cnt*/
272	if (bic_target > cwnd) {	225	if (bic_target > cwnd) {
@@ -288,16 +241,16 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
288		241
289	/* TCP Friendly */	242	/* TCP Friendly */
290	if (tcp_friendliness) {	243	if (tcp_friendliness) {
291	u32 scale = 8*(BICTCP_BETA_SCALE+beta)/3/(BICTCP_BETA_SCALE-beta);	244	u32 scale = beta_scale;
292	d32 = (cwnd * scale) >> 3;	245	delta = (cwnd * scale) >> 3;
293	while (ca->ack_cnt > d32) { /* update tcp cwnd */	246	while (ca->ack_cnt > delta) { /* update tcp cwnd */
294	ca->ack_cnt -= d32;	247	ca->ack_cnt -= delta;
295	ca->tcp_cwnd++;	248	ca->tcp_cwnd++;
296	}	249	}
297		250
298	if (ca->tcp_cwnd > cwnd){ /* if bic is slower than tcp */	251	if (ca->tcp_cwnd > cwnd){ /* if bic is slower than tcp */
299	d32 = ca->tcp_cwnd - cwnd;	252	delta = ca->tcp_cwnd - cwnd;
300	max_cnt = cwnd / d32;	253	max_cnt = cwnd / delta;
301	if (ca->cnt > max_cnt)	254	if (ca->cnt > max_cnt)
302	ca->cnt = max_cnt;	255	ca->cnt = max_cnt;
303	}	256	}
@@ -428,6 +381,34 @@ static struct tcp_congestion_ops cubictcp = {
428	static int __init cubictcp_register(void)	381	static int __init cubictcp_register(void)
429	{	382	{
430	BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);	383	BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
		384
		385	/* Precompute a bunch of the scaling factors that are used per-packet
		386	* based on SRTT of 100ms
		387	*/
		388
		389	beta_scale = 8*(BICTCP_BETA_SCALE+beta)/ 3 / (BICTCP_BETA_SCALE - beta);
		390
		391	cube_rtt_scale = (bic_scale << 3) / 10; /* 1024c/rtt /
		392
		393	/* calculate the "K" for (wmax-cwnd) = c/rtt * K^3
		394	* so K = cubic_root( (wmax-cwnd)*rtt/c )
		395	* the unit of K is bictcp_HZ=2^10, not HZ
		396	*
		397	* c = bic_scale >> 10
		398	* rtt = 100ms
		399	*
		400	* the following code has been designed and tested for
		401	* cwnd < 1 million packets
		402	* RTT < 100 seconds
		403	* HZ < 1,000,00 (corresponding to 10 nano-second)
		404	*/
		405
		406	/* 1/c * 2^2bictcp_HZ srtt */
		407	cube_factor = 1ull << (10+3BICTCP_HZ); / 2^40 */
		408
		409	/* divide by bic_scale and by constant Srtt (100ms) */
		410	do_div(cube_factor, bic_scale * 10);
		411
431	return tcp_register_congestion_control(&cubictcp);	412	return tcp_register_congestion_control(&cubictcp);
432	}	413	}
433		414