aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--net/ipv4/tcp_cubic.c133
1 files changed, 57 insertions, 76 deletions
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index bb5dc4bfb6b6..44fd40891acd 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -16,7 +16,7 @@
16#include <linux/mm.h> 16#include <linux/mm.h>
17#include <linux/module.h> 17#include <linux/module.h>
18#include <net/tcp.h> 18#include <net/tcp.h>
19 19#include <asm/div64.h>
20 20
21#define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation 21#define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation
22 * max_cwnd = snd_cwnd * beta 22 * max_cwnd = snd_cwnd * beta
@@ -34,15 +34,20 @@ static int initial_ssthresh = 100;
34static int bic_scale = 41; 34static int bic_scale = 41;
35static int tcp_friendliness = 1; 35static int tcp_friendliness = 1;
36 36
37static u32 cube_rtt_scale;
38static u32 beta_scale;
39static u64 cube_factor;
40
41/* Note parameters that are used for precomputing scale factors are read-only */
37module_param(fast_convergence, int, 0644); 42module_param(fast_convergence, int, 0644);
38MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence"); 43MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence");
39module_param(max_increment, int, 0644); 44module_param(max_increment, int, 0644);
40MODULE_PARM_DESC(max_increment, "Limit on increment allowed during binary search"); 45MODULE_PARM_DESC(max_increment, "Limit on increment allowed during binary search");
41module_param(beta, int, 0644); 46module_param(beta, int, 0444);
42MODULE_PARM_DESC(beta, "beta for multiplicative increase"); 47MODULE_PARM_DESC(beta, "beta for multiplicative increase");
43module_param(initial_ssthresh, int, 0644); 48module_param(initial_ssthresh, int, 0644);
44MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold"); 49MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold");
45module_param(bic_scale, int, 0644); 50module_param(bic_scale, int, 0444);
46MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)"); 51MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)");
47module_param(tcp_friendliness, int, 0644); 52module_param(tcp_friendliness, int, 0644);
48MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness"); 53MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness");
@@ -151,65 +156,13 @@ static u32 cubic_root(u64 x)
151 return (u32)end; 156 return (u32)end;
152} 157}
153 158
154static inline u32 bictcp_K(u32 dist, u32 srtt)
155{
156 u64 d64;
157 u32 d32;
158 u32 count;
159 u32 result;
160
161 /* calculate the "K" for (wmax-cwnd) = c/rtt * K^3
162 so K = cubic_root( (wmax-cwnd)*rtt/c )
163 the unit of K is bictcp_HZ=2^10, not HZ
164
165 c = bic_scale >> 10
166 rtt = (tp->srtt >> 3 ) / HZ
167
168 the following code has been designed and tested for
169 cwnd < 1 million packets
170 RTT < 100 seconds
171 HZ < 1,000,00 (corresponding to 10 nano-second)
172
173 */
174
175 /* 1/c * 2^2*bictcp_HZ */
176 d32 = (1 << (10+2*BICTCP_HZ)) / bic_scale;
177 d64 = (__u64)d32;
178
179 /* srtt * 2^count / HZ
180 1) to get a better accuracy of the following d32,
181 the larger the "count", the better the accuracy
182 2) and avoid overflow of the following d64
183 the larger the "count", the high possibility of overflow
184 3) so find a "count" between bictcp_hz-3 and bictcp_hz
185 "count" may be less than bictcp_HZ,
186 then d64 becomes 0. that is OK
187 */
188 d32 = srtt;
189 count = 0;
190 while (((d32 & 0x80000000)==0) && (count < BICTCP_HZ)){
191 d32 = d32 << 1;
192 count++;
193 }
194 d32 = d32 / HZ;
195
196 /* (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3*bictcp_HZ) */
197 d64 = (d64 * dist * d32) >> (count+3-BICTCP_HZ);
198
199 /* cubic root */
200 d64 = cubic_root(d64);
201
202 result = (u32)d64;
203 return result;
204}
205
206/* 159/*
207 * Compute congestion window to use. 160 * Compute congestion window to use.
208 */ 161 */
209static inline void bictcp_update(struct bictcp *ca, u32 cwnd) 162static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
210{ 163{
211 u64 d64; 164 u64 offs;
212 u32 d32, t, srtt, bic_target, min_cnt, max_cnt; 165 u32 delta, t, bic_target, min_cnt, max_cnt;
213 166
214 ca->ack_cnt++; /* count the number of ACKs */ 167 ca->ack_cnt++; /* count the number of ACKs */
215 168
@@ -220,8 +173,6 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
220 ca->last_cwnd = cwnd; 173 ca->last_cwnd = cwnd;
221 ca->last_time = tcp_time_stamp; 174 ca->last_time = tcp_time_stamp;
222 175
223 srtt = (HZ << 3)/10; /* use real time-based growth function */
224
225 if (ca->epoch_start == 0) { 176 if (ca->epoch_start == 0) {
226 ca->epoch_start = tcp_time_stamp; /* record the beginning of an epoch */ 177 ca->epoch_start = tcp_time_stamp; /* record the beginning of an epoch */
227 ca->ack_cnt = 1; /* start counting */ 178 ca->ack_cnt = 1; /* start counting */
@@ -231,7 +182,11 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
231 ca->bic_K = 0; 182 ca->bic_K = 0;
232 ca->bic_origin_point = cwnd; 183 ca->bic_origin_point = cwnd;
233 } else { 184 } else {
234 ca->bic_K = bictcp_K(ca->last_max_cwnd-cwnd, srtt); 185 /* Compute new K based on
186 * (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3*bictcp_HZ)
187 */
188 ca->bic_K = cubic_root(cube_factor
189 * (ca->last_max_cwnd - cwnd));
235 ca->bic_origin_point = ca->last_max_cwnd; 190 ca->bic_origin_point = ca->last_max_cwnd;
236 } 191 }
237 } 192 }
@@ -239,9 +194,9 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
239 /* cubic function - calc*/ 194 /* cubic function - calc*/
240 /* calculate c * time^3 / rtt, 195 /* calculate c * time^3 / rtt,
241 * while considering overflow in calculation of time^3 196 * while considering overflow in calculation of time^3
242 * (so time^3 is done by using d64) 197 * (so time^3 is done by using 64 bit)
243 * and without the support of division of 64bit numbers 198 * and without the support of division of 64bit numbers
244 * (so all divisions are done by using d32) 199 * (so all divisions are done by using 32 bit)
245 * also NOTE the unit of those veriables 200 * also NOTE the unit of those veriables
246 * time = (t - K) / 2^bictcp_HZ 201 * time = (t - K) / 2^bictcp_HZ
247 * c = bic_scale >> 10 202 * c = bic_scale >> 10
@@ -255,18 +210,16 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
255 << BICTCP_HZ) / HZ; 210 << BICTCP_HZ) / HZ;
256 211
257 if (t < ca->bic_K) /* t - K */ 212 if (t < ca->bic_K) /* t - K */
258 d32 = ca->bic_K - t; 213 offs = ca->bic_K - t;
259 else 214 else
260 d32 = t - ca->bic_K; 215 offs = t - ca->bic_K;
261 216
262 d64 = (u64)d32; 217 /* c/rtt * (t-K)^3 */
263 d32 = (bic_scale << 3) * HZ / srtt; /* 1024*c/rtt */ 218 delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ);
264 d64 = (d32 * d64 * d64 * d64) >> (10+3*BICTCP_HZ); /* c/rtt * (t-K)^3 */
265 d32 = (u32)d64;
266 if (t < ca->bic_K) /* below origin*/ 219 if (t < ca->bic_K) /* below origin*/
267 bic_target = ca->bic_origin_point - d32; 220 bic_target = ca->bic_origin_point - delta;
268 else /* above origin*/ 221 else /* above origin*/
269 bic_target = ca->bic_origin_point + d32; 222 bic_target = ca->bic_origin_point + delta;
270 223
271 /* cubic function - calc bictcp_cnt*/ 224 /* cubic function - calc bictcp_cnt*/
272 if (bic_target > cwnd) { 225 if (bic_target > cwnd) {
@@ -288,16 +241,16 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
288 241
289 /* TCP Friendly */ 242 /* TCP Friendly */
290 if (tcp_friendliness) { 243 if (tcp_friendliness) {
291 u32 scale = 8*(BICTCP_BETA_SCALE+beta)/3/(BICTCP_BETA_SCALE-beta); 244 u32 scale = beta_scale;
292 d32 = (cwnd * scale) >> 3; 245 delta = (cwnd * scale) >> 3;
293 while (ca->ack_cnt > d32) { /* update tcp cwnd */ 246 while (ca->ack_cnt > delta) { /* update tcp cwnd */
294 ca->ack_cnt -= d32; 247 ca->ack_cnt -= delta;
295 ca->tcp_cwnd++; 248 ca->tcp_cwnd++;
296 } 249 }
297 250
298 if (ca->tcp_cwnd > cwnd){ /* if bic is slower than tcp */ 251 if (ca->tcp_cwnd > cwnd){ /* if bic is slower than tcp */
299 d32 = ca->tcp_cwnd - cwnd; 252 delta = ca->tcp_cwnd - cwnd;
300 max_cnt = cwnd / d32; 253 max_cnt = cwnd / delta;
301 if (ca->cnt > max_cnt) 254 if (ca->cnt > max_cnt)
302 ca->cnt = max_cnt; 255 ca->cnt = max_cnt;
303 } 256 }
@@ -428,6 +381,34 @@ static struct tcp_congestion_ops cubictcp = {
428static int __init cubictcp_register(void) 381static int __init cubictcp_register(void)
429{ 382{
430 BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); 383 BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE);
384
385 /* Precompute a bunch of the scaling factors that are used per-packet
386 * based on SRTT of 100ms
387 */
388
389 beta_scale = 8*(BICTCP_BETA_SCALE+beta)/ 3 / (BICTCP_BETA_SCALE - beta);
390
391 cube_rtt_scale = (bic_scale << 3) / 10; /* 1024*c/rtt */
392
393 /* calculate the "K" for (wmax-cwnd) = c/rtt * K^3
394 * so K = cubic_root( (wmax-cwnd)*rtt/c )
395 * the unit of K is bictcp_HZ=2^10, not HZ
396 *
397 * c = bic_scale >> 10
398 * rtt = 100ms
399 *
400 * the following code has been designed and tested for
401 * cwnd < 1 million packets
402 * RTT < 100 seconds
403 * HZ < 1,000,00 (corresponding to 10 nano-second)
404 */
405
406 /* 1/c * 2^2*bictcp_HZ * srtt */
407 cube_factor = 1ull << (10+3*BICTCP_HZ); /* 2^40 */
408
409 /* divide by bic_scale and by constant Srtt (100ms) */
410 do_div(cube_factor, bic_scale * 10);
411
431 return tcp_register_congestion_control(&cubictcp); 412 return tcp_register_congestion_control(&cubictcp);
432} 413}
433 414