diff options
-rw-r--r-- | net/ipv4/tcp_cubic.c | 133 |
1 files changed, 57 insertions, 76 deletions
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index bb5dc4bfb6b6..44fd40891acd 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c | |||
@@ -16,7 +16,7 @@ | |||
16 | #include <linux/mm.h> | 16 | #include <linux/mm.h> |
17 | #include <linux/module.h> | 17 | #include <linux/module.h> |
18 | #include <net/tcp.h> | 18 | #include <net/tcp.h> |
19 | 19 | #include <asm/div64.h> | |
20 | 20 | ||
21 | #define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation | 21 | #define BICTCP_BETA_SCALE 1024 /* Scale factor beta calculation |
22 | * max_cwnd = snd_cwnd * beta | 22 | * max_cwnd = snd_cwnd * beta |
@@ -34,15 +34,20 @@ static int initial_ssthresh = 100; | |||
34 | static int bic_scale = 41; | 34 | static int bic_scale = 41; |
35 | static int tcp_friendliness = 1; | 35 | static int tcp_friendliness = 1; |
36 | 36 | ||
37 | static u32 cube_rtt_scale; | ||
38 | static u32 beta_scale; | ||
39 | static u64 cube_factor; | ||
40 | |||
41 | /* Note parameters that are used for precomputing scale factors are read-only */ | ||
37 | module_param(fast_convergence, int, 0644); | 42 | module_param(fast_convergence, int, 0644); |
38 | MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence"); | 43 | MODULE_PARM_DESC(fast_convergence, "turn on/off fast convergence"); |
39 | module_param(max_increment, int, 0644); | 44 | module_param(max_increment, int, 0644); |
40 | MODULE_PARM_DESC(max_increment, "Limit on increment allowed during binary search"); | 45 | MODULE_PARM_DESC(max_increment, "Limit on increment allowed during binary search"); |
41 | module_param(beta, int, 0644); | 46 | module_param(beta, int, 0444); |
42 | MODULE_PARM_DESC(beta, "beta for multiplicative increase"); | 47 | MODULE_PARM_DESC(beta, "beta for multiplicative increase"); |
43 | module_param(initial_ssthresh, int, 0644); | 48 | module_param(initial_ssthresh, int, 0644); |
44 | MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold"); | 49 | MODULE_PARM_DESC(initial_ssthresh, "initial value of slow start threshold"); |
45 | module_param(bic_scale, int, 0644); | 50 | module_param(bic_scale, int, 0444); |
46 | MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)"); | 51 | MODULE_PARM_DESC(bic_scale, "scale (scaled by 1024) value for bic function (bic_scale/1024)"); |
47 | module_param(tcp_friendliness, int, 0644); | 52 | module_param(tcp_friendliness, int, 0644); |
48 | MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness"); | 53 | MODULE_PARM_DESC(tcp_friendliness, "turn on/off tcp friendliness"); |
@@ -151,65 +156,13 @@ static u32 cubic_root(u64 x) | |||
151 | return (u32)end; | 156 | return (u32)end; |
152 | } | 157 | } |
153 | 158 | ||
154 | static inline u32 bictcp_K(u32 dist, u32 srtt) | ||
155 | { | ||
156 | u64 d64; | ||
157 | u32 d32; | ||
158 | u32 count; | ||
159 | u32 result; | ||
160 | |||
161 | /* calculate the "K" for (wmax-cwnd) = c/rtt * K^3 | ||
162 | so K = cubic_root( (wmax-cwnd)*rtt/c ) | ||
163 | the unit of K is bictcp_HZ=2^10, not HZ | ||
164 | |||
165 | c = bic_scale >> 10 | ||
166 | rtt = (tp->srtt >> 3 ) / HZ | ||
167 | |||
168 | the following code has been designed and tested for | ||
169 | cwnd < 1 million packets | ||
170 | RTT < 100 seconds | ||
171 | HZ < 1,000,00 (corresponding to 10 nano-second) | ||
172 | |||
173 | */ | ||
174 | |||
175 | /* 1/c * 2^2*bictcp_HZ */ | ||
176 | d32 = (1 << (10+2*BICTCP_HZ)) / bic_scale; | ||
177 | d64 = (__u64)d32; | ||
178 | |||
179 | /* srtt * 2^count / HZ | ||
180 | 1) to get a better accuracy of the following d32, | ||
181 | the larger the "count", the better the accuracy | ||
182 | 2) and avoid overflow of the following d64 | ||
183 | the larger the "count", the high possibility of overflow | ||
184 | 3) so find a "count" between bictcp_hz-3 and bictcp_hz | ||
185 | "count" may be less than bictcp_HZ, | ||
186 | then d64 becomes 0. that is OK | ||
187 | */ | ||
188 | d32 = srtt; | ||
189 | count = 0; | ||
190 | while (((d32 & 0x80000000)==0) && (count < BICTCP_HZ)){ | ||
191 | d32 = d32 << 1; | ||
192 | count++; | ||
193 | } | ||
194 | d32 = d32 / HZ; | ||
195 | |||
196 | /* (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3*bictcp_HZ) */ | ||
197 | d64 = (d64 * dist * d32) >> (count+3-BICTCP_HZ); | ||
198 | |||
199 | /* cubic root */ | ||
200 | d64 = cubic_root(d64); | ||
201 | |||
202 | result = (u32)d64; | ||
203 | return result; | ||
204 | } | ||
205 | |||
206 | /* | 159 | /* |
207 | * Compute congestion window to use. | 160 | * Compute congestion window to use. |
208 | */ | 161 | */ |
209 | static inline void bictcp_update(struct bictcp *ca, u32 cwnd) | 162 | static inline void bictcp_update(struct bictcp *ca, u32 cwnd) |
210 | { | 163 | { |
211 | u64 d64; | 164 | u64 offs; |
212 | u32 d32, t, srtt, bic_target, min_cnt, max_cnt; | 165 | u32 delta, t, bic_target, min_cnt, max_cnt; |
213 | 166 | ||
214 | ca->ack_cnt++; /* count the number of ACKs */ | 167 | ca->ack_cnt++; /* count the number of ACKs */ |
215 | 168 | ||
@@ -220,8 +173,6 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) | |||
220 | ca->last_cwnd = cwnd; | 173 | ca->last_cwnd = cwnd; |
221 | ca->last_time = tcp_time_stamp; | 174 | ca->last_time = tcp_time_stamp; |
222 | 175 | ||
223 | srtt = (HZ << 3)/10; /* use real time-based growth function */ | ||
224 | |||
225 | if (ca->epoch_start == 0) { | 176 | if (ca->epoch_start == 0) { |
226 | ca->epoch_start = tcp_time_stamp; /* record the beginning of an epoch */ | 177 | ca->epoch_start = tcp_time_stamp; /* record the beginning of an epoch */ |
227 | ca->ack_cnt = 1; /* start counting */ | 178 | ca->ack_cnt = 1; /* start counting */ |
@@ -231,7 +182,11 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) | |||
231 | ca->bic_K = 0; | 182 | ca->bic_K = 0; |
232 | ca->bic_origin_point = cwnd; | 183 | ca->bic_origin_point = cwnd; |
233 | } else { | 184 | } else { |
234 | ca->bic_K = bictcp_K(ca->last_max_cwnd-cwnd, srtt); | 185 | /* Compute new K based on |
186 | * (wmax-cwnd) * (srtt>>3 / HZ) / c * 2^(3*bictcp_HZ) | ||
187 | */ | ||
188 | ca->bic_K = cubic_root(cube_factor | ||
189 | * (ca->last_max_cwnd - cwnd)); | ||
235 | ca->bic_origin_point = ca->last_max_cwnd; | 190 | ca->bic_origin_point = ca->last_max_cwnd; |
236 | } | 191 | } |
237 | } | 192 | } |
@@ -239,9 +194,9 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) | |||
239 | /* cubic function - calc*/ | 194 | /* cubic function - calc*/ |
240 | /* calculate c * time^3 / rtt, | 195 | /* calculate c * time^3 / rtt, |
241 | * while considering overflow in calculation of time^3 | 196 | * while considering overflow in calculation of time^3 |
242 | * (so time^3 is done by using d64) | 197 | * (so time^3 is done by using 64 bit) |
243 | * and without the support of division of 64bit numbers | 198 | * and without the support of division of 64bit numbers |
244 | * (so all divisions are done by using d32) | 199 | * (so all divisions are done by using 32 bit) |
245 | * also NOTE the unit of those veriables | 200 | * also NOTE the unit of those veriables |
246 | * time = (t - K) / 2^bictcp_HZ | 201 | * time = (t - K) / 2^bictcp_HZ |
247 | * c = bic_scale >> 10 | 202 | * c = bic_scale >> 10 |
@@ -255,18 +210,16 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) | |||
255 | << BICTCP_HZ) / HZ; | 210 | << BICTCP_HZ) / HZ; |
256 | 211 | ||
257 | if (t < ca->bic_K) /* t - K */ | 212 | if (t < ca->bic_K) /* t - K */ |
258 | d32 = ca->bic_K - t; | 213 | offs = ca->bic_K - t; |
259 | else | 214 | else |
260 | d32 = t - ca->bic_K; | 215 | offs = t - ca->bic_K; |
261 | 216 | ||
262 | d64 = (u64)d32; | 217 | /* c/rtt * (t-K)^3 */ |
263 | d32 = (bic_scale << 3) * HZ / srtt; /* 1024*c/rtt */ | 218 | delta = (cube_rtt_scale * offs * offs * offs) >> (10+3*BICTCP_HZ); |
264 | d64 = (d32 * d64 * d64 * d64) >> (10+3*BICTCP_HZ); /* c/rtt * (t-K)^3 */ | ||
265 | d32 = (u32)d64; | ||
266 | if (t < ca->bic_K) /* below origin*/ | 219 | if (t < ca->bic_K) /* below origin*/ |
267 | bic_target = ca->bic_origin_point - d32; | 220 | bic_target = ca->bic_origin_point - delta; |
268 | else /* above origin*/ | 221 | else /* above origin*/ |
269 | bic_target = ca->bic_origin_point + d32; | 222 | bic_target = ca->bic_origin_point + delta; |
270 | 223 | ||
271 | /* cubic function - calc bictcp_cnt*/ | 224 | /* cubic function - calc bictcp_cnt*/ |
272 | if (bic_target > cwnd) { | 225 | if (bic_target > cwnd) { |
@@ -288,16 +241,16 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) | |||
288 | 241 | ||
289 | /* TCP Friendly */ | 242 | /* TCP Friendly */ |
290 | if (tcp_friendliness) { | 243 | if (tcp_friendliness) { |
291 | u32 scale = 8*(BICTCP_BETA_SCALE+beta)/3/(BICTCP_BETA_SCALE-beta); | 244 | u32 scale = beta_scale; |
292 | d32 = (cwnd * scale) >> 3; | 245 | delta = (cwnd * scale) >> 3; |
293 | while (ca->ack_cnt > d32) { /* update tcp cwnd */ | 246 | while (ca->ack_cnt > delta) { /* update tcp cwnd */ |
294 | ca->ack_cnt -= d32; | 247 | ca->ack_cnt -= delta; |
295 | ca->tcp_cwnd++; | 248 | ca->tcp_cwnd++; |
296 | } | 249 | } |
297 | 250 | ||
298 | if (ca->tcp_cwnd > cwnd){ /* if bic is slower than tcp */ | 251 | if (ca->tcp_cwnd > cwnd){ /* if bic is slower than tcp */ |
299 | d32 = ca->tcp_cwnd - cwnd; | 252 | delta = ca->tcp_cwnd - cwnd; |
300 | max_cnt = cwnd / d32; | 253 | max_cnt = cwnd / delta; |
301 | if (ca->cnt > max_cnt) | 254 | if (ca->cnt > max_cnt) |
302 | ca->cnt = max_cnt; | 255 | ca->cnt = max_cnt; |
303 | } | 256 | } |
@@ -428,6 +381,34 @@ static struct tcp_congestion_ops cubictcp = { | |||
428 | static int __init cubictcp_register(void) | 381 | static int __init cubictcp_register(void) |
429 | { | 382 | { |
430 | BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); | 383 | BUG_ON(sizeof(struct bictcp) > ICSK_CA_PRIV_SIZE); |
384 | |||
385 | /* Precompute a bunch of the scaling factors that are used per-packet | ||
386 | * based on SRTT of 100ms | ||
387 | */ | ||
388 | |||
389 | beta_scale = 8*(BICTCP_BETA_SCALE+beta)/ 3 / (BICTCP_BETA_SCALE - beta); | ||
390 | |||
391 | cube_rtt_scale = (bic_scale << 3) / 10; /* 1024*c/rtt */ | ||
392 | |||
393 | /* calculate the "K" for (wmax-cwnd) = c/rtt * K^3 | ||
394 | * so K = cubic_root( (wmax-cwnd)*rtt/c ) | ||
395 | * the unit of K is bictcp_HZ=2^10, not HZ | ||
396 | * | ||
397 | * c = bic_scale >> 10 | ||
398 | * rtt = 100ms | ||
399 | * | ||
400 | * the following code has been designed and tested for | ||
401 | * cwnd < 1 million packets | ||
402 | * RTT < 100 seconds | ||
403 | * HZ < 1,000,00 (corresponding to 10 nano-second) | ||
404 | */ | ||
405 | |||
406 | /* 1/c * 2^2*bictcp_HZ * srtt */ | ||
407 | cube_factor = 1ull << (10+3*BICTCP_HZ); /* 2^40 */ | ||
408 | |||
409 | /* divide by bic_scale and by constant Srtt (100ms) */ | ||
410 | do_div(cube_factor, bic_scale * 10); | ||
411 | |||
431 | return tcp_register_congestion_control(&cubictcp); | 412 | return tcp_register_congestion_control(&cubictcp); |
432 | } | 413 | } |
433 | 414 | ||