about | summary | refs | log | tree | commit | diff | stats
path: root/net/ipv4/tcp_cubic.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/tcp_cubic.c')
-rw-r--r-- net/ipv4/tcp_cubic.c 56
1 files changed, 42 insertions, 14 deletions
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 71d5f2f29fa6..f376b05cca81 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -39,7 +39,7 @@
39 39
40/* Number of delay samples for detecting the increase of delay */ 40/* Number of delay samples for detecting the increase of delay */
41#define HYSTART_MIN_SAMPLES 8 41#define HYSTART_MIN_SAMPLES 8
42#define HYSTART_DELAY_MIN (2U<<3) 42#define HYSTART_DELAY_MIN (4U<<3)
43#define HYSTART_DELAY_MAX (16U<<3) 43#define HYSTART_DELAY_MAX (16U<<3)
44#define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX) 44#define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX)
45 45
@@ -52,6 +52,7 @@ static int tcp_friendliness __read_mostly = 1;
52static int hystart __read_mostly = 1; 52static int hystart __read_mostly = 1;
53static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY; 53static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY;
54static int hystart_low_window __read_mostly = 16; 54static int hystart_low_window __read_mostly = 16;
55static int hystart_ack_delta __read_mostly = 2;
55 56
56static u32 cube_rtt_scale __read_mostly; 57static u32 cube_rtt_scale __read_mostly;
57static u32 beta_scale __read_mostly; 58static u32 beta_scale __read_mostly;
@@ -75,6 +76,8 @@ MODULE_PARM_DESC(hystart_detect, "hyrbrid slow start detection mechanisms"
75 " 1: packet-train 2: delay 3: both packet-train and delay"); 76 " 1: packet-train 2: delay 3: both packet-train and delay");
76module_param(hystart_low_window, int, 0644); 77module_param(hystart_low_window, int, 0644);
77MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start"); 78MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start");
79module_param(hystart_ack_delta, int, 0644);
80MODULE_PARM_DESC(hystart_ack_delta, "spacing between ack's indicating train (msecs)");
78 81
79/* BIC TCP Parameters */ 82/* BIC TCP Parameters */
80struct bictcp { 83struct bictcp {
@@ -85,17 +88,18 @@ struct bictcp {
85 u32 last_time; /* time when updated last_cwnd */ 88 u32 last_time; /* time when updated last_cwnd */
86 u32 bic_origin_point;/* origin point of bic function */ 89 u32 bic_origin_point;/* origin point of bic function */
87 u32 bic_K; /* time to origin point from the beginning of the current epoch */ 90 u32 bic_K; /* time to origin point from the beginning of the current epoch */
88 u32 delay_min; /* min delay */ 91 u32 delay_min; /* min delay (msec << 3) */
89 u32 epoch_start; /* beginning of an epoch */ 92 u32 epoch_start; /* beginning of an epoch */
90 u32 ack_cnt; /* number of acks */ 93 u32 ack_cnt; /* number of acks */
91 u32 tcp_cwnd; /* estimated tcp cwnd */ 94 u32 tcp_cwnd; /* estimated tcp cwnd */
92#define ACK_RATIO_SHIFT 4 95#define ACK_RATIO_SHIFT 4
96#define ACK_RATIO_LIMIT (32u << ACK_RATIO_SHIFT)
93 u16 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ 97 u16 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */
94 u8 sample_cnt; /* number of samples to decide curr_rtt */ 98 u8 sample_cnt; /* number of samples to decide curr_rtt */
95 u8 found; /* the exit point is found? */ 99 u8 found; /* the exit point is found? */
96 u32 round_start; /* beginning of each round */ 100 u32 round_start; /* beginning of each round */
97 u32 end_seq; /* end_seq of the round */ 101 u32 end_seq; /* end_seq of the round */
98 u32 last_jiffies; /* last time when the ACK spacing is close */ 102 u32 last_ack; /* last time when the ACK spacing is close */
99 u32 curr_rtt; /* the minimum rtt of current round */ 103 u32 curr_rtt; /* the minimum rtt of current round */
100}; 104};
101 105
@@ -116,12 +120,21 @@ static inline void bictcp_reset(struct bictcp *ca)
116 ca->found = 0; 120 ca->found = 0;
117} 121}
118 122
123static inline u32 bictcp_clock(void)
124{
125#if HZ < 1000
126 return ktime_to_ms(ktime_get_real());
127#else
128 return jiffies_to_msecs(jiffies);
129#endif
130}
131
119static inline void bictcp_hystart_reset(struct sock *sk) 132static inline void bictcp_hystart_reset(struct sock *sk)
120{ 133{
121 struct tcp_sock *tp = tcp_sk(sk); 134 struct tcp_sock *tp = tcp_sk(sk);
122 struct bictcp *ca = inet_csk_ca(sk); 135 struct bictcp *ca = inet_csk_ca(sk);
123 136
124 ca->round_start = ca->last_jiffies = jiffies; 137 ca->round_start = ca->last_ack = bictcp_clock();
125 ca->end_seq = tp->snd_nxt; 138 ca->end_seq = tp->snd_nxt;
126 ca->curr_rtt = 0; 139 ca->curr_rtt = 0;
127 ca->sample_cnt = 0; 140 ca->sample_cnt = 0;
@@ -236,8 +249,8 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
236 */ 249 */
237 250
238 /* change the unit from HZ to bictcp_HZ */ 251 /* change the unit from HZ to bictcp_HZ */
239 t = ((tcp_time_stamp + (ca->delay_min>>3) - ca->epoch_start) 252 t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3)
240 << BICTCP_HZ) / HZ; 253 - ca->epoch_start) << BICTCP_HZ) / HZ;
241 254
242 if (t < ca->bic_K) /* t - K */ 255 if (t < ca->bic_K) /* t - K */
243 offs = ca->bic_K - t; 256 offs = ca->bic_K - t;
@@ -258,6 +271,13 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd)
258 ca->cnt = 100 * cwnd; /* very small increment*/ 271 ca->cnt = 100 * cwnd; /* very small increment*/
259 } 272 }
260 273
274 /*
275 * The initial growth of cubic function may be too conservative
276 * when the available bandwidth is still unknown.
277 */
278 if (ca->loss_cwnd == 0 && ca->cnt > 20)
279 ca->cnt = 20; /* increase cwnd 5% per RTT */
280
261 /* TCP Friendly */ 281 /* TCP Friendly */
262 if (tcp_friendliness) { 282 if (tcp_friendliness) {
263 u32 scale = beta_scale; 283 u32 scale = beta_scale;
@@ -339,12 +359,12 @@ static void hystart_update(struct sock *sk, u32 delay)
339 struct bictcp *ca = inet_csk_ca(sk); 359 struct bictcp *ca = inet_csk_ca(sk);
340 360
341 if (!(ca->found & hystart_detect)) { 361 if (!(ca->found & hystart_detect)) {
342 u32 curr_jiffies = jiffies; 362 u32 now = bictcp_clock();
343 363
344 /* first detection parameter - ack-train detection */ 364 /* first detection parameter - ack-train detection */
345 if (curr_jiffies - ca->last_jiffies <= msecs_to_jiffies(2)) { 365 if ((s32)(now - ca->last_ack) <= hystart_ack_delta) {
346 ca->last_jiffies = curr_jiffies; 366 ca->last_ack = now;
347 if (curr_jiffies - ca->round_start >= ca->delay_min>>4) 367 if ((s32)(now - ca->round_start) > ca->delay_min >> 4)
348 ca->found |= HYSTART_ACK_TRAIN; 368 ca->found |= HYSTART_ACK_TRAIN;
349 } 369 }
350 370
@@ -379,8 +399,12 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
379 u32 delay; 399 u32 delay;
380 400
381 if (icsk->icsk_ca_state == TCP_CA_Open) { 401 if (icsk->icsk_ca_state == TCP_CA_Open) {
382 cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; 402 u32 ratio = ca->delayed_ack;
383 ca->delayed_ack += cnt; 403
404 ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT;
405 ratio += cnt;
406
407 ca->delayed_ack = min(ratio, ACK_RATIO_LIMIT);
384 } 408 }
385 409
386 /* Some calls are for duplicates without timetamps */ 410 /* Some calls are for duplicates without timetamps */
@@ -391,7 +415,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
391 if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ) 415 if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ)
392 return; 416 return;
393 417
394 delay = usecs_to_jiffies(rtt_us) << 3; 418 delay = (rtt_us << 3) / USEC_PER_MSEC;
395 if (delay == 0) 419 if (delay == 0)
396 delay = 1; 420 delay = 1;
397 421
@@ -405,7 +429,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us)
405 hystart_update(sk, delay); 429 hystart_update(sk, delay);
406} 430}
407 431
408static struct tcp_congestion_ops cubictcp = { 432static struct tcp_congestion_ops cubictcp __read_mostly = {
409 .init = bictcp_init, 433 .init = bictcp_init,
410 .ssthresh = bictcp_recalc_ssthresh, 434 .ssthresh = bictcp_recalc_ssthresh,
411 .cong_avoid = bictcp_cong_avoid, 435 .cong_avoid = bictcp_cong_avoid,
@@ -447,6 +471,10 @@ static int __init cubictcp_register(void)
447 /* divide by bic_scale and by constant Srtt (100ms) */ 471 /* divide by bic_scale and by constant Srtt (100ms) */
448 do_div(cube_factor, bic_scale * 10); 472 do_div(cube_factor, bic_scale * 10);
449 473
474 /* hystart needs ms clock resolution */
475 if (hystart && HZ < 1000)
476 cubictcp.flags |= TCP_CONG_RTT_STAMP;
477
450 return tcp_register_congestion_control(&cubictcp); 478 return tcp_register_congestion_control(&cubictcp);
451} 479}
452 480