diff options
Diffstat (limited to 'net/ipv4/tcp_cubic.c')
-rw-r--r-- | net/ipv4/tcp_cubic.c | 56 |
1 files changed, 42 insertions, 14 deletions
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 71d5f2f29fa6..f376b05cca81 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c | |||
@@ -39,7 +39,7 @@ | |||
39 | 39 | ||
40 | /* Number of delay samples for detecting the increase of delay */ | 40 | /* Number of delay samples for detecting the increase of delay */ |
41 | #define HYSTART_MIN_SAMPLES 8 | 41 | #define HYSTART_MIN_SAMPLES 8 |
42 | #define HYSTART_DELAY_MIN (2U<<3) | 42 | #define HYSTART_DELAY_MIN (4U<<3) |
43 | #define HYSTART_DELAY_MAX (16U<<3) | 43 | #define HYSTART_DELAY_MAX (16U<<3) |
44 | #define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX) | 44 | #define HYSTART_DELAY_THRESH(x) clamp(x, HYSTART_DELAY_MIN, HYSTART_DELAY_MAX) |
45 | 45 | ||
@@ -52,6 +52,7 @@ static int tcp_friendliness __read_mostly = 1; | |||
52 | static int hystart __read_mostly = 1; | 52 | static int hystart __read_mostly = 1; |
53 | static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY; | 53 | static int hystart_detect __read_mostly = HYSTART_ACK_TRAIN | HYSTART_DELAY; |
54 | static int hystart_low_window __read_mostly = 16; | 54 | static int hystart_low_window __read_mostly = 16; |
55 | static int hystart_ack_delta __read_mostly = 2; | ||
55 | 56 | ||
56 | static u32 cube_rtt_scale __read_mostly; | 57 | static u32 cube_rtt_scale __read_mostly; |
57 | static u32 beta_scale __read_mostly; | 58 | static u32 beta_scale __read_mostly; |
@@ -75,6 +76,8 @@ MODULE_PARM_DESC(hystart_detect, "hyrbrid slow start detection mechanisms" | |||
75 | " 1: packet-train 2: delay 3: both packet-train and delay"); | 76 | " 1: packet-train 2: delay 3: both packet-train and delay"); |
76 | module_param(hystart_low_window, int, 0644); | 77 | module_param(hystart_low_window, int, 0644); |
77 | MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start"); | 78 | MODULE_PARM_DESC(hystart_low_window, "lower bound cwnd for hybrid slow start"); |
79 | module_param(hystart_ack_delta, int, 0644); | ||
80 | MODULE_PARM_DESC(hystart_ack_delta, "spacing between ack's indicating train (msecs)"); | ||
78 | 81 | ||
79 | /* BIC TCP Parameters */ | 82 | /* BIC TCP Parameters */ |
80 | struct bictcp { | 83 | struct bictcp { |
@@ -85,17 +88,18 @@ struct bictcp { | |||
85 | u32 last_time; /* time when updated last_cwnd */ | 88 | u32 last_time; /* time when updated last_cwnd */ |
86 | u32 bic_origin_point;/* origin point of bic function */ | 89 | u32 bic_origin_point;/* origin point of bic function */ |
87 | u32 bic_K; /* time to origin point from the beginning of the current epoch */ | 90 | u32 bic_K; /* time to origin point from the beginning of the current epoch */ |
88 | u32 delay_min; /* min delay */ | 91 | u32 delay_min; /* min delay (msec << 3) */ |
89 | u32 epoch_start; /* beginning of an epoch */ | 92 | u32 epoch_start; /* beginning of an epoch */ |
90 | u32 ack_cnt; /* number of acks */ | 93 | u32 ack_cnt; /* number of acks */ |
91 | u32 tcp_cwnd; /* estimated tcp cwnd */ | 94 | u32 tcp_cwnd; /* estimated tcp cwnd */ |
92 | #define ACK_RATIO_SHIFT 4 | 95 | #define ACK_RATIO_SHIFT 4 |
96 | #define ACK_RATIO_LIMIT (32u << ACK_RATIO_SHIFT) | ||
93 | u16 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ | 97 | u16 delayed_ack; /* estimate the ratio of Packets/ACKs << 4 */ |
94 | u8 sample_cnt; /* number of samples to decide curr_rtt */ | 98 | u8 sample_cnt; /* number of samples to decide curr_rtt */ |
95 | u8 found; /* the exit point is found? */ | 99 | u8 found; /* the exit point is found? */ |
96 | u32 round_start; /* beginning of each round */ | 100 | u32 round_start; /* beginning of each round */ |
97 | u32 end_seq; /* end_seq of the round */ | 101 | u32 end_seq; /* end_seq of the round */ |
98 | u32 last_jiffies; /* last time when the ACK spacing is close */ | 102 | u32 last_ack; /* last time when the ACK spacing is close */ |
99 | u32 curr_rtt; /* the minimum rtt of current round */ | 103 | u32 curr_rtt; /* the minimum rtt of current round */ |
100 | }; | 104 | }; |
101 | 105 | ||
@@ -116,12 +120,21 @@ static inline void bictcp_reset(struct bictcp *ca) | |||
116 | ca->found = 0; | 120 | ca->found = 0; |
117 | } | 121 | } |
118 | 122 | ||
123 | static inline u32 bictcp_clock(void) /* current time in milliseconds, returned as u32 */ | ||
124 | { | ||
125 | #if HZ < 1000 /* fewer than 1000 ticks per second: jiffies cannot give msec resolution */ | ||
126 | return ktime_to_ms(ktime_get_real()); | ||
127 | #else /* HZ >= 1000: derive the millisecond value from the jiffies counter */ | ||
128 | return jiffies_to_msecs(jiffies); | ||
129 | #endif | ||
130 | } | ||
131 | |||
119 | static inline void bictcp_hystart_reset(struct sock *sk) | 132 | static inline void bictcp_hystart_reset(struct sock *sk) |
120 | { | 133 | { |
121 | struct tcp_sock *tp = tcp_sk(sk); | 134 | struct tcp_sock *tp = tcp_sk(sk); |
122 | struct bictcp *ca = inet_csk_ca(sk); | 135 | struct bictcp *ca = inet_csk_ca(sk); |
123 | 136 | ||
124 | ca->round_start = ca->last_jiffies = jiffies; | 137 | ca->round_start = ca->last_ack = bictcp_clock(); |
125 | ca->end_seq = tp->snd_nxt; | 138 | ca->end_seq = tp->snd_nxt; |
126 | ca->curr_rtt = 0; | 139 | ca->curr_rtt = 0; |
127 | ca->sample_cnt = 0; | 140 | ca->sample_cnt = 0; |
@@ -236,8 +249,8 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) | |||
236 | */ | 249 | */ |
237 | 250 | ||
238 | /* change the unit from HZ to bictcp_HZ */ | 251 | /* change the unit from HZ to bictcp_HZ */ |
239 | t = ((tcp_time_stamp + (ca->delay_min>>3) - ca->epoch_start) | 252 | t = ((tcp_time_stamp + msecs_to_jiffies(ca->delay_min>>3) |
240 | << BICTCP_HZ) / HZ; | 253 | - ca->epoch_start) << BICTCP_HZ) / HZ; |
241 | 254 | ||
242 | if (t < ca->bic_K) /* t - K */ | 255 | if (t < ca->bic_K) /* t - K */ |
243 | offs = ca->bic_K - t; | 256 | offs = ca->bic_K - t; |
@@ -258,6 +271,13 @@ static inline void bictcp_update(struct bictcp *ca, u32 cwnd) | |||
258 | ca->cnt = 100 * cwnd; /* very small increment*/ | 271 | ca->cnt = 100 * cwnd; /* very small increment*/ |
259 | } | 272 | } |
260 | 273 | ||
274 | /* | ||
275 | * The initial growth of cubic function may be too conservative | ||
276 | * when the available bandwidth is still unknown. | ||
277 | */ | ||
278 | if (ca->loss_cwnd == 0 && ca->cnt > 20) | ||
279 | ca->cnt = 20; /* increase cwnd 5% per RTT */ | ||
280 | |||
261 | /* TCP Friendly */ | 281 | /* TCP Friendly */ |
262 | if (tcp_friendliness) { | 282 | if (tcp_friendliness) { |
263 | u32 scale = beta_scale; | 283 | u32 scale = beta_scale; |
@@ -339,12 +359,12 @@ static void hystart_update(struct sock *sk, u32 delay) | |||
339 | struct bictcp *ca = inet_csk_ca(sk); | 359 | struct bictcp *ca = inet_csk_ca(sk); |
340 | 360 | ||
341 | if (!(ca->found & hystart_detect)) { | 361 | if (!(ca->found & hystart_detect)) { |
342 | u32 curr_jiffies = jiffies; | 362 | u32 now = bictcp_clock(); |
343 | 363 | ||
344 | /* first detection parameter - ack-train detection */ | 364 | /* first detection parameter - ack-train detection */ |
345 | if (curr_jiffies - ca->last_jiffies <= msecs_to_jiffies(2)) { | 365 | if ((s32)(now - ca->last_ack) <= hystart_ack_delta) { |
346 | ca->last_jiffies = curr_jiffies; | 366 | ca->last_ack = now; |
347 | if (curr_jiffies - ca->round_start >= ca->delay_min>>4) | 367 | if ((s32)(now - ca->round_start) > ca->delay_min >> 4) |
348 | ca->found |= HYSTART_ACK_TRAIN; | 368 | ca->found |= HYSTART_ACK_TRAIN; |
349 | } | 369 | } |
350 | 370 | ||
@@ -379,8 +399,12 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) | |||
379 | u32 delay; | 399 | u32 delay; |
380 | 400 | ||
381 | if (icsk->icsk_ca_state == TCP_CA_Open) { | 401 | if (icsk->icsk_ca_state == TCP_CA_Open) { |
382 | cnt -= ca->delayed_ack >> ACK_RATIO_SHIFT; | 402 | u32 ratio = ca->delayed_ack; |
383 | ca->delayed_ack += cnt; | 403 | |
404 | ratio -= ca->delayed_ack >> ACK_RATIO_SHIFT; | ||
405 | ratio += cnt; | ||
406 | |||
407 | ca->delayed_ack = min(ratio, ACK_RATIO_LIMIT); | ||
384 | } | 408 | } |
385 | 409 | ||
386 | /* Some calls are for duplicates without timestamps */ | 410 | /* Some calls are for duplicates without timestamps */ |
@@ -391,7 +415,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) | |||
391 | if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ) | 415 | if ((s32)(tcp_time_stamp - ca->epoch_start) < HZ) |
392 | return; | 416 | return; |
393 | 417 | ||
394 | delay = usecs_to_jiffies(rtt_us) << 3; | 418 | delay = (rtt_us << 3) / USEC_PER_MSEC; |
395 | if (delay == 0) | 419 | if (delay == 0) |
396 | delay = 1; | 420 | delay = 1; |
397 | 421 | ||
@@ -405,7 +429,7 @@ static void bictcp_acked(struct sock *sk, u32 cnt, s32 rtt_us) | |||
405 | hystart_update(sk, delay); | 429 | hystart_update(sk, delay); |
406 | } | 430 | } |
407 | 431 | ||
408 | static struct tcp_congestion_ops cubictcp = { | 432 | static struct tcp_congestion_ops cubictcp __read_mostly = { |
409 | .init = bictcp_init, | 433 | .init = bictcp_init, |
410 | .ssthresh = bictcp_recalc_ssthresh, | 434 | .ssthresh = bictcp_recalc_ssthresh, |
411 | .cong_avoid = bictcp_cong_avoid, | 435 | .cong_avoid = bictcp_cong_avoid, |
@@ -447,6 +471,10 @@ static int __init cubictcp_register(void) | |||
447 | /* divide by bic_scale and by constant Srtt (100ms) */ | 471 | /* divide by bic_scale and by constant Srtt (100ms) */ |
448 | do_div(cube_factor, bic_scale * 10); | 472 | do_div(cube_factor, bic_scale * 10); |
449 | 473 | ||
474 | /* hystart needs ms clock resolution */ | ||
475 | if (hystart && HZ < 1000) | ||
476 | cubictcp.flags |= TCP_CONG_RTT_STAMP; | ||
477 | |||
450 | return tcp_register_congestion_control(&cubictcp); | 478 | return tcp_register_congestion_control(&cubictcp); |
451 | } | 479 | } |
452 | 480 | ||