diff options
author | Yuchung Cheng <ycheng@google.com> | 2012-05-02 09:30:04 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-05-02 20:56:10 -0400 |
commit | 750ea2bafa55aaed208b2583470ecd7122225634 (patch) | |
tree | 7656d7697566b0cecc7fbbdd8dbae288bca6d7e3 | |
parent | eed530b6c67624db3f2cf477bac7c4d005d8f7ba (diff) |
tcp: early retransmit: delayed fast retransmit
Implement the advanced early retransmit mode (sysctl_tcp_early_retrans==2),
which delays the fast retransmit by max(RTT/4, 2 msec). We borrow the
RTO timer to implement the delay. If we receive another ACK or send
a new packet, the delayed-ER timer is cancelled and the RTO timer is
restored to its original expiry time (the RTO offset by the time already
elapsed since it was armed). When the delayed-ER timer fires,
we enter fast recovery and perform fast retransmit.
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/linux/tcp.h | 3 | ||||
-rw-r--r-- | include/net/tcp.h | 3 | ||||
-rw-r--r-- | net/ipv4/tcp_input.c | 69 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 5 | ||||
-rw-r--r-- | net/ipv4/tcp_timer.c | 5 |
5 files changed, 74 insertions, 11 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 7859b416d46e..d9b42c5be088 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h | |||
@@ -372,7 +372,8 @@ struct tcp_sock { | |||
372 | repair : 1, | 372 | repair : 1, |
373 | unused : 1; | 373 | unused : 1; |
374 | u8 repair_queue; | 374 | u8 repair_queue; |
375 | u8 do_early_retrans:1;/* Enable RFC5827 early-retransmit */ | 375 | u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */ |
376 | early_retrans_delayed:1; /* Delayed ER timer installed */ | ||
376 | 377 | ||
377 | /* RTT measurement */ | 378 | /* RTT measurement */ |
378 | u32 srtt; /* smoothed round trip time << 3 */ | 379 | u32 srtt; /* smoothed round trip time << 3 */ |
diff --git a/include/net/tcp.h b/include/net/tcp.h index 685437a16c97..5283aa4bfa23 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h | |||
@@ -500,6 +500,8 @@ extern void tcp_send_delayed_ack(struct sock *sk); | |||
500 | 500 | ||
501 | /* tcp_input.c */ | 501 | /* tcp_input.c */ |
502 | extern void tcp_cwnd_application_limited(struct sock *sk); | 502 | extern void tcp_cwnd_application_limited(struct sock *sk); |
503 | extern void tcp_resume_early_retransmit(struct sock *sk); | ||
504 | extern void tcp_rearm_rto(struct sock *sk); | ||
503 | 505 | ||
504 | /* tcp_timer.c */ | 506 | /* tcp_timer.c */ |
505 | extern void tcp_init_xmit_timers(struct sock *); | 507 | extern void tcp_init_xmit_timers(struct sock *); |
@@ -805,6 +807,7 @@ static inline void tcp_enable_early_retrans(struct tcp_sock *tp) | |||
805 | { | 807 | { |
806 | tp->do_early_retrans = sysctl_tcp_early_retrans && | 808 | tp->do_early_retrans = sysctl_tcp_early_retrans && |
807 | !sysctl_tcp_thin_dupack && sysctl_tcp_reordering == 3; | 809 | !sysctl_tcp_thin_dupack && sysctl_tcp_reordering == 3; |
810 | tp->early_retrans_delayed = 0; | ||
808 | } | 811 | } |
809 | 812 | ||
810 | static inline void tcp_disable_early_retrans(struct tcp_sock *tp) | 813 | static inline void tcp_disable_early_retrans(struct tcp_sock *tp) |
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e042cabb695e..7096790e06bf 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -2344,6 +2344,27 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp) | |||
2344 | return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; | 2344 | return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; |
2345 | } | 2345 | } |
2346 | 2346 | ||
2347 | static bool tcp_pause_early_retransmit(struct sock *sk, int flag) | ||
2348 | { | ||
2349 | struct tcp_sock *tp = tcp_sk(sk); | ||
2350 | unsigned long delay; | ||
2351 | |||
2352 | /* Delay early retransmit and entering fast recovery for | ||
2353 | * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples | ||
2354 | * available, or RTO is scheduled to fire first. | ||
2355 | */ | ||
2356 | if (sysctl_tcp_early_retrans < 2 || (flag & FLAG_ECE) || !tp->srtt) | ||
2357 | return false; | ||
2358 | |||
2359 | delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2)); | ||
2360 | if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay))) | ||
2361 | return false; | ||
2362 | |||
2363 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, TCP_RTO_MAX); | ||
2364 | tp->early_retrans_delayed = 1; | ||
2365 | return true; | ||
2366 | } | ||
2367 | |||
2347 | static inline int tcp_skb_timedout(const struct sock *sk, | 2368 | static inline int tcp_skb_timedout(const struct sock *sk, |
2348 | const struct sk_buff *skb) | 2369 | const struct sk_buff *skb) |
2349 | { | 2370 | { |
@@ -2451,7 +2472,7 @@ static inline int tcp_head_timedout(const struct sock *sk) | |||
2451 | * Main question: may we further continue forward transmission | 2472 | * Main question: may we further continue forward transmission |
2452 | * with the same cwnd? | 2473 | * with the same cwnd? |
2453 | */ | 2474 | */ |
2454 | static int tcp_time_to_recover(struct sock *sk) | 2475 | static int tcp_time_to_recover(struct sock *sk, int flag) |
2455 | { | 2476 | { |
2456 | struct tcp_sock *tp = tcp_sk(sk); | 2477 | struct tcp_sock *tp = tcp_sk(sk); |
2457 | __u32 packets_out; | 2478 | __u32 packets_out; |
@@ -2505,7 +2526,7 @@ static int tcp_time_to_recover(struct sock *sk) | |||
2505 | if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out && | 2526 | if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out && |
2506 | (tp->packets_out == (tp->sacked_out + 1) && tp->packets_out < 4) && | 2527 | (tp->packets_out == (tp->sacked_out + 1) && tp->packets_out < 4) && |
2507 | !tcp_may_send_now(sk)) | 2528 | !tcp_may_send_now(sk)) |
2508 | return 1; | 2529 | return !tcp_pause_early_retransmit(sk, flag); |
2509 | 2530 | ||
2510 | return 0; | 2531 | return 0; |
2511 | } | 2532 | } |
@@ -3172,7 +3193,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, | |||
3172 | if (icsk->icsk_ca_state <= TCP_CA_Disorder) | 3193 | if (icsk->icsk_ca_state <= TCP_CA_Disorder) |
3173 | tcp_try_undo_dsack(sk); | 3194 | tcp_try_undo_dsack(sk); |
3174 | 3195 | ||
3175 | if (!tcp_time_to_recover(sk)) { | 3196 | if (!tcp_time_to_recover(sk, flag)) { |
3176 | tcp_try_to_open(sk, flag); | 3197 | tcp_try_to_open(sk, flag); |
3177 | return; | 3198 | return; |
3178 | } | 3199 | } |
@@ -3271,16 +3292,47 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight) | |||
3271 | /* Restart timer after forward progress on connection. | 3292 | /* Restart timer after forward progress on connection. |
3272 | * RFC2988 recommends to restart timer to now+rto. | 3293 | * RFC2988 recommends to restart timer to now+rto. |
3273 | */ | 3294 | */ |
3274 | static void tcp_rearm_rto(struct sock *sk) | 3295 | void tcp_rearm_rto(struct sock *sk) |
3275 | { | 3296 | { |
3276 | const struct tcp_sock *tp = tcp_sk(sk); | 3297 | struct tcp_sock *tp = tcp_sk(sk); |
3277 | 3298 | ||
3278 | if (!tp->packets_out) { | 3299 | if (!tp->packets_out) { |
3279 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); | 3300 | inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); |
3280 | } else { | 3301 | } else { |
3281 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | 3302 | u32 rto = inet_csk(sk)->icsk_rto; |
3282 | inet_csk(sk)->icsk_rto, TCP_RTO_MAX); | 3303 | /* Offset the time elapsed after installing regular RTO */ |
3304 | if (tp->early_retrans_delayed) { | ||
3305 | struct sk_buff *skb = tcp_write_queue_head(sk); | ||
3306 | const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto; | ||
3307 | s32 delta = (s32)(rto_time_stamp - tcp_time_stamp); | ||
3308 | /* delta may not be positive if the socket is locked | ||
3309 | * when the delayed ER timer fires and is rescheduled. | ||
3310 | */ | ||
3311 | if (delta > 0) | ||
3312 | rto = delta; | ||
3313 | } | ||
3314 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, | ||
3315 | TCP_RTO_MAX); | ||
3283 | } | 3316 | } |
3317 | tp->early_retrans_delayed = 0; | ||
3318 | } | ||
3319 | |||
3320 | /* This function is called when the delayed ER timer fires. TCP enters | ||
3321 | * fast recovery and performs fast-retransmit. | ||
3322 | */ | ||
3323 | void tcp_resume_early_retransmit(struct sock *sk) | ||
3324 | { | ||
3325 | struct tcp_sock *tp = tcp_sk(sk); | ||
3326 | |||
3327 | tcp_rearm_rto(sk); | ||
3328 | |||
3329 | /* Stop if ER is disabled after the delayed ER timer is scheduled */ | ||
3330 | if (!tp->do_early_retrans) | ||
3331 | return; | ||
3332 | |||
3333 | tcp_enter_recovery(sk, false); | ||
3334 | tcp_update_scoreboard(sk, 1); | ||
3335 | tcp_xmit_retransmit_queue(sk); | ||
3284 | } | 3336 | } |
3285 | 3337 | ||
3286 | /* If we get here, the whole TSO packet has not been acked. */ | 3338 | /* If we get here, the whole TSO packet has not been acked. */ |
@@ -3729,6 +3781,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) | |||
3729 | if (after(ack, tp->snd_nxt)) | 3781 | if (after(ack, tp->snd_nxt)) |
3730 | goto invalid_ack; | 3782 | goto invalid_ack; |
3731 | 3783 | ||
3784 | if (tp->early_retrans_delayed) | ||
3785 | tcp_rearm_rto(sk); | ||
3786 | |||
3732 | if (after(ack, prior_snd_una)) | 3787 | if (after(ack, prior_snd_una)) |
3733 | flag |= FLAG_SND_UNA_ADVANCED; | 3788 | flag |= FLAG_SND_UNA_ADVANCED; |
3734 | 3789 | ||
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 834e89fc541b..d94733009923 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -78,9 +78,8 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) | |||
78 | tp->frto_counter = 3; | 78 | tp->frto_counter = 3; |
79 | 79 | ||
80 | tp->packets_out += tcp_skb_pcount(skb); | 80 | tp->packets_out += tcp_skb_pcount(skb); |
81 | if (!prior_packets) | 81 | if (!prior_packets || tp->early_retrans_delayed) |
82 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | 82 | tcp_rearm_rto(sk); |
83 | inet_csk(sk)->icsk_rto, TCP_RTO_MAX); | ||
84 | } | 83 | } |
85 | 84 | ||
86 | /* SND.NXT, if window was not shrunk. | 85 | /* SND.NXT, if window was not shrunk. |
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 34d4a02c2f16..e911e6c523ec 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -319,6 +319,11 @@ void tcp_retransmit_timer(struct sock *sk) | |||
319 | struct tcp_sock *tp = tcp_sk(sk); | 319 | struct tcp_sock *tp = tcp_sk(sk); |
320 | struct inet_connection_sock *icsk = inet_csk(sk); | 320 | struct inet_connection_sock *icsk = inet_csk(sk); |
321 | 321 | ||
322 | if (tp->early_retrans_delayed) { | ||
323 | tcp_resume_early_retransmit(sk); | ||
324 | return; | ||
325 | } | ||
326 | |||
322 | if (!tp->packets_out) | 327 | if (!tp->packets_out) |
323 | goto out; | 328 | goto out; |
324 | 329 | ||