aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Yuchung Cheng <ycheng@google.com> 2012-05-02 09:30:04 -0400
committer David S. Miller <davem@davemloft.net> 2012-05-02 20:56:10 -0400
commit 750ea2bafa55aaed208b2583470ecd7122225634 (patch)
tree 7656d7697566b0cecc7fbbdd8dbae288bca6d7e3
parent eed530b6c67624db3f2cf477bac7c4d005d8f7ba (diff)
tcp: early retransmit: delayed fast retransmit
Implementing the advanced early retransmit (sysctl_tcp_early_retrans==2).
Delays the fast retransmit by an interval of RTT/4. We borrow the
RTO timer to implement the delay. If we receive another ACK or send
a new packet, the timer is cancelled and restored to original RTO
value offset by time elapsed. When the delayed-ER timer fires,
we enter fast recovery and perform fast retransmit.

Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/linux/tcp.h 3
-rw-r--r-- include/net/tcp.h 3
-rw-r--r-- net/ipv4/tcp_input.c 69
-rw-r--r-- net/ipv4/tcp_output.c 5
-rw-r--r-- net/ipv4/tcp_timer.c 5
5 files changed, 74 insertions, 11 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 7859b416d46e..d9b42c5be088 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -372,7 +372,8 @@ struct tcp_sock {
372 repair : 1, 372 repair : 1,
373 unused : 1; 373 unused : 1;
374 u8 repair_queue; 374 u8 repair_queue;
375 u8 do_early_retrans:1;/* Enable RFC5827 early-retransmit */ 375 u8 do_early_retrans:1,/* Enable RFC5827 early-retransmit */
376 early_retrans_delayed:1; /* Delayed ER timer installed */
376 377
377/* RTT measurement */ 378/* RTT measurement */
378 u32 srtt; /* smoothed round trip time << 3 */ 379 u32 srtt; /* smoothed round trip time << 3 */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 685437a16c97..5283aa4bfa23 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -500,6 +500,8 @@ extern void tcp_send_delayed_ack(struct sock *sk);
500 500
501/* tcp_input.c */ 501/* tcp_input.c */
502extern void tcp_cwnd_application_limited(struct sock *sk); 502extern void tcp_cwnd_application_limited(struct sock *sk);
503extern void tcp_resume_early_retransmit(struct sock *sk);
504extern void tcp_rearm_rto(struct sock *sk);
503 505
504/* tcp_timer.c */ 506/* tcp_timer.c */
505extern void tcp_init_xmit_timers(struct sock *); 507extern void tcp_init_xmit_timers(struct sock *);
@@ -805,6 +807,7 @@ static inline void tcp_enable_early_retrans(struct tcp_sock *tp)
805{ 807{
806 tp->do_early_retrans = sysctl_tcp_early_retrans && 808 tp->do_early_retrans = sysctl_tcp_early_retrans &&
807 !sysctl_tcp_thin_dupack && sysctl_tcp_reordering == 3; 809 !sysctl_tcp_thin_dupack && sysctl_tcp_reordering == 3;
810 tp->early_retrans_delayed = 0;
808} 811}
809 812
810static inline void tcp_disable_early_retrans(struct tcp_sock *tp) 813static inline void tcp_disable_early_retrans(struct tcp_sock *tp)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index e042cabb695e..7096790e06bf 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2344,6 +2344,27 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp)
2344 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; 2344 return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
2345} 2345}
2346 2346
2347static bool tcp_pause_early_retransmit(struct sock *sk, int flag)
2348{
2349 struct tcp_sock *tp = tcp_sk(sk);
2350 unsigned long delay;
2351
2352 /* Delay early retransmit and entering fast recovery for
2353 * max(RTT/4, 2msec) unless ack has ECE mark, no RTT samples
2354 * available, or RTO is scheduled to fire first.
2355 */
2356 if (sysctl_tcp_early_retrans < 2 || (flag & FLAG_ECE) || !tp->srtt)
2357 return false;
2358
2359 delay = max_t(unsigned long, (tp->srtt >> 5), msecs_to_jiffies(2));
2360 if (!time_after(inet_csk(sk)->icsk_timeout, (jiffies + delay)))
2361 return false;
2362
2363 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, delay, TCP_RTO_MAX);
2364 tp->early_retrans_delayed = 1;
2365 return true;
2366}
2367
2347static inline int tcp_skb_timedout(const struct sock *sk, 2368static inline int tcp_skb_timedout(const struct sock *sk,
2348 const struct sk_buff *skb) 2369 const struct sk_buff *skb)
2349{ 2370{
@@ -2451,7 +2472,7 @@ static inline int tcp_head_timedout(const struct sock *sk)
2451 * Main question: may we further continue forward transmission 2472 * Main question: may we further continue forward transmission
2452 * with the same cwnd? 2473 * with the same cwnd?
2453 */ 2474 */
2454static int tcp_time_to_recover(struct sock *sk) 2475static int tcp_time_to_recover(struct sock *sk, int flag)
2455{ 2476{
2456 struct tcp_sock *tp = tcp_sk(sk); 2477 struct tcp_sock *tp = tcp_sk(sk);
2457 __u32 packets_out; 2478 __u32 packets_out;
@@ -2505,7 +2526,7 @@ static int tcp_time_to_recover(struct sock *sk)
2505 if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out && 2526 if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out &&
2506 (tp->packets_out == (tp->sacked_out + 1) && tp->packets_out < 4) && 2527 (tp->packets_out == (tp->sacked_out + 1) && tp->packets_out < 4) &&
2507 !tcp_may_send_now(sk)) 2528 !tcp_may_send_now(sk))
2508 return 1; 2529 return !tcp_pause_early_retransmit(sk, flag);
2509 2530
2510 return 0; 2531 return 0;
2511} 2532}
@@ -3172,7 +3193,7 @@ static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked,
3172 if (icsk->icsk_ca_state <= TCP_CA_Disorder) 3193 if (icsk->icsk_ca_state <= TCP_CA_Disorder)
3173 tcp_try_undo_dsack(sk); 3194 tcp_try_undo_dsack(sk);
3174 3195
3175 if (!tcp_time_to_recover(sk)) { 3196 if (!tcp_time_to_recover(sk, flag)) {
3176 tcp_try_to_open(sk, flag); 3197 tcp_try_to_open(sk, flag);
3177 return; 3198 return;
3178 } 3199 }
@@ -3271,16 +3292,47 @@ static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
3271/* Restart timer after forward progress on connection. 3292/* Restart timer after forward progress on connection.
3272 * RFC2988 recommends to restart timer to now+rto. 3293 * RFC2988 recommends to restart timer to now+rto.
3273 */ 3294 */
3274static void tcp_rearm_rto(struct sock *sk) 3295void tcp_rearm_rto(struct sock *sk)
3275{ 3296{
3276 const struct tcp_sock *tp = tcp_sk(sk); 3297 struct tcp_sock *tp = tcp_sk(sk);
3277 3298
3278 if (!tp->packets_out) { 3299 if (!tp->packets_out) {
3279 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS); 3300 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
3280 } else { 3301 } else {
3281 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 3302 u32 rto = inet_csk(sk)->icsk_rto;
3282 inet_csk(sk)->icsk_rto, TCP_RTO_MAX); 3303 /* Offset the time elapsed after installing regular RTO */
3304 if (tp->early_retrans_delayed) {
3305 struct sk_buff *skb = tcp_write_queue_head(sk);
3306 const u32 rto_time_stamp = TCP_SKB_CB(skb)->when + rto;
3307 s32 delta = (s32)(rto_time_stamp - tcp_time_stamp);
3308 /* delta may not be positive if the socket is locked
3309 * when the delayed ER timer fires and is rescheduled.
3310 */
3311 if (delta > 0)
3312 rto = delta;
3313 }
3314 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
3315 TCP_RTO_MAX);
3283 } 3316 }
3317 tp->early_retrans_delayed = 0;
3318}
3319
3320/* This function is called when the delayed ER timer fires. TCP enters
3321 * fast recovery and performs fast-retransmit.
3322 */
3323void tcp_resume_early_retransmit(struct sock *sk)
3324{
3325 struct tcp_sock *tp = tcp_sk(sk);
3326
3327 tcp_rearm_rto(sk);
3328
3329 /* Stop if ER is disabled after the delayed ER timer is scheduled */
3330 if (!tp->do_early_retrans)
3331 return;
3332
3333 tcp_enter_recovery(sk, false);
3334 tcp_update_scoreboard(sk, 1);
3335 tcp_xmit_retransmit_queue(sk);
3284} 3336}
3285 3337
3286/* If we get here, the whole TSO packet has not been acked. */ 3338/* If we get here, the whole TSO packet has not been acked. */
@@ -3729,6 +3781,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
3729 if (after(ack, tp->snd_nxt)) 3781 if (after(ack, tp->snd_nxt))
3730 goto invalid_ack; 3782 goto invalid_ack;
3731 3783
3784 if (tp->early_retrans_delayed)
3785 tcp_rearm_rto(sk);
3786
3732 if (after(ack, prior_snd_una)) 3787 if (after(ack, prior_snd_una))
3733 flag |= FLAG_SND_UNA_ADVANCED; 3788 flag |= FLAG_SND_UNA_ADVANCED;
3734 3789
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 834e89fc541b..d94733009923 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -78,9 +78,8 @@ static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb)
78 tp->frto_counter = 3; 78 tp->frto_counter = 3;
79 79
80 tp->packets_out += tcp_skb_pcount(skb); 80 tp->packets_out += tcp_skb_pcount(skb);
81 if (!prior_packets) 81 if (!prior_packets || tp->early_retrans_delayed)
82 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, 82 tcp_rearm_rto(sk);
83 inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
84} 83}
85 84
86/* SND.NXT, if window was not shrunk. 85/* SND.NXT, if window was not shrunk.
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 34d4a02c2f16..e911e6c523ec 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -319,6 +319,11 @@ void tcp_retransmit_timer(struct sock *sk)
319 struct tcp_sock *tp = tcp_sk(sk); 319 struct tcp_sock *tp = tcp_sk(sk);
320 struct inet_connection_sock *icsk = inet_csk(sk); 320 struct inet_connection_sock *icsk = inet_csk(sk);
321 321
322 if (tp->early_retrans_delayed) {
323 tcp_resume_early_retransmit(sk);
324 return;
325 }
326
322 if (!tp->packets_out) 327 if (!tp->packets_out)
323 goto out; 328 goto out;
324 329