aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_input.c
diff options
context:
space:
mode:
author: Yuchung Cheng <ycheng@google.com> 2012-05-02 09:30:03 -0400
committer: David S. Miller <davem@davemloft.net> 2012-05-02 20:56:10 -0400
commit: eed530b6c67624db3f2cf477bac7c4d005d8f7ba (patch)
tree: c07096807ead2adb9d85e85d1a9cd1ada85755ac /net/ipv4/tcp_input.c
parent: 1fbc340514fc3003514bd681b372e1f47ae6183f (diff)
tcp: early retransmit
This patch implements RFC 5827 early retransmit (ER) for TCP. It reduces the DUPACK threshold (dupthresh) when fewer than 4 packets are outstanding, so that losses are recovered by fast recovery instead of by timeout. While the algorithm is simple, small but frequent network reordering makes this feature dangerous: the connection repeatedly enters false recovery and its performance degrades. Therefore, we implement a mitigation suggested in the appendix of the RFC that delays entering fast recovery by a small interval, i.e., RTT/4. Currently ER is conservative and is disabled for the rest of the connection after the first reordering event. A large-scale web server experiment on the performance impact of ER is summarized in section 6 of the paper "Proportional Rate Reduction for TCP", IMC 2011. http://conferences.sigcomm.org/imc/2011/docs/p155.pdf Note that Linux has a similar feature called THIN_DUPACK. The differences are that THIN_DUPACK does not mitigate reordering and is only used after slow start. Currently ER is disabled if THIN_DUPACK is enabled. I would be happy to merge the THIN_DUPACK feature with ER if people think it's a good idea. ER is controlled by sysctl_tcp_early_retrans: 0: Disables ER. 1: Reduces dupthresh to packets_out - 1 when outstanding packets < 4. 2: (Default) Reduces dupthresh as in mode 1 and, in addition, delays entering fast recovery by RTT/4. Note: mode 2 is implemented in the third part of this patch series. Signed-off-by: Yuchung Cheng <ycheng@google.com> Acked-by: Neal Cardwell <ncardwell@google.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- net/ipv4/tcp_input.c 15
1 files changed, 15 insertions, 0 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index be8e09d2c6b1..e042cabb695e 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -99,6 +99,7 @@ int sysctl_tcp_thin_dupack __read_mostly;
99 99
100int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; 100int sysctl_tcp_moderate_rcvbuf __read_mostly = 1;
101int sysctl_tcp_abc __read_mostly; 101int sysctl_tcp_abc __read_mostly;
102int sysctl_tcp_early_retrans __read_mostly = 2;
102 103
103#define FLAG_DATA 0x01 /* Incoming frame contained data. */ 104#define FLAG_DATA 0x01 /* Incoming frame contained data. */
104#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ 105#define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */
@@ -906,6 +907,7 @@ static void tcp_init_metrics(struct sock *sk)
906 if (dst_metric(dst, RTAX_REORDERING) && 907 if (dst_metric(dst, RTAX_REORDERING) &&
907 tp->reordering != dst_metric(dst, RTAX_REORDERING)) { 908 tp->reordering != dst_metric(dst, RTAX_REORDERING)) {
908 tcp_disable_fack(tp); 909 tcp_disable_fack(tp);
910 tcp_disable_early_retrans(tp);
909 tp->reordering = dst_metric(dst, RTAX_REORDERING); 911 tp->reordering = dst_metric(dst, RTAX_REORDERING);
910 } 912 }
911 913
@@ -988,6 +990,9 @@ static void tcp_update_reordering(struct sock *sk, const int metric,
988#endif 990#endif
989 tcp_disable_fack(tp); 991 tcp_disable_fack(tp);
990 } 992 }
993
994 if (metric > 0)
995 tcp_disable_early_retrans(tp);
991} 996}
992 997
993/* This must be called before lost_out is incremented */ 998/* This must be called before lost_out is incremented */
@@ -2492,6 +2497,16 @@ static int tcp_time_to_recover(struct sock *sk)
2492 tcp_is_sack(tp) && !tcp_send_head(sk)) 2497 tcp_is_sack(tp) && !tcp_send_head(sk))
2493 return 1; 2498 return 1;
2494 2499
2500 /* Trick#6: TCP early retransmit, per RFC5827. To avoid spurious
2501 * retransmissions due to small network reorderings, we implement
2502 * Mitigation A.3 in the RFC and delay the retransmission for a short
2503 * interval if appropriate.
2504 */
2505 if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out &&
2506 (tp->packets_out == (tp->sacked_out + 1) && tp->packets_out < 4) &&
2507 !tcp_may_send_now(sk))
2508 return 1;
2509
2495 return 0; 2510 return 0;
2496} 2511}
2497 2512