diff options
author | Yuchung Cheng <ycheng@google.com> | 2012-05-02 09:30:03 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-05-02 20:56:10 -0400 |
commit | eed530b6c67624db3f2cf477bac7c4d005d8f7ba (patch) | |
tree | c07096807ead2adb9d85e85d1a9cd1ada85755ac /net/ipv4/tcp_input.c | |
parent | 1fbc340514fc3003514bd681b372e1f47ae6183f (diff) |
tcp: early retransmit
This patch implements RFC 5827 early retransmit (ER) for TCP.
It reduces the DUPACK threshold (dupthresh) when fewer than 4 packets
are outstanding, so that losses are recovered by fast recovery instead
of by timeout.
While the algorithm is simple, small but frequent network reordering
makes this feature dangerous: the connection repeatedly enters
false recovery and degrades performance. Therefore we implement
a mitigation suggested in the appendix of the RFC that delays
entering fast recovery by a small interval, i.e., RTT/4. Currently
ER is conservative and is disabled for the rest of the connection
after the first reordering event. A large scale web server
experiment on the performance impact of ER is summarized in
section 6 of the paper "Proportional Rate Reduction for TCP",
IMC 2011. http://conferences.sigcomm.org/imc/2011/docs/p155.pdf
Note that Linux has a similar feature called THIN_DUPACK. The
differences are that THIN_DUPACK does not mitigate reorderings and is only
used after slow start. Currently ER is disabled if THIN_DUPACK is
enabled. I would be happy to merge THIN_DUPACK feature with ER if
people think it's a good idea.
ER is enabled by sysctl_tcp_early_retrans:
0: Disables ER
1: Reduce dupthresh to packets_out - 1 when outstanding packets < 4.
2: (Default) reduce dupthresh like mode 1. In addition, delay
entering fast recovery by RTT/4.
Note: mode 2 is implemented in the third part of this patch series.
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_input.c')
-rw-r--r-- | net/ipv4/tcp_input.c | 15 |
1 files changed, 15 insertions, 0 deletions
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index be8e09d2c6b1..e042cabb695e 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c | |||
@@ -99,6 +99,7 @@ int sysctl_tcp_thin_dupack __read_mostly; | |||
99 | 99 | ||
100 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; | 100 | int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; |
101 | int sysctl_tcp_abc __read_mostly; | 101 | int sysctl_tcp_abc __read_mostly; |
102 | int sysctl_tcp_early_retrans __read_mostly = 2; | ||
102 | 103 | ||
103 | #define FLAG_DATA 0x01 /* Incoming frame contained data. */ | 104 | #define FLAG_DATA 0x01 /* Incoming frame contained data. */ |
104 | #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ | 105 | #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ |
@@ -906,6 +907,7 @@ static void tcp_init_metrics(struct sock *sk) | |||
906 | if (dst_metric(dst, RTAX_REORDERING) && | 907 | if (dst_metric(dst, RTAX_REORDERING) && |
907 | tp->reordering != dst_metric(dst, RTAX_REORDERING)) { | 908 | tp->reordering != dst_metric(dst, RTAX_REORDERING)) { |
908 | tcp_disable_fack(tp); | 909 | tcp_disable_fack(tp); |
910 | tcp_disable_early_retrans(tp); | ||
909 | tp->reordering = dst_metric(dst, RTAX_REORDERING); | 911 | tp->reordering = dst_metric(dst, RTAX_REORDERING); |
910 | } | 912 | } |
911 | 913 | ||
@@ -988,6 +990,9 @@ static void tcp_update_reordering(struct sock *sk, const int metric, | |||
988 | #endif | 990 | #endif |
989 | tcp_disable_fack(tp); | 991 | tcp_disable_fack(tp); |
990 | } | 992 | } |
993 | |||
994 | if (metric > 0) | ||
995 | tcp_disable_early_retrans(tp); | ||
991 | } | 996 | } |
992 | 997 | ||
993 | /* This must be called before lost_out is incremented */ | 998 | /* This must be called before lost_out is incremented */ |
@@ -2492,6 +2497,16 @@ static int tcp_time_to_recover(struct sock *sk) | |||
2492 | tcp_is_sack(tp) && !tcp_send_head(sk)) | 2497 | tcp_is_sack(tp) && !tcp_send_head(sk)) |
2493 | return 1; | 2498 | return 1; |
2494 | 2499 | ||
2500 | /* Trick#6: TCP early retransmit, per RFC5827. To avoid spurious | ||
2501 | * retransmissions due to small network reorderings, we implement | ||
2502 | * Mitigation A.3 in the RFC and delay the retransmission for a short | ||
2503 | * interval if appropriate. | ||
2504 | */ | ||
2505 | if (tp->do_early_retrans && !tp->retrans_out && tp->sacked_out && | ||
2506 | (tp->packets_out == (tp->sacked_out + 1) && tp->packets_out < 4) && | ||
2507 | !tcp_may_send_now(sk)) | ||
2508 | return 1; | ||
2509 | |||
2495 | return 0; | 2510 | return 0; |
2496 | } | 2511 | } |
2497 | 2512 | ||