diff options
author | Yuchung Cheng <ycheng@google.com> | 2012-05-02 09:30:03 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-05-02 20:56:10 -0400 |
commit | eed530b6c67624db3f2cf477bac7c4d005d8f7ba (patch) | |
tree | c07096807ead2adb9d85e85d1a9cd1ada85755ac /net/ipv4/sysctl_net_ipv4.c | |
parent | 1fbc340514fc3003514bd681b372e1f47ae6183f (diff) |
tcp: early retransmit
This patch implements RFC 5827 early retransmit (ER) for TCP.
It reduces DUPACK threshold (dupthresh) if outstanding packets are
less than 4 to recover losses by fast recovery instead of timeout.
While the algorithm is simple, small but frequent network reordering
makes this feature dangerous: the connection repeatedly enters
false recovery and degrades performance. Therefore we implement
a mitigation suggested in the appendix of the RFC that delays
entering fast recovery by a small interval, i.e., RTT/4. Currently
ER is conservative and is disabled for the rest of the connection
after the first reordering event. A large scale web server
experiment on the performance impact of ER is summarized in
section 6 of the paper "Proportional Rate Reduction for TCP",
IMC 2011. http://conferences.sigcomm.org/imc/2011/docs/p155.pdf
Note that Linux has a similar feature called THIN_DUPACK. The
differences are that THIN_DUPACK does not mitigate reorderings and is only
used after slow start. Currently ER is disabled if THIN_DUPACK is
enabled. I would be happy to merge THIN_DUPACK feature with ER if
people think it's a good idea.
ER is enabled by sysctl_tcp_early_retrans:
0: Disables ER
1: Reduce dupthresh to packets_out - 1 when outstanding packets < 4.
2: (Default) reduce dupthresh like mode 1. In addition, delay
entering fast recovery by RTT/4.
Note: mode 2 is implemented in the third part of this patch series.
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/sysctl_net_ipv4.c')
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 10 |
1 files changed, 10 insertions, 0 deletions
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 33417f84e07f..ef32956ed655 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -27,6 +27,7 @@ | |||
27 | #include <net/tcp_memcontrol.h> | 27 | #include <net/tcp_memcontrol.h> |
28 | 28 | ||
29 | static int zero; | 29 | static int zero; |
30 | static int two = 2; | ||
30 | static int tcp_retr1_max = 255; | 31 | static int tcp_retr1_max = 255; |
31 | static int ip_local_port_range_min[] = { 1, 1 }; | 32 | static int ip_local_port_range_min[] = { 1, 1 }; |
32 | static int ip_local_port_range_max[] = { 65535, 65535 }; | 33 | static int ip_local_port_range_max[] = { 65535, 65535 }; |
@@ -677,6 +678,15 @@ static struct ctl_table ipv4_table[] = { | |||
677 | .proc_handler = proc_dointvec | 678 | .proc_handler = proc_dointvec |
678 | }, | 679 | }, |
679 | { | 680 | { |
681 | .procname = "tcp_early_retrans", | ||
682 | .data = &sysctl_tcp_early_retrans, | ||
683 | .maxlen = sizeof(int), | ||
684 | .mode = 0644, | ||
685 | .proc_handler = proc_dointvec_minmax, | ||
686 | .extra1 = &zero, | ||
687 | .extra2 = &two, | ||
688 | }, | ||
689 | { | ||
680 | .procname = "udp_mem", | 690 | .procname = "udp_mem", |
681 | .data = &sysctl_udp_mem, | 691 | .data = &sysctl_udp_mem, |
682 | .maxlen = sizeof(sysctl_udp_mem), | 692 | .maxlen = sizeof(sysctl_udp_mem), |