diff options
author | Andreas Petlund <apetlund@simula.no> | 2010-02-17 21:47:01 -0500 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2010-02-18 18:43:08 -0500 |
commit | 36e31b0af58728071e8023cf8e20c5166b700717 (patch) | |
tree | 8b4d251bf78965ac7501bea9011786b8255a3312 /net/ipv4 | |
parent | 5aa4b32fc86408705337e941ed716880c63d1590 (diff) |
net: TCP thin linear timeouts
This patch will make TCP use only linear timeouts if the
stream is thin. This will help to avoid the very high latencies
that thin streams suffer because of exponential backoff. This
mechanism is only active if enabled by socket option (setsockopt) or sysctl
and the stream is identified as thin. A maximum of 6 linear
timeouts is tried before exponential backoff is resumed.
Signed-off-by: Andreas Petlund <apetlund@simula.no>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 7 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 7 | ||||
-rw-r--r-- | net/ipv4/tcp_timer.c | 21 |
3 files changed, 34 insertions, 1 deletions
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 7e3712ce399..e6a2460587d 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -576,6 +576,13 @@ static struct ctl_table ipv4_table[] = { | |||
576 | .proc_handler = proc_dointvec | 576 | .proc_handler = proc_dointvec |
577 | }, | 577 | }, |
578 | { | 578 | { |
579 | .procname = "tcp_thin_linear_timeouts", | ||
580 | .data = &sysctl_tcp_thin_linear_timeouts, | ||
581 | .maxlen = sizeof(int), | ||
582 | .mode = 0644, | ||
583 | .proc_handler = proc_dointvec | ||
584 | }, | ||
585 | { | ||
579 | .procname = "udp_mem", | 586 | .procname = "udp_mem", |
580 | .data = &sysctl_udp_mem, | 587 | .data = &sysctl_udp_mem, |
581 | .maxlen = sizeof(sysctl_udp_mem), | 588 | .maxlen = sizeof(sysctl_udp_mem), |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e471d037fcc..21bae9afefe 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -2229,6 +2229,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
2229 | } | 2229 | } |
2230 | break; | 2230 | break; |
2231 | 2231 | ||
2232 | case TCP_THIN_LINEAR_TIMEOUTS: | ||
2233 | if (val < 0 || val > 1) | ||
2234 | err = -EINVAL; | ||
2235 | else | ||
2236 | tp->thin_lto = val; | ||
2237 | break; | ||
2238 | |||
2232 | case TCP_CORK: | 2239 | case TCP_CORK: |
2233 | /* When set indicates to always queue non-full frames. | 2240 | /* When set indicates to always queue non-full frames. |
2234 | * Later the user clears this option and we transmit | 2241 | * Later the user clears this option and we transmit |
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index de7d1bf9114..a17629b8912 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -29,6 +29,7 @@ int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL; | |||
29 | int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; | 29 | int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; |
30 | int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; | 30 | int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; |
31 | int sysctl_tcp_orphan_retries __read_mostly; | 31 | int sysctl_tcp_orphan_retries __read_mostly; |
32 | int sysctl_tcp_thin_linear_timeouts __read_mostly; | ||
32 | 33 | ||
33 | static void tcp_write_timer(unsigned long); | 34 | static void tcp_write_timer(unsigned long); |
34 | static void tcp_delack_timer(unsigned long); | 35 | static void tcp_delack_timer(unsigned long); |
@@ -415,7 +416,25 @@ void tcp_retransmit_timer(struct sock *sk) | |||
415 | icsk->icsk_retransmits++; | 416 | icsk->icsk_retransmits++; |
416 | 417 | ||
417 | out_reset_timer: | 418 | out_reset_timer: |
418 | icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); | 419 | /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is |
420 | * used to reset timer, set to 0. Recalculate 'icsk_rto' as this | ||
421 | * might be increased if the stream oscillates between thin and thick, | ||
422 | * thus the old value might already be too high compared to the value | ||
423 | * set by 'tcp_set_rto' in tcp_input.c which resets the rto without | ||
424 | * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating | ||
425 | * exponential backoff behaviour to avoid continuously hammering | ||
426 | * linear-timeout retransmissions into a black hole | ||
427 | */ | ||
428 | if (sk->sk_state == TCP_ESTABLISHED && | ||
429 | (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) && | ||
430 | tcp_stream_is_thin(tp) && | ||
431 | icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { | ||
432 | icsk->icsk_backoff = 0; | ||
433 | icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX); | ||
434 | } else { | ||
435 | /* Use normal (exponential) backoff */ | ||
436 | icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); | ||
437 | } | ||
419 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); | 438 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); |
420 | if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) | 439 | if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) |
421 | __sk_dst_reset(sk); | 440 | __sk_dst_reset(sk); |