diff options
-rw-r--r-- | Documentation/networking/ip-sysctl.txt | 12 | ||||
-rw-r--r-- | include/linux/tcp.h | 5 | ||||
-rw-r--r-- | include/net/tcp.h | 4 | ||||
-rw-r--r-- | net/ipv4/sysctl_net_ipv4.c | 7 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 7 | ||||
-rw-r--r-- | net/ipv4/tcp_timer.c | 21 |
6 files changed, 54 insertions, 2 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 2dc7a1d97686..f147310d9af4 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt | |||
@@ -487,6 +487,18 @@ tcp_dma_copybreak - INTEGER | |||
487 | and CONFIG_NET_DMA is enabled. | 487 | and CONFIG_NET_DMA is enabled. |
488 | Default: 4096 | 488 | Default: 4096 |
489 | 489 | ||
490 | tcp_thin_linear_timeouts - BOOLEAN | ||
491 | Enable dynamic triggering of linear timeouts for thin streams. | ||
492 | If set, a check is performed upon retransmission by timeout to | ||
493 | determine if the stream is thin (fewer than 4 packets in flight). | ||
494 | As long as the stream is found to be thin, up to 6 linear | ||
495 | timeouts may be performed before exponential backoff mode is | ||
496 | initiated. This improves retransmission latency for | ||
497 | non-aggressive thin streams, often found to be time-dependent. | ||
498 | For more information on thin streams, see | ||
499 | Documentation/networking/tcp-thin.txt | ||
500 | Default: 0 | ||
501 | |||
490 | UDP variables: | 502 | UDP variables: |
491 | 503 | ||
492 | udp_mem - vector of 3 INTEGERs: min, pressure, max | 504 | udp_mem - vector of 3 INTEGERs: min, pressure, max |
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 7fee8a4df931..3ba8b074612f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h | |||
@@ -103,6 +103,7 @@ enum { | |||
103 | #define TCP_CONGESTION 13 /* Congestion control algorithm */ | 103 | #define TCP_CONGESTION 13 /* Congestion control algorithm */ |
104 | #define TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */ | 104 | #define TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */ |
105 | #define TCP_COOKIE_TRANSACTIONS 15 /* TCP Cookie Transactions */ | 105 | #define TCP_COOKIE_TRANSACTIONS 15 /* TCP Cookie Transactions */ |
106 | #define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams */ | ||
106 | 107 | ||
107 | /* for TCP_INFO socket option */ | 108 | /* for TCP_INFO socket option */ |
108 | #define TCPI_OPT_TIMESTAMPS 1 | 109 | #define TCPI_OPT_TIMESTAMPS 1 |
@@ -340,7 +341,9 @@ struct tcp_sock { | |||
340 | u32 frto_highmark; /* snd_nxt when RTO occurred */ | 341 | u32 frto_highmark; /* snd_nxt when RTO occurred */ |
341 | u16 advmss; /* Advertised MSS */ | 342 | u16 advmss; /* Advertised MSS */ |
342 | u8 frto_counter; /* Number of new acks after RTO */ | 343 | u8 frto_counter; /* Number of new acks after RTO */ |
343 | u8 nonagle; /* Disable Nagle algorithm? */ | 344 | u8 nonagle : 4,/* Disable Nagle algorithm? */ |
345 | thin_lto : 1,/* Use linear timeouts for thin streams */ | ||
346 | unused : 3; | ||
344 | 347 | ||
345 | /* RTT measurement */ | 348 | /* RTT measurement */ |
346 | u32 srtt; /* smoothed round trip time << 3 */ | 349 | u32 srtt; /* smoothed round trip time << 3 */ |
diff --git a/include/net/tcp.h b/include/net/tcp.h index 0bdc3f640247..6278fc734abd 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h | |||
@@ -196,6 +196,9 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo); | |||
196 | #define TCP_NAGLE_CORK 2 /* Socket is corked */ | 196 | #define TCP_NAGLE_CORK 2 /* Socket is corked */ |
197 | #define TCP_NAGLE_PUSH 4 /* Cork is overridden for already queued data */ | 197 | #define TCP_NAGLE_PUSH 4 /* Cork is overridden for already queued data */ |
198 | 198 | ||
199 | /* TCP thin-stream limits */ | ||
200 | #define TCP_THIN_LINEAR_RETRIES 6 /* After 6 linear retries, do exp. backoff */ | ||
201 | |||
199 | extern struct inet_timewait_death_row tcp_death_row; | 202 | extern struct inet_timewait_death_row tcp_death_row; |
200 | 203 | ||
201 | /* sysctl variables for tcp */ | 204 | /* sysctl variables for tcp */ |
@@ -241,6 +244,7 @@ extern int sysctl_tcp_workaround_signed_windows; | |||
241 | extern int sysctl_tcp_slow_start_after_idle; | 244 | extern int sysctl_tcp_slow_start_after_idle; |
242 | extern int sysctl_tcp_max_ssthresh; | 245 | extern int sysctl_tcp_max_ssthresh; |
243 | extern int sysctl_tcp_cookie_size; | 246 | extern int sysctl_tcp_cookie_size; |
247 | extern int sysctl_tcp_thin_linear_timeouts; | ||
244 | 248 | ||
245 | extern atomic_t tcp_memory_allocated; | 249 | extern atomic_t tcp_memory_allocated; |
246 | extern struct percpu_counter tcp_sockets_allocated; | 250 | extern struct percpu_counter tcp_sockets_allocated; |
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 7e3712ce3994..e6a2460587d4 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c | |||
@@ -576,6 +576,13 @@ static struct ctl_table ipv4_table[] = { | |||
576 | .proc_handler = proc_dointvec | 576 | .proc_handler = proc_dointvec |
577 | }, | 577 | }, |
578 | { | 578 | { |
579 | .procname = "tcp_thin_linear_timeouts", | ||
580 | .data = &sysctl_tcp_thin_linear_timeouts, | ||
581 | .maxlen = sizeof(int), | ||
582 | .mode = 0644, | ||
583 | .proc_handler = proc_dointvec | ||
584 | }, | ||
585 | { | ||
579 | .procname = "udp_mem", | 586 | .procname = "udp_mem", |
580 | .data = &sysctl_udp_mem, | 587 | .data = &sysctl_udp_mem, |
581 | .maxlen = sizeof(sysctl_udp_mem), | 588 | .maxlen = sizeof(sysctl_udp_mem), |
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e471d037fcc9..21bae9afefea 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c | |||
@@ -2229,6 +2229,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level, | |||
2229 | } | 2229 | } |
2230 | break; | 2230 | break; |
2231 | 2231 | ||
2232 | case TCP_THIN_LINEAR_TIMEOUTS: | ||
2233 | if (val < 0 || val > 1) | ||
2234 | err = -EINVAL; | ||
2235 | else | ||
2236 | tp->thin_lto = val; | ||
2237 | break; | ||
2238 | |||
2232 | case TCP_CORK: | 2239 | case TCP_CORK: |
2233 | /* When set indicates to always queue non-full frames. | 2240 | /* When set indicates to always queue non-full frames. |
2234 | * Later the user clears this option and we transmit | 2241 | * Later the user clears this option and we transmit |
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index de7d1bf9114f..a17629b8912e 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c | |||
@@ -29,6 +29,7 @@ int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL; | |||
29 | int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; | 29 | int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; |
30 | int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; | 30 | int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; |
31 | int sysctl_tcp_orphan_retries __read_mostly; | 31 | int sysctl_tcp_orphan_retries __read_mostly; |
32 | int sysctl_tcp_thin_linear_timeouts __read_mostly; | ||
32 | 33 | ||
33 | static void tcp_write_timer(unsigned long); | 34 | static void tcp_write_timer(unsigned long); |
34 | static void tcp_delack_timer(unsigned long); | 35 | static void tcp_delack_timer(unsigned long); |
@@ -415,7 +416,25 @@ void tcp_retransmit_timer(struct sock *sk) | |||
415 | icsk->icsk_retransmits++; | 416 | icsk->icsk_retransmits++; |
416 | 417 | ||
417 | out_reset_timer: | 418 | out_reset_timer: |
418 | icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); | 419 | /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is |
420 | * used to reset timer, set to 0. Recalculate 'icsk_rto' as this | ||
421 | * might be increased if the stream oscillates between thin and thick, | ||
422 | * thus the old value might already be too high compared to the value | ||
423 | * set by 'tcp_set_rto' in tcp_input.c which resets the rto without | ||
424 | * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating | ||
425 | * exponential backoff behaviour to avoid continuing to hammer | ||
426 | * linear-timeout retransmissions into a black hole. | ||
427 | */ | ||
428 | if (sk->sk_state == TCP_ESTABLISHED && | ||
429 | (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) && | ||
430 | tcp_stream_is_thin(tp) && | ||
431 | icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { | ||
432 | icsk->icsk_backoff = 0; | ||
433 | icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX); | ||
434 | } else { | ||
435 | /* Use normal (exponential) backoff */ | ||
436 | icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); | ||
437 | } | ||
419 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); | 438 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); |
420 | if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) | 439 | if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) |
421 | __sk_dst_reset(sk); | 440 | __sk_dst_reset(sk); |