aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/networking/ip-sysctl.txt12
-rw-r--r--include/linux/tcp.h5
-rw-r--r--include/net/tcp.h4
-rw-r--r--net/ipv4/sysctl_net_ipv4.c7
-rw-r--r--net/ipv4/tcp.c7
-rw-r--r--net/ipv4/tcp_timer.c21
6 files changed, 54 insertions, 2 deletions
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 2dc7a1d97686..f147310d9af4 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -487,6 +487,18 @@ tcp_dma_copybreak - INTEGER
487 and CONFIG_NET_DMA is enabled. 487 and CONFIG_NET_DMA is enabled.
488 Default: 4096 488 Default: 4096
489 489
490tcp_thin_linear_timeouts - BOOLEAN
491 Enable dynamic triggering of linear timeouts for thin streams.
492 If set, a check is performed upon retransmission by timeout to
493 determine if the stream is thin (less than 4 packets in flight).
494 As long as the stream is found to be thin, up to 6 linear
495 timeouts may be performed before exponential backoff mode is
496 initiated. This improves retransmission latency for
497 non-aggressive thin streams, often found to be time-dependent.
498 For more information on thin streams, see
499 Documentation/networking/tcp-thin.txt
500 Default: 0
501
490UDP variables: 502UDP variables:
491 503
492udp_mem - vector of 3 INTEGERs: min, pressure, max 504udp_mem - vector of 3 INTEGERs: min, pressure, max
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 7fee8a4df931..3ba8b074612f 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -103,6 +103,7 @@ enum {
103#define TCP_CONGESTION 13 /* Congestion control algorithm */ 103#define TCP_CONGESTION 13 /* Congestion control algorithm */
104#define TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */ 104#define TCP_MD5SIG 14 /* TCP MD5 Signature (RFC2385) */
105#define TCP_COOKIE_TRANSACTIONS 15 /* TCP Cookie Transactions */ 105#define TCP_COOKIE_TRANSACTIONS 15 /* TCP Cookie Transactions */
106#define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams*/
106 107
107/* for TCP_INFO socket option */ 108/* for TCP_INFO socket option */
108#define TCPI_OPT_TIMESTAMPS 1 109#define TCPI_OPT_TIMESTAMPS 1
@@ -340,7 +341,9 @@ struct tcp_sock {
340 u32 frto_highmark; /* snd_nxt when RTO occurred */ 341 u32 frto_highmark; /* snd_nxt when RTO occurred */
341 u16 advmss; /* Advertised MSS */ 342 u16 advmss; /* Advertised MSS */
342 u8 frto_counter; /* Number of new acks after RTO */ 343 u8 frto_counter; /* Number of new acks after RTO */
343 u8 nonagle; /* Disable Nagle algorithm? */ 344 u8 nonagle : 4,/* Disable Nagle algorithm? */
345 thin_lto : 1,/* Use linear timeouts for thin streams */
346 unused : 3;
344 347
345/* RTT measurement */ 348/* RTT measurement */
346 u32 srtt; /* smoothed round trip time << 3 */ 349 u32 srtt; /* smoothed round trip time << 3 */
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 0bdc3f640247..6278fc734abd 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -196,6 +196,9 @@ extern void tcp_time_wait(struct sock *sk, int state, int timeo);
196#define TCP_NAGLE_CORK 2 /* Socket is corked */ 196#define TCP_NAGLE_CORK 2 /* Socket is corked */
197#define TCP_NAGLE_PUSH 4 /* Cork is overridden for already queued data */ 197#define TCP_NAGLE_PUSH 4 /* Cork is overridden for already queued data */
198 198
199/* TCP thin-stream limits */
200#define TCP_THIN_LINEAR_RETRIES 6 /* After 6 linear retries, do exp. backoff */
201
199extern struct inet_timewait_death_row tcp_death_row; 202extern struct inet_timewait_death_row tcp_death_row;
200 203
201/* sysctl variables for tcp */ 204/* sysctl variables for tcp */
@@ -241,6 +244,7 @@ extern int sysctl_tcp_workaround_signed_windows;
241extern int sysctl_tcp_slow_start_after_idle; 244extern int sysctl_tcp_slow_start_after_idle;
242extern int sysctl_tcp_max_ssthresh; 245extern int sysctl_tcp_max_ssthresh;
243extern int sysctl_tcp_cookie_size; 246extern int sysctl_tcp_cookie_size;
247extern int sysctl_tcp_thin_linear_timeouts;
244 248
245extern atomic_t tcp_memory_allocated; 249extern atomic_t tcp_memory_allocated;
246extern struct percpu_counter tcp_sockets_allocated; 250extern struct percpu_counter tcp_sockets_allocated;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 7e3712ce3994..e6a2460587d4 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -576,6 +576,13 @@ static struct ctl_table ipv4_table[] = {
576 .proc_handler = proc_dointvec 576 .proc_handler = proc_dointvec
577 }, 577 },
578 { 578 {
579 .procname = "tcp_thin_linear_timeouts",
580 .data = &sysctl_tcp_thin_linear_timeouts,
581 .maxlen = sizeof(int),
582 .mode = 0644,
583 .proc_handler = proc_dointvec
584 },
585 {
579 .procname = "udp_mem", 586 .procname = "udp_mem",
580 .data = &sysctl_udp_mem, 587 .data = &sysctl_udp_mem,
581 .maxlen = sizeof(sysctl_udp_mem), 588 .maxlen = sizeof(sysctl_udp_mem),
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e471d037fcc9..21bae9afefea 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2229,6 +2229,13 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
2229 } 2229 }
2230 break; 2230 break;
2231 2231
2232 case TCP_THIN_LINEAR_TIMEOUTS:
2233 if (val < 0 || val > 1)
2234 err = -EINVAL;
2235 else
2236 tp->thin_lto = val;
2237 break;
2238
2232 case TCP_CORK: 2239 case TCP_CORK:
2233 /* When set indicates to always queue non-full frames. 2240 /* When set indicates to always queue non-full frames.
2234 * Later the user clears this option and we transmit 2241 * Later the user clears this option and we transmit
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index de7d1bf9114f..a17629b8912e 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -29,6 +29,7 @@ int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL;
29int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; 29int sysctl_tcp_retries1 __read_mostly = TCP_RETR1;
30int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; 30int sysctl_tcp_retries2 __read_mostly = TCP_RETR2;
31int sysctl_tcp_orphan_retries __read_mostly; 31int sysctl_tcp_orphan_retries __read_mostly;
32int sysctl_tcp_thin_linear_timeouts __read_mostly;
32 33
33static void tcp_write_timer(unsigned long); 34static void tcp_write_timer(unsigned long);
34static void tcp_delack_timer(unsigned long); 35static void tcp_delack_timer(unsigned long);
@@ -415,7 +416,25 @@ void tcp_retransmit_timer(struct sock *sk)
415 icsk->icsk_retransmits++; 416 icsk->icsk_retransmits++;
416 417
417out_reset_timer: 418out_reset_timer:
418 icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); 419 /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is
420 * used to reset timer, set to 0. Recalculate 'icsk_rto' as this
421 * might be increased if the stream oscillates between thin and thick,
422 * thus the old value might already be too high compared to the value
423 * set by 'tcp_set_rto' in tcp_input.c which resets the rto without
424 * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating
425 * exponential backoff behaviour to avoid continue hammering
426 * linear-timeout retransmissions into a black hole
427 */
428 if (sk->sk_state == TCP_ESTABLISHED &&
429 (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) &&
430 tcp_stream_is_thin(tp) &&
431 icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
432 icsk->icsk_backoff = 0;
433 icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
434 } else {
435 /* Use normal (exponential) backoff */
436 icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
437 }
419 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); 438 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
420 if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) 439 if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1))
421 __sk_dst_reset(sk); 440 __sk_dst_reset(sk);