Diffstat (limited to 'net/ipv4/tcp_timer.c')
-rw-r--r--	net/ipv4/tcp_timer.c | 69
1 file changed, 62 insertions(+), 7 deletions(-)
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index cdb2ca7684d4..8a0ab2977f1f 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -19,6 +19,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/gfp.h>
 #include <net/tcp.h>
 
 int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES;
@@ -29,6 +30,7 @@ int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL;
 int sysctl_tcp_retries1 __read_mostly = TCP_RETR1;
 int sysctl_tcp_retries2 __read_mostly = TCP_RETR2;
 int sysctl_tcp_orphan_retries __read_mostly;
+int sysctl_tcp_thin_linear_timeouts __read_mostly;
 
 static void tcp_write_timer(unsigned long);
 static void tcp_delack_timer(unsigned long);
@@ -132,6 +134,35 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
 	}
 }
 
+/* This function calculates a "timeout" which is equivalent to the timeout of a
+ * TCP connection after "boundary" unsuccessful, exponentially backed-off
+ * retransmissions with an initial RTO of TCP_RTO_MIN.
+ */
+static bool retransmits_timed_out(struct sock *sk,
+				  unsigned int boundary)
+{
+	unsigned int timeout, linear_backoff_thresh;
+	unsigned int start_ts;
+
+	if (!inet_csk(sk)->icsk_retransmits)
+		return false;
+
+	if (unlikely(!tcp_sk(sk)->retrans_stamp))
+		start_ts = TCP_SKB_CB(tcp_write_queue_head(sk))->when;
+	else
+		start_ts = tcp_sk(sk)->retrans_stamp;
+
+	linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN);
+
+	if (boundary <= linear_backoff_thresh)
+		timeout = ((2 << boundary) - 1) * TCP_RTO_MIN;
+	else
+		timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN +
+			(boundary - linear_backoff_thresh) * TCP_RTO_MAX;
+
+	return (tcp_time_stamp - start_ts) >= timeout;
+}
+
 /* A write timeout has occurred. Process the after effects. */
 static int tcp_write_timeout(struct sock *sk)
 {
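
To make the new helper's arithmetic concrete, here is a small userspace sketch of the same formula. It assumes HZ = 1000, so TCP_RTO_MIN = 0.2 s and TCP_RTO_MAX = 120 s, and works in plain seconds rather than jiffies; ilog2() is reimplemented (as ilog2_u) because it is a kernel helper. With the default boundaries (TCP_RETR1 = 3, TCP_RETR2 = 15) it prints the familiar totals: about 3 s of retrying before black-hole probing kicks in, and about 924.6 s before the connection is finally dropped.

/* Userspace sketch of the retransmits_timed_out() timeout formula,
 * assuming HZ = 1000; the kernel itself works in jiffies.
 */
#include <stdio.h>

#define RTO_MIN 0.2	/* seconds; TCP_RTO_MIN is HZ/5 */
#define RTO_MAX 120.0	/* seconds; TCP_RTO_MAX is 120*HZ */

/* floor(log2(v)), mirroring the kernel's ilog2() */
static unsigned int ilog2_u(unsigned int v)
{
	unsigned int r = 0;

	while (v >>= 1)
		r++;
	return r;
}

static double boundary_timeout(unsigned int boundary)
{
	unsigned int thresh = ilog2_u((unsigned int)(RTO_MAX / RTO_MIN)); /* ilog2(600) == 9 */

	if (boundary <= thresh)
		return ((2 << boundary) - 1) * RTO_MIN;
	return ((2 << thresh) - 1) * RTO_MIN +
	       (boundary - thresh) * RTO_MAX;
}

int main(void)
{
	printf("boundary = 3 (TCP_RETR1)  -> %6.1f s\n", boundary_timeout(3));	/*   3.0 s */
	printf("boundary = 15 (TCP_RETR2) -> %6.1f s\n", boundary_timeout(15));	/* 924.6 s */
	return 0;
}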
@@ -141,14 +172,14 @@ static int tcp_write_timeout(struct sock *sk)
 
 	if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
 		if (icsk->icsk_retransmits)
-			dst_negative_advice(&sk->sk_dst_cache);
+			dst_negative_advice(&sk->sk_dst_cache, sk);
 		retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries;
 	} else {
 		if (retransmits_timed_out(sk, sysctl_tcp_retries1)) {
 			/* Black hole detection */
 			tcp_mtu_probing(icsk, sk);
 
-			dst_negative_advice(&sk->sk_dst_cache);
+			dst_negative_advice(&sk->sk_dst_cache, sk);
 		}
 
 		retry_until = sysctl_tcp_retries2;
@@ -303,15 +334,15 @@ void tcp_retransmit_timer(struct sock *sk)
 		struct inet_sock *inet = inet_sk(sk);
 		if (sk->sk_family == AF_INET) {
 			LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
-				       &inet->daddr, ntohs(inet->dport),
-				       inet->num, tp->snd_una, tp->snd_nxt);
+				       &inet->inet_daddr, ntohs(inet->inet_dport),
+				       inet->inet_num, tp->snd_una, tp->snd_nxt);
 		}
 #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
 		else if (sk->sk_family == AF_INET6) {
 			struct ipv6_pinfo *np = inet6_sk(sk);
 			LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n",
-				       &np->daddr, ntohs(inet->dport),
-				       inet->num, tp->snd_una, tp->snd_nxt);
+				       &np->daddr, ntohs(inet->inet_dport),
+				       inet->inet_num, tp->snd_una, tp->snd_nxt);
 		}
 #endif
 #endif
@@ -386,7 +417,25 @@ void tcp_retransmit_timer(struct sock *sk)
 	icsk->icsk_retransmits++;
 
 out_reset_timer:
-	icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
+	/* If stream is thin, use linear timeouts. Since 'icsk_backoff' is
+	 * used to reset the timer, set it to 0. Recalculate 'icsk_rto' as it
+	 * might be increased if the stream oscillates between thin and thick,
+	 * thus the old value might already be too high compared to the value
+	 * set by 'tcp_set_rto' in tcp_input.c which resets the rto without
+	 * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating
+	 * exponential backoff behaviour, to avoid continuing to hammer
+	 * linear-timeout retransmissions into a black hole.
+	 */
+	if (sk->sk_state == TCP_ESTABLISHED &&
+	    (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) &&
+	    tcp_stream_is_thin(tp) &&
+	    icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
+		icsk->icsk_backoff = 0;
+		icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX);
+	} else {
+		/* Use normal (exponential) backoff */
+		icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX);
+	}
 	inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX);
 	if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1))
 		__sk_dst_reset(sk);
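
The effect of the new branch is easiest to see side by side. The sketch below compares the two schedules under the simplifying assumption that the RTT-derived RTO returned by __tcp_set_rto() stays at 0.2 s (in the kernel it is recomputed from the smoothed RTT on every pass); TCP_THIN_LINEAR_RETRIES is 6 in this series. A thin stream keeps firing at the unbacked-off RTO for its first six retransmissions and only then falls back to doubling, while the default path doubles from the first loss.

/* Sketch comparing thin-stream (linear) and default (exponential)
 * retransmission timer schedules; the 0.2 s RTO is an assumption.
 */
#include <stdio.h>

#define RTO_MAX			120.0
#define THIN_LINEAR_RETRIES	6	/* TCP_THIN_LINEAR_RETRIES */

int main(void)
{
	double rtt_rto = 0.2;	/* assumed __tcp_set_rto() result, seconds */
	double thin = rtt_rto, dflt = rtt_rto;
	unsigned int r;

	for (r = 1; r <= 8; r++) {
		/* Thin path: keep the unbacked-off RTO for the first
		 * THIN_LINEAR_RETRIES retransmissions, then back off. */
		if (r <= THIN_LINEAR_RETRIES)
			thin = rtt_rto;
		else
			thin = thin * 2 < RTO_MAX ? thin * 2 : RTO_MAX;

		/* Default path: exponential backoff from the first loss. */
		dflt = dflt * 2 < RTO_MAX ? dflt * 2 : RTO_MAX;

		printf("RTO after retransmit %u: thin %5.1f s, default %5.1f s\n",
		       r, thin, dflt);
	}
	return 0;
}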
@@ -445,6 +494,12 @@ static void tcp_synack_timer(struct sock *sk)
 				   TCP_TIMEOUT_INIT, TCP_RTO_MAX);
 }
 
+void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req)
+{
+	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEOUTS);
+}
+EXPORT_SYMBOL(tcp_syn_ack_timeout);
+
 void tcp_set_keepalive(struct sock *sk, int val)
 {
 	if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))
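
The new behaviour is off by default: it applies only when net.ipv4.tcp_thin_linear_timeouts is set (sysctl -w net.ipv4.tcp_thin_linear_timeouts=1) or when the socket has tp->thin_lto set, which the same patch series exposes to userspace as a TCP_THIN_LINEAR_TIMEOUTS socket option. A minimal sketch of the per-socket variant, assuming that companion option:

/* Sketch: opting one socket into thin-stream linear timeouts via the
 * TCP_THIN_LINEAR_TIMEOUTS option from the same patch series; the
 * fallback define matches the value in linux/tcp.h.
 */
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

#ifndef TCP_THIN_LINEAR_TIMEOUTS
#define TCP_THIN_LINEAR_TIMEOUTS 16
#endif

int enable_thin_linear_timeouts(int fd)
{
	int one = 1;

	return setsockopt(fd, IPPROTO_TCP, TCP_THIN_LINEAR_TIMEOUTS,
			  &one, sizeof(one));
}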