diff options
| author | Michal Marek <mmarek@suse.cz> | 2010-10-27 18:15:57 -0400 |
|---|---|---|
| committer | Michal Marek <mmarek@suse.cz> | 2010-10-27 18:15:57 -0400 |
| commit | b74b953b998bcc2db91b694446f3a2619ec32de6 (patch) | |
| tree | 6ce24caabd730f6ae9287ed0676ec32e6ff31e9d /net/ipv4/tcp_timer.c | |
| parent | abb438526201c6a79949ad45375c051b6681c253 (diff) | |
| parent | f6f94e2ab1b33f0082ac22d71f66385a60d8157f (diff) | |
Merge commit 'v2.6.36' into kbuild/misc
Update to be able to fix a recent change to scripts/basic/docproc.c
(commit eda603f).
Diffstat (limited to 'net/ipv4/tcp_timer.c')
| -rw-r--r-- | net/ipv4/tcp_timer.c | 71 |
1 files changed, 50 insertions, 21 deletions
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 8816a20c2597..74c54b30600f 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
| @@ -19,6 +19,7 @@ | |||
| 19 | */ | 19 | */ |
| 20 | 20 | ||
| 21 | #include <linux/module.h> | 21 | #include <linux/module.h> |
| 22 | #include <linux/gfp.h> | ||
| 22 | #include <net/tcp.h> | 23 | #include <net/tcp.h> |
| 23 | 24 | ||
| 24 | int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES; | 25 | int sysctl_tcp_syn_retries __read_mostly = TCP_SYN_RETRIES; |
| @@ -29,6 +30,7 @@ int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL; | |||
| 29 | int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; | 30 | int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; |
| 30 | int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; | 31 | int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; |
| 31 | int sysctl_tcp_orphan_retries __read_mostly; | 32 | int sysctl_tcp_orphan_retries __read_mostly; |
| 33 | int sysctl_tcp_thin_linear_timeouts __read_mostly; | ||
| 32 | 34 | ||
| 33 | static void tcp_write_timer(unsigned long); | 35 | static void tcp_write_timer(unsigned long); |
| 34 | static void tcp_delack_timer(unsigned long); | 36 | static void tcp_delack_timer(unsigned long); |
| @@ -39,7 +41,6 @@ void tcp_init_xmit_timers(struct sock *sk) | |||
| 39 | inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, | 41 | inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, |
| 40 | &tcp_keepalive_timer); | 42 | &tcp_keepalive_timer); |
| 41 | } | 43 | } |
| 42 | |||
| 43 | EXPORT_SYMBOL(tcp_init_xmit_timers); | 44 | EXPORT_SYMBOL(tcp_init_xmit_timers); |
| 44 | 45 | ||
| 45 | static void tcp_write_err(struct sock *sk) | 46 | static void tcp_write_err(struct sock *sk) |
| @@ -65,18 +66,18 @@ static void tcp_write_err(struct sock *sk) | |||
| 65 | static int tcp_out_of_resources(struct sock *sk, int do_reset) | 66 | static int tcp_out_of_resources(struct sock *sk, int do_reset) |
| 66 | { | 67 | { |
| 67 | struct tcp_sock *tp = tcp_sk(sk); | 68 | struct tcp_sock *tp = tcp_sk(sk); |
| 68 | int orphans = percpu_counter_read_positive(&tcp_orphan_count); | 69 | int shift = 0; |
| 69 | 70 | ||
| 70 | /* If peer does not open window for long time, or did not transmit | 71 | /* If peer does not open window for long time, or did not transmit |
| 71 | * anything for long time, penalize it. */ | 72 | * anything for long time, penalize it. */ |
| 72 | if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset) | 73 | if ((s32)(tcp_time_stamp - tp->lsndtime) > 2*TCP_RTO_MAX || !do_reset) |
| 73 | orphans <<= 1; | 74 | shift++; |
| 74 | 75 | ||
| 75 | /* If some dubious ICMP arrived, penalize even more. */ | 76 | /* If some dubious ICMP arrived, penalize even more. */ |
| 76 | if (sk->sk_err_soft) | 77 | if (sk->sk_err_soft) |
| 77 | orphans <<= 1; | 78 | shift++; |
| 78 | 79 | ||
| 79 | if (tcp_too_many_orphans(sk, orphans)) { | 80 | if (tcp_too_many_orphans(sk, shift)) { |
| 80 | if (net_ratelimit()) | 81 | if (net_ratelimit()) |
| 81 | printk(KERN_INFO "Out of socket memory\n"); | 82 | printk(KERN_INFO "Out of socket memory\n"); |
| 82 | 83 | ||
| @@ -133,14 +134,17 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) | |||
| 133 | } | 134 | } |
| 134 | 135 | ||
| 135 | /* This function calculates a "timeout" which is equivalent to the timeout of a | 136 | /* This function calculates a "timeout" which is equivalent to the timeout of a |
| 136 | * TCP connection after "boundary" unsucessful, exponentially backed-off | 137 | * TCP connection after "boundary" unsuccessful, exponentially backed-off |
| 137 | * retransmissions with an initial RTO of TCP_RTO_MIN. | 138 | * retransmissions with an initial RTO of TCP_RTO_MIN or TCP_TIMEOUT_INIT if |
| 139 | * syn_set flag is set. | ||
| 138 | */ | 140 | */ |
| 139 | static bool retransmits_timed_out(struct sock *sk, | 141 | static bool retransmits_timed_out(struct sock *sk, |
| 140 | unsigned int boundary) | 142 | unsigned int boundary, |
| 143 | bool syn_set) | ||
| 141 | { | 144 | { |
| 142 | unsigned int timeout, linear_backoff_thresh; | 145 | unsigned int timeout, linear_backoff_thresh; |
| 143 | unsigned int start_ts; | 146 | unsigned int start_ts; |
| 147 | unsigned int rto_base = syn_set ? TCP_TIMEOUT_INIT : TCP_RTO_MIN; | ||
| 144 | 148 | ||
| 145 | if (!inet_csk(sk)->icsk_retransmits) | 149 | if (!inet_csk(sk)->icsk_retransmits) |
| 146 | return false; | 150 | return false; |
| @@ -150,12 +154,12 @@ static bool retransmits_timed_out(struct sock *sk, | |||
| 150 | else | 154 | else |
| 151 | start_ts = tcp_sk(sk)->retrans_stamp; | 155 | start_ts = tcp_sk(sk)->retrans_stamp; |
| 152 | 156 | ||
| 153 | linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN); | 157 | linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base); |
| 154 | 158 | ||
| 155 | if (boundary <= linear_backoff_thresh) | 159 | if (boundary <= linear_backoff_thresh) |
| 156 | timeout = ((2 << boundary) - 1) * TCP_RTO_MIN; | 160 | timeout = ((2 << boundary) - 1) * rto_base; |
| 157 | else | 161 | else |
| 158 | timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN + | 162 | timeout = ((2 << linear_backoff_thresh) - 1) * rto_base + |
| 159 | (boundary - linear_backoff_thresh) * TCP_RTO_MAX; | 163 | (boundary - linear_backoff_thresh) * TCP_RTO_MAX; |
| 160 | 164 | ||
| 161 | return (tcp_time_stamp - start_ts) >= timeout; | 165 | return (tcp_time_stamp - start_ts) >= timeout; |
| @@ -166,18 +170,19 @@ static int tcp_write_timeout(struct sock *sk) | |||
| 166 | { | 170 | { |
| 167 | struct inet_connection_sock *icsk = inet_csk(sk); | 171 | struct inet_connection_sock *icsk = inet_csk(sk); |
| 168 | int retry_until; | 172 | int retry_until; |
| 169 | bool do_reset; | 173 | bool do_reset, syn_set = 0; |
| 170 | 174 | ||
| 171 | if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { | 175 | if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { |
| 172 | if (icsk->icsk_retransmits) | 176 | if (icsk->icsk_retransmits) |
| 173 | dst_negative_advice(&sk->sk_dst_cache, sk); | 177 | dst_negative_advice(sk); |
| 174 | retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; | 178 | retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; |
| 179 | syn_set = 1; | ||
| 175 | } else { | 180 | } else { |
| 176 | if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { | 181 | if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0)) { |
| 177 | /* Black hole detection */ | 182 | /* Black hole detection */ |
| 178 | tcp_mtu_probing(icsk, sk); | 183 | tcp_mtu_probing(icsk, sk); |
| 179 | 184 | ||
| 180 | dst_negative_advice(&sk->sk_dst_cache, sk); | 185 | dst_negative_advice(sk); |
| 181 | } | 186 | } |
| 182 | 187 | ||
| 183 | retry_until = sysctl_tcp_retries2; | 188 | retry_until = sysctl_tcp_retries2; |
| @@ -186,14 +191,14 @@ static int tcp_write_timeout(struct sock *sk) | |||
| 186 | 191 | ||
| 187 | retry_until = tcp_orphan_retries(sk, alive); | 192 | retry_until = tcp_orphan_retries(sk, alive); |
| 188 | do_reset = alive || | 193 | do_reset = alive || |
| 189 | !retransmits_timed_out(sk, retry_until); | 194 | !retransmits_timed_out(sk, retry_until, 0); |
| 190 | 195 | ||
| 191 | if (tcp_out_of_resources(sk, do_reset)) | 196 | if (tcp_out_of_resources(sk, do_reset)) |
| 192 | return 1; | 197 | return 1; |
| 193 | } | 198 | } |
| 194 | } | 199 | } |
| 195 | 200 | ||
| 196 | if (retransmits_timed_out(sk, retry_until)) { | 201 | if (retransmits_timed_out(sk, retry_until, syn_set)) { |
| 197 | /* Has it gone just too far? */ | 202 | /* Has it gone just too far? */ |
| 198 | tcp_write_err(sk); | 203 | tcp_write_err(sk); |
| 199 | return 1; | 204 | return 1; |
| @@ -415,9 +420,27 @@ void tcp_retransmit_timer(struct sock *sk) | |||
| 415 | icsk->icsk_retransmits++; | 420 | icsk->icsk_retransmits++; |
| 416 | 421 | ||
| 417 | out_reset_timer: | 422 | out_reset_timer: |
| 418 | icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); | 423 | /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is |
| 424 | * used to reset timer, set to 0. Recalculate 'icsk_rto' as this | ||
| 425 | * might be increased if the stream oscillates between thin and thick, | ||
| 426 | * thus the old value might already be too high compared to the value | ||
| 427 | * set by 'tcp_set_rto' in tcp_input.c which resets the rto without | ||
| 428 | * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating | ||
| 429 | * exponential backoff behaviour to avoid continue hammering | ||
| 430 | * linear-timeout retransmissions into a black hole | ||
| 431 | */ | ||
| 432 | if (sk->sk_state == TCP_ESTABLISHED && | ||
| 433 | (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) && | ||
| 434 | tcp_stream_is_thin(tp) && | ||
| 435 | icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { | ||
| 436 | icsk->icsk_backoff = 0; | ||
| 437 | icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX); | ||
| 438 | } else { | ||
| 439 | /* Use normal (exponential) backoff */ | ||
| 440 | icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); | ||
| 441 | } | ||
| 419 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); | 442 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); |
| 420 | if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) | 443 | if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0)) |
| 421 | __sk_dst_reset(sk); | 444 | __sk_dst_reset(sk); |
| 422 | 445 | ||
| 423 | out:; | 446 | out:; |
| @@ -474,6 +497,12 @@ static void tcp_synack_timer(struct sock *sk) | |||
| 474 | TCP_TIMEOUT_INIT, TCP_RTO_MAX); | 497 | TCP_TIMEOUT_INIT, TCP_RTO_MAX); |
| 475 | } | 498 | } |
| 476 | 499 | ||
| 500 | void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req) | ||
| 501 | { | ||
| 502 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEOUTS); | ||
| 503 | } | ||
| 504 | EXPORT_SYMBOL(tcp_syn_ack_timeout); | ||
| 505 | |||
| 477 | void tcp_set_keepalive(struct sock *sk, int val) | 506 | void tcp_set_keepalive(struct sock *sk, int val) |
| 478 | { | 507 | { |
| 479 | if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) | 508 | if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) |
| @@ -491,7 +520,7 @@ static void tcp_keepalive_timer (unsigned long data) | |||
| 491 | struct sock *sk = (struct sock *) data; | 520 | struct sock *sk = (struct sock *) data; |
| 492 | struct inet_connection_sock *icsk = inet_csk(sk); | 521 | struct inet_connection_sock *icsk = inet_csk(sk); |
| 493 | struct tcp_sock *tp = tcp_sk(sk); | 522 | struct tcp_sock *tp = tcp_sk(sk); |
| 494 | __u32 elapsed; | 523 | u32 elapsed; |
| 495 | 524 | ||
| 496 | /* Only process if socket is not in use. */ | 525 | /* Only process if socket is not in use. */ |
| 497 | bh_lock_sock(sk); | 526 | bh_lock_sock(sk); |
| @@ -528,7 +557,7 @@ static void tcp_keepalive_timer (unsigned long data) | |||
| 528 | if (tp->packets_out || tcp_send_head(sk)) | 557 | if (tp->packets_out || tcp_send_head(sk)) |
| 529 | goto resched; | 558 | goto resched; |
| 530 | 559 | ||
| 531 | elapsed = tcp_time_stamp - tp->rcv_tstamp; | 560 | elapsed = keepalive_time_elapsed(tp); |
| 532 | 561 | ||
| 533 | if (elapsed >= keepalive_time_when(tp)) { | 562 | if (elapsed >= keepalive_time_when(tp)) { |
| 534 | if (icsk->icsk_probes_out >= keepalive_probes(tp)) { | 563 | if (icsk->icsk_probes_out >= keepalive_probes(tp)) { |
