diff options
Diffstat (limited to 'net/ipv4/tcp_timer.c')
| -rw-r--r-- | net/ipv4/tcp_timer.c | 143 |
1 files changed, 49 insertions, 94 deletions
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index b78aac30c49..ecd44b0c45f 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
| |||
| @@ -32,6 +32,17 @@ int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; | |||
| 32 | int sysctl_tcp_orphan_retries __read_mostly; | 32 | int sysctl_tcp_orphan_retries __read_mostly; |
| 33 | int sysctl_tcp_thin_linear_timeouts __read_mostly; | 33 | int sysctl_tcp_thin_linear_timeouts __read_mostly; |
| 34 | 34 | ||
| 35 | static void tcp_write_timer(unsigned long); | ||
| 36 | static void tcp_delack_timer(unsigned long); | ||
| 37 | static void tcp_keepalive_timer (unsigned long data); | ||
| 38 | |||
| 39 | void tcp_init_xmit_timers(struct sock *sk) | ||
| 40 | { | ||
| 41 | inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, | ||
| 42 | &tcp_keepalive_timer); | ||
| 43 | } | ||
| 44 | EXPORT_SYMBOL(tcp_init_xmit_timers); | ||
| 45 | |||
| 35 | static void tcp_write_err(struct sock *sk) | 46 | static void tcp_write_err(struct sock *sk) |
| 36 | { | 47 | { |
| 37 | sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; | 48 | sk->sk_err = sk->sk_err_soft ? : ETIMEDOUT; |
| @@ -66,7 +77,10 @@ static int tcp_out_of_resources(struct sock *sk, int do_reset) | |||
| 66 | if (sk->sk_err_soft) | 77 | if (sk->sk_err_soft) |
| 67 | shift++; | 78 | shift++; |
| 68 | 79 | ||
| 69 | if (tcp_check_oom(sk, shift)) { | 80 | if (tcp_too_many_orphans(sk, shift)) { |
| 81 | if (net_ratelimit()) | ||
| 82 | printk(KERN_INFO "Out of socket memory\n"); | ||
| 83 | |||
| 70 | /* Catch exceptional cases, when connection requires reset. | 84 | /* Catch exceptional cases, when connection requires reset. |
| 71 | * 1. Last segment was sent recently. */ | 85 | * 1. Last segment was sent recently. */ |
| 72 | if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN || | 86 | if ((s32)(tcp_time_stamp - tp->lsndtime) <= TCP_TIMEWAIT_LEN || |
| @@ -157,13 +171,13 @@ static int tcp_write_timeout(struct sock *sk) | |||
| 157 | { | 171 | { |
| 158 | struct inet_connection_sock *icsk = inet_csk(sk); | 172 | struct inet_connection_sock *icsk = inet_csk(sk); |
| 159 | int retry_until; | 173 | int retry_until; |
| 160 | bool do_reset, syn_set = false; | 174 | bool do_reset, syn_set = 0; |
| 161 | 175 | ||
| 162 | if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { | 176 | if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) { |
| 163 | if (icsk->icsk_retransmits) | 177 | if (icsk->icsk_retransmits) |
| 164 | dst_negative_advice(sk); | 178 | dst_negative_advice(sk); |
| 165 | retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; | 179 | retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; |
| 166 | syn_set = true; | 180 | syn_set = 1; |
| 167 | } else { | 181 | } else { |
| 168 | if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) { | 182 | if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0, 0)) { |
| 169 | /* Black hole detection */ | 183 | /* Black hole detection */ |
| @@ -194,11 +208,21 @@ static int tcp_write_timeout(struct sock *sk) | |||
| 194 | return 0; | 208 | return 0; |
| 195 | } | 209 | } |
| 196 | 210 | ||
| 197 | void tcp_delack_timer_handler(struct sock *sk) | 211 | static void tcp_delack_timer(unsigned long data) |
| 198 | { | 212 | { |
| 213 | struct sock *sk = (struct sock *)data; | ||
| 199 | struct tcp_sock *tp = tcp_sk(sk); | 214 | struct tcp_sock *tp = tcp_sk(sk); |
| 200 | struct inet_connection_sock *icsk = inet_csk(sk); | 215 | struct inet_connection_sock *icsk = inet_csk(sk); |
| 201 | 216 | ||
| 217 | bh_lock_sock(sk); | ||
| 218 | if (sock_owned_by_user(sk)) { | ||
| 219 | /* Try again later. */ | ||
| 220 | icsk->icsk_ack.blocked = 1; | ||
| 221 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); | ||
| 222 | sk_reset_timer(sk, &icsk->icsk_delack_timer, jiffies + TCP_DELACK_MIN); | ||
| 223 | goto out_unlock; | ||
| 224 | } | ||
| 225 | |||
| 202 | sk_mem_reclaim_partial(sk); | 226 | sk_mem_reclaim_partial(sk); |
| 203 | 227 | ||
| 204 | if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) | 228 | if (sk->sk_state == TCP_CLOSE || !(icsk->icsk_ack.pending & ICSK_ACK_TIMER)) |
| @@ -237,24 +261,9 @@ void tcp_delack_timer_handler(struct sock *sk) | |||
| 237 | } | 261 | } |
| 238 | 262 | ||
| 239 | out: | 263 | out: |
| 240 | if (sk_under_memory_pressure(sk)) | 264 | if (tcp_memory_pressure) |
| 241 | sk_mem_reclaim(sk); | 265 | sk_mem_reclaim(sk); |
| 242 | } | 266 | out_unlock: |
| 243 | |||
| 244 | static void tcp_delack_timer(unsigned long data) | ||
| 245 | { | ||
| 246 | struct sock *sk = (struct sock *)data; | ||
| 247 | |||
| 248 | bh_lock_sock(sk); | ||
| 249 | if (!sock_owned_by_user(sk)) { | ||
| 250 | tcp_delack_timer_handler(sk); | ||
| 251 | } else { | ||
| 252 | inet_csk(sk)->icsk_ack.blocked = 1; | ||
| 253 | NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); | ||
| 254 | /* deleguate our work to tcp_release_cb() */ | ||
| 255 | if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags)) | ||
| 256 | sock_hold(sk); | ||
| 257 | } | ||
| 258 | bh_unlock_sock(sk); | 267 | bh_unlock_sock(sk); |
| 259 | sock_put(sk); | 268 | sock_put(sk); |
| 260 | } | 269 | } |
| @@ -305,35 +314,6 @@ static void tcp_probe_timer(struct sock *sk) | |||
| 305 | } | 314 | } |
| 306 | 315 | ||
| 307 | /* | 316 | /* |
| 308 | * Timer for Fast Open socket to retransmit SYNACK. Note that the | ||
| 309 | * sk here is the child socket, not the parent (listener) socket. | ||
| 310 | */ | ||
| 311 | static void tcp_fastopen_synack_timer(struct sock *sk) | ||
| 312 | { | ||
| 313 | struct inet_connection_sock *icsk = inet_csk(sk); | ||
| 314 | int max_retries = icsk->icsk_syn_retries ? : | ||
| 315 | sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */ | ||
| 316 | struct request_sock *req; | ||
| 317 | |||
| 318 | req = tcp_sk(sk)->fastopen_rsk; | ||
| 319 | req->rsk_ops->syn_ack_timeout(sk, req); | ||
| 320 | |||
| 321 | if (req->num_timeout >= max_retries) { | ||
| 322 | tcp_write_err(sk); | ||
| 323 | return; | ||
| 324 | } | ||
| 325 | /* XXX (TFO) - Unlike regular SYN-ACK retransmit, we ignore error | ||
| 326 | * returned from rtx_syn_ack() to make it more persistent like | ||
| 327 | * regular retransmit because if the child socket has been accepted | ||
| 328 | * it's not good to give up too easily. | ||
| 329 | */ | ||
| 330 | inet_rtx_syn_ack(sk, req); | ||
| 331 | req->num_timeout++; | ||
| 332 | inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, | ||
| 333 | TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX); | ||
| 334 | } | ||
| 335 | |||
| 336 | /* | ||
| 337 | * The TCP retransmit timer. | 317 | * The TCP retransmit timer. |
| 338 | */ | 318 | */ |
| 339 | 319 | ||
| @@ -342,19 +322,6 @@ void tcp_retransmit_timer(struct sock *sk) | |||
| 342 | struct tcp_sock *tp = tcp_sk(sk); | 322 | struct tcp_sock *tp = tcp_sk(sk); |
| 343 | struct inet_connection_sock *icsk = inet_csk(sk); | 323 | struct inet_connection_sock *icsk = inet_csk(sk); |
| 344 | 324 | ||
| 345 | if (tp->early_retrans_delayed) { | ||
| 346 | tcp_resume_early_retransmit(sk); | ||
| 347 | return; | ||
| 348 | } | ||
| 349 | if (tp->fastopen_rsk) { | ||
| 350 | WARN_ON_ONCE(sk->sk_state != TCP_SYN_RECV && | ||
| 351 | sk->sk_state != TCP_FIN_WAIT1); | ||
| 352 | tcp_fastopen_synack_timer(sk); | ||
| 353 | /* Before we receive ACK to our SYN-ACK don't retransmit | ||
| 354 | * anything else (e.g., data or FIN segments). | ||
| 355 | */ | ||
| 356 | return; | ||
| 357 | } | ||
| 358 | if (!tp->packets_out) | 325 | if (!tp->packets_out) |
| 359 | goto out; | 326 | goto out; |
| 360 | 327 | ||
| @@ -367,22 +334,22 @@ void tcp_retransmit_timer(struct sock *sk) | |||
| 367 | * connection. If the socket is an orphan, time it out, | 334 | * connection. If the socket is an orphan, time it out, |
| 368 | * we cannot allow such beasts to hang infinitely. | 335 | * we cannot allow such beasts to hang infinitely. |
| 369 | */ | 336 | */ |
| 337 | #ifdef TCP_DEBUG | ||
| 370 | struct inet_sock *inet = inet_sk(sk); | 338 | struct inet_sock *inet = inet_sk(sk); |
| 371 | if (sk->sk_family == AF_INET) { | 339 | if (sk->sk_family == AF_INET) { |
| 372 | LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n"), | 340 | LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI4:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", |
| 373 | &inet->inet_daddr, | 341 | &inet->inet_daddr, ntohs(inet->inet_dport), |
| 374 | ntohs(inet->inet_dport), inet->inet_num, | 342 | inet->inet_num, tp->snd_una, tp->snd_nxt); |
| 375 | tp->snd_una, tp->snd_nxt); | ||
| 376 | } | 343 | } |
| 377 | #if IS_ENABLED(CONFIG_IPV6) | 344 | #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) |
| 378 | else if (sk->sk_family == AF_INET6) { | 345 | else if (sk->sk_family == AF_INET6) { |
| 379 | struct ipv6_pinfo *np = inet6_sk(sk); | 346 | struct ipv6_pinfo *np = inet6_sk(sk); |
| 380 | LIMIT_NETDEBUG(KERN_DEBUG pr_fmt("Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n"), | 347 | LIMIT_NETDEBUG(KERN_DEBUG "TCP: Peer %pI6:%u/%u unexpectedly shrunk window %u:%u (repaired)\n", |
| 381 | &np->daddr, | 348 | &np->daddr, ntohs(inet->inet_dport), |
| 382 | ntohs(inet->inet_dport), inet->inet_num, | 349 | inet->inet_num, tp->snd_una, tp->snd_nxt); |
| 383 | tp->snd_una, tp->snd_nxt); | ||
| 384 | } | 350 | } |
| 385 | #endif | 351 | #endif |
| 352 | #endif | ||
| 386 | if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) { | 353 | if (tcp_time_stamp - tp->rcv_tstamp > TCP_RTO_MAX) { |
| 387 | tcp_write_err(sk); | 354 | tcp_write_err(sk); |
| 388 | goto out; | 355 | goto out; |
| @@ -481,11 +448,19 @@ out_reset_timer: | |||
| 481 | out:; | 448 | out:; |
| 482 | } | 449 | } |
| 483 | 450 | ||
| 484 | void tcp_write_timer_handler(struct sock *sk) | 451 | static void tcp_write_timer(unsigned long data) |
| 485 | { | 452 | { |
| 453 | struct sock *sk = (struct sock *)data; | ||
| 486 | struct inet_connection_sock *icsk = inet_csk(sk); | 454 | struct inet_connection_sock *icsk = inet_csk(sk); |
| 487 | int event; | 455 | int event; |
| 488 | 456 | ||
| 457 | bh_lock_sock(sk); | ||
| 458 | if (sock_owned_by_user(sk)) { | ||
| 459 | /* Try again later */ | ||
| 460 | sk_reset_timer(sk, &icsk->icsk_retransmit_timer, jiffies + (HZ / 20)); | ||
| 461 | goto out_unlock; | ||
| 462 | } | ||
| 463 | |||
| 489 | if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending) | 464 | if (sk->sk_state == TCP_CLOSE || !icsk->icsk_pending) |
| 490 | goto out; | 465 | goto out; |
| 491 | 466 | ||
| @@ -508,20 +483,7 @@ void tcp_write_timer_handler(struct sock *sk) | |||
| 508 | 483 | ||
| 509 | out: | 484 | out: |
| 510 | sk_mem_reclaim(sk); | 485 | sk_mem_reclaim(sk); |
| 511 | } | 486 | out_unlock: |
| 512 | |||
| 513 | static void tcp_write_timer(unsigned long data) | ||
| 514 | { | ||
| 515 | struct sock *sk = (struct sock *)data; | ||
| 516 | |||
| 517 | bh_lock_sock(sk); | ||
| 518 | if (!sock_owned_by_user(sk)) { | ||
| 519 | tcp_write_timer_handler(sk); | ||
| 520 | } else { | ||
| 521 | /* deleguate our work to tcp_release_cb() */ | ||
| 522 | if (!test_and_set_bit(TCP_WRITE_TIMER_DEFERRED, &tcp_sk(sk)->tsq_flags)) | ||
| 523 | sock_hold(sk); | ||
| 524 | } | ||
| 525 | bh_unlock_sock(sk); | 487 | bh_unlock_sock(sk); |
| 526 | sock_put(sk); | 488 | sock_put(sk); |
| 527 | } | 489 | } |
| @@ -638,10 +600,3 @@ out: | |||
| 638 | bh_unlock_sock(sk); | 600 | bh_unlock_sock(sk); |
| 639 | sock_put(sk); | 601 | sock_put(sk); |
| 640 | } | 602 | } |
| 641 | |||
| 642 | void tcp_init_xmit_timers(struct sock *sk) | ||
| 643 | { | ||
| 644 | inet_csk_init_xmit_timers(sk, &tcp_write_timer, &tcp_delack_timer, | ||
| 645 | &tcp_keepalive_timer); | ||
| 646 | } | ||
| 647 | EXPORT_SYMBOL(tcp_init_xmit_timers); | ||
