author     Arnaldo Carvalho de Melo <acme@ghostprotocols.net>	2005-08-09 23:15:09 -0400
committer  David S. Miller <davem@sunset.davemloft.net>	2005-08-29 18:49:50 -0400
commit     a019d6fe2b9da68ea4ba6cf3c4e86fc1dbf554c3 (patch)
tree       f82f0523c313228d64998fac30790edcfd0785c3 /net
parent     7c657876b63cb1d8a2ec06f8fc6c37bb8412e66c (diff)
[ICSK]: Move generalised functions from tcp to inet_connection_sock
This also improves reqsk_queue_prune and renames it to
inet_csk_reqsk_queue_prune: it deals with both inet_connection_sock
and inet_request_sock objects, not just with generic request_sock
ones, so it belongs at the inet_ layer rather than with the generic
request_sock code.
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
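
[Editor's note] The caller-visible effect of the move is a simpler
signature. A sketch of the before/after prototypes, both taken from the
hunks below; the queue pointer and max_retries arguments disappear because
the helper now reaches the accept queue through inet_csk(parent) and
derives max_retries itself:

/* Before: generic request_sock-layer helper; the caller supplied the
 * accept queue and a precomputed max_retries. */
void reqsk_queue_prune(struct request_sock_queue *queue, struct sock *parent,
		       const unsigned long interval, const unsigned long timeout,
		       const unsigned long max_rto, int max_retries);

/* After: inet_connection_sock-layer helper; max_retries now comes from
 * icsk_syn_retries, sysctl_tcp_synack_retries and rskq_defer_accept. */
void inet_csk_reqsk_queue_prune(struct sock *parent,
				const unsigned long interval,
				const unsigned long timeout,
				const unsigned long max_rto);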
Diffstat (limited to 'net')
-rw-r--r--  net/dccp/timer.c                   6
-rw-r--r--  net/ipv4/inet_connection_sock.c  214
-rw-r--r--  net/ipv4/tcp.c                   120
-rw-r--r--  net/ipv4/tcp_timer.c              93
4 files changed, 217 insertions, 216 deletions
diff --git a/net/dccp/timer.c b/net/dccp/timer.c
index 8c396ee01aac..9f1f1ab9e2b4 100644
--- a/net/dccp/timer.c
+++ b/net/dccp/timer.c
@@ -220,11 +220,7 @@ out:
  */
 static void dccp_response_timer(struct sock *sk)
 {
-	struct inet_connection_sock *icsk = inet_csk(sk);
-	const int max_retries = icsk->icsk_syn_retries ? : TCP_SYNACK_RETRIES /* FIXME sysctl_tcp_synack_retries */;
-
-	reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL,
-			  DCCP_TIMEOUT_INIT, DCCP_RTO_MAX, max_retries);
+	inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL, DCCP_TIMEOUT_INIT, DCCP_RTO_MAX);
 }
 
 static void dccp_keepalive_timer(unsigned long data)
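
[Editor's note] This call site also loses its FIXME: the shared helper now
derives max_retries from icsk_syn_retries or sysctl_tcp_synack_retries.
The helper's per-request backoff, timeo = min(timeout << retrans, max_rto),
is easiest to see with numbers. A minimal userspace sketch (not kernel
code); the DCCP_TIMEOUT_INIT/DCCP_RTO_MAX values are assumptions for
illustration (3s initial, 64s cap, mirroring TCP's defaults) — check the
dccp headers in your tree for the real definitions:

#include <stdio.h>

#define HZ 1000
#define DCCP_TIMEOUT_INIT (3 * HZ)	/* assumed: 3s initial, like TCP */
#define DCCP_RTO_MAX      (64 * HZ)	/* assumed: 64s cap, like TCP */

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

int main(void)
{
	/* After each unanswered retransmission the expiry doubles until it
	 * hits the RTO ceiling, as in inet_csk_reqsk_queue_prune(). */
	for (int retrans = 1; retrans <= 6; retrans++) {
		unsigned long timeo = min_ul(DCCP_TIMEOUT_INIT << retrans,
					     DCCP_RTO_MAX);
		printf("retrans %d -> next expiry in %lus\n",
		       retrans, timeo / HZ);	/* 6, 12, 24, 48, 64, 64 */
	}
	return 0;
}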
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 136ada050b63..026630a15ea0 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -23,6 +23,7 @@
 #include <net/ip.h>
 #include <net/route.h>
 #include <net/tcp_states.h>
+#include <net/xfrm.h>
 
 #ifdef INET_CSK_DEBUG
 const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
@@ -398,8 +399,100 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
 	inet_csk_reqsk_queue_added(sk, timeout);
 }
 
+/* Only thing we need from tcp.h */
+extern int sysctl_tcp_synack_retries;
+
 EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
 
+void inet_csk_reqsk_queue_prune(struct sock *parent,
+				const unsigned long interval,
+				const unsigned long timeout,
+				const unsigned long max_rto)
+{
+	struct inet_connection_sock *icsk = inet_csk(parent);
+	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
+	struct listen_sock *lopt = queue->listen_opt;
+	int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
+	int thresh = max_retries;
+	unsigned long now = jiffies;
+	struct request_sock **reqp, *req;
+	int i, budget;
+
+	if (lopt == NULL || lopt->qlen == 0)
+		return;
+
+	/* Normally all the openreqs are young and become mature
+	 * (i.e. converted to established socket) for first timeout.
+	 * If synack was not acknowledged for 3 seconds, it means
+	 * one of the following things: synack was lost, ack was lost,
+	 * rtt is high or nobody planned to ack (i.e. synflood).
+	 * When server is a bit loaded, queue is populated with old
+	 * open requests, reducing effective size of queue.
+	 * When server is well loaded, queue size reduces to zero
+	 * after several minutes of work. It is not synflood,
+	 * it is normal operation. The solution is pruning
+	 * too old entries overriding normal timeout, when
+	 * situation becomes dangerous.
+	 *
+	 * Essentially, we reserve half of room for young
+	 * embrions; and abort old ones without pity, if old
+	 * ones are about to clog our table.
+	 */
+	if (lopt->qlen >> (lopt->max_qlen_log - 1)) {
+		int young = (lopt->qlen_young << 1);
+
+		while (thresh > 2) {
+			if (lopt->qlen < young)
+				break;
+			thresh--;
+			young <<= 1;
+		}
+	}
+
+	if (queue->rskq_defer_accept)
+		max_retries = queue->rskq_defer_accept;
+
+	budget = 2 * (lopt->nr_table_entries / (timeout / interval));
+	i = lopt->clock_hand;
+
+	do {
+		reqp = &lopt->syn_table[i];
+		while ((req = *reqp) != NULL) {
+			if (time_after_eq(now, req->expires)) {
+				if ((req->retrans < thresh ||
+				     (inet_rsk(req)->acked && req->retrans < max_retries))
+				    && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) {
+					unsigned long timeo;
+
+					if (req->retrans++ == 0)
+						lopt->qlen_young--;
+					timeo = min((timeout << req->retrans), max_rto);
+					req->expires = now + timeo;
+					reqp = &req->dl_next;
+					continue;
+				}
+
+				/* Drop this request */
+				inet_csk_reqsk_queue_unlink(parent, req, reqp);
+				reqsk_queue_removed(queue, req);
+				reqsk_free(req);
+				continue;
+			}
+			reqp = &req->dl_next;
+		}
+
+		i = (i + 1) & (lopt->nr_table_entries - 1);
+
+	} while (--budget > 0);
+
+	lopt->clock_hand = i;
+
+	if (lopt->qlen)
+		inet_csk_reset_keepalive_timer(parent, interval);
+}
+
+EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune);
+
 struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
 			    const unsigned int __nocast priority)
 {
@@ -424,3 +517,124 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
 }
 
 EXPORT_SYMBOL_GPL(inet_csk_clone);
+
+/*
+ * At this point, there should be no process reference to this
+ * socket, and thus no user references at all.  Therefore we
+ * can assume the socket waitqueue is inactive and nobody will
+ * try to jump onto it.
+ */
+void inet_csk_destroy_sock(struct sock *sk)
+{
+	BUG_TRAP(sk->sk_state == TCP_CLOSE);
+	BUG_TRAP(sock_flag(sk, SOCK_DEAD));
+
+	/* It cannot be in hash table! */
+	BUG_TRAP(sk_unhashed(sk));
+
+	/* If it has not 0 inet_sk(sk)->num, it must be bound */
+	BUG_TRAP(!inet_sk(sk)->num || inet_csk(sk)->icsk_bind_hash);
+
+	sk->sk_prot->destroy(sk);
+
+	sk_stream_kill_queues(sk);
+
+	xfrm_sk_free_policy(sk);
+
+	sk_refcnt_debug_release(sk);
+
+	atomic_dec(sk->sk_prot->orphan_count);
+	sock_put(sk);
+}
+
+EXPORT_SYMBOL(inet_csk_destroy_sock);
+
+int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
+
+	if (rc != 0)
+		return rc;
+
+	sk->sk_max_ack_backlog = 0;
+	sk->sk_ack_backlog = 0;
+	inet_csk_delack_init(sk);
+
+	/* There is race window here: we announce ourselves listening,
+	 * but this transition is still not validated by get_port().
+	 * It is OK, because this socket enters to hash table only
+	 * after validation is complete.
+	 */
+	sk->sk_state = TCP_LISTEN;
+	if (!sk->sk_prot->get_port(sk, inet->num)) {
+		inet->sport = htons(inet->num);
+
+		sk_dst_reset(sk);
+		sk->sk_prot->hash(sk);
+
+		return 0;
+	}
+
+	sk->sk_state = TCP_CLOSE;
+	__reqsk_queue_destroy(&icsk->icsk_accept_queue);
+	return -EADDRINUSE;
+}
+
+EXPORT_SYMBOL_GPL(inet_csk_listen_start);
+
+/*
+ * This routine closes sockets which have been at least partially
+ * opened, but not yet accepted.
+ */
+void inet_csk_listen_stop(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct request_sock *acc_req;
+	struct request_sock *req;
+
+	inet_csk_delete_keepalive_timer(sk);
+
+	/* make all the listen_opt local to us */
+	acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue);
+
+	/* Following specs, it would be better either to send FIN
+	 * (and enter FIN-WAIT-1, it is normal close)
+	 * or to send active reset (abort).
+	 * Certainly, it is pretty dangerous while synflood, but it is
+	 * bad justification for our negligence 8)
+	 * To be honest, we are not able to make either
+	 * of the variants now.			--ANK
+	 */
+	reqsk_queue_destroy(&icsk->icsk_accept_queue);
+
+	while ((req = acc_req) != NULL) {
+		struct sock *child = req->sk;
+
+		acc_req = req->dl_next;
+
+		local_bh_disable();
+		bh_lock_sock(child);
+		BUG_TRAP(!sock_owned_by_user(child));
+		sock_hold(child);
+
+		sk->sk_prot->disconnect(child, O_NONBLOCK);
+
+		sock_orphan(child);
+
+		atomic_inc(sk->sk_prot->orphan_count);
+
+		inet_csk_destroy_sock(child);
+
+		bh_unlock_sock(child);
+		local_bh_enable();
+		sock_put(child);
+
+		sk_acceptq_removed(sk);
+		__reqsk_free(req);
+	}
+	BUG_TRAP(!sk->sk_ack_backlog);
+}
+
+EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
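
[Editor's note] The "reserve half of room for young embrions" heuristic
above is easiest to see with numbers: once the SYN table is at least half
full, every doubling by which total entries outgrow twice the young count
costs one retransmission attempt off the threshold, floored at 2. A
minimal userspace sketch (not kernel code); the qlen/qlen_young values are
made up, max_qlen_log = 8 means a 256-slot table, and 5 mirrors the usual
sysctl_tcp_synack_retries default:

#include <stdio.h>

/* Mirrors the thresh computation in inet_csk_reqsk_queue_prune(). */
static int prune_thresh(int qlen, int qlen_young, int max_qlen_log,
			int max_retries)
{
	int thresh = max_retries;

	/* Only kicks in once the table is at least half full. */
	if (qlen >> (max_qlen_log - 1)) {
		int young = qlen_young << 1;

		/* Lower the threshold one step for each doubling by which
		 * the queue outgrows twice its young entries, never below 2. */
		while (thresh > 2) {
			if (qlen < young)
				break;
			thresh--;
			young <<= 1;
		}
	}
	return thresh;
}

int main(void)
{
	printf("%d\n", prune_thresh(100,  80, 8, 5)); /* below half full: 5 */
	printf("%d\n", prune_thresh(200, 110, 8, 5)); /* mostly young:    5 */
	printf("%d\n", prune_thresh(200,  30, 8, 5)); /* clogged by old:  3 */
	printf("%d\n", prune_thresh(250,   2, 8, 5)); /* synflood-like:   2 */
	return 0;
}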
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a4e9eec44895..4bda522d25cf 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -456,96 +456,6 @@ int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 	return put_user(answ, (int __user *)arg);
 }
 
-int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
-{
-	struct inet_sock *inet = inet_sk(sk);
-	struct inet_connection_sock *icsk = inet_csk(sk);
-	int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
-
-	if (rc != 0)
-		return rc;
-
-	sk->sk_max_ack_backlog = 0;
-	sk->sk_ack_backlog = 0;
-	inet_csk_delack_init(sk);
-
-	/* There is race window here: we announce ourselves listening,
-	 * but this transition is still not validated by get_port().
-	 * It is OK, because this socket enters to hash table only
-	 * after validation is complete.
-	 */
-	sk->sk_state = TCP_LISTEN;
-	if (!sk->sk_prot->get_port(sk, inet->num)) {
-		inet->sport = htons(inet->num);
-
-		sk_dst_reset(sk);
-		sk->sk_prot->hash(sk);
-
-		return 0;
-	}
-
-	sk->sk_state = TCP_CLOSE;
-	__reqsk_queue_destroy(&icsk->icsk_accept_queue);
-	return -EADDRINUSE;
-}
-
-EXPORT_SYMBOL_GPL(inet_csk_listen_start);
-
-/*
- * This routine closes sockets which have been at least partially
- * opened, but not yet accepted.
- */
-void inet_csk_listen_stop(struct sock *sk)
-{
-	struct inet_connection_sock *icsk = inet_csk(sk);
-	struct request_sock *acc_req;
-	struct request_sock *req;
-
-	inet_csk_delete_keepalive_timer(sk);
-
-	/* make all the listen_opt local to us */
-	acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue);
-
-	/* Following specs, it would be better either to send FIN
-	 * (and enter FIN-WAIT-1, it is normal close)
-	 * or to send active reset (abort).
-	 * Certainly, it is pretty dangerous while synflood, but it is
-	 * bad justification for our negligence 8)
-	 * To be honest, we are not able to make either
-	 * of the variants now.			--ANK
-	 */
-	reqsk_queue_destroy(&icsk->icsk_accept_queue);
-
-	while ((req = acc_req) != NULL) {
-		struct sock *child = req->sk;
-
-		acc_req = req->dl_next;
-
-		local_bh_disable();
-		bh_lock_sock(child);
-		BUG_TRAP(!sock_owned_by_user(child));
-		sock_hold(child);
-
-		sk->sk_prot->disconnect(child, O_NONBLOCK);
-
-		sock_orphan(child);
-
-		atomic_inc(sk->sk_prot->orphan_count);
-
-		inet_csk_destroy_sock(child);
-
-		bh_unlock_sock(child);
-		local_bh_enable();
-		sock_put(child);
-
-		sk_acceptq_removed(sk);
-		__reqsk_free(req);
-	}
-	BUG_TRAP(!sk->sk_ack_backlog);
-}
-
-EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
-
 static inline void tcp_mark_push(struct tcp_sock *tp, struct sk_buff *skb)
 {
 	TCP_SKB_CB(skb)->flags |= TCPCB_FLAG_PSH;
@@ -1559,35 +1469,6 @@ void tcp_shutdown(struct sock *sk, int how)
 	}
 }
 
-/*
- * At this point, there should be no process reference to this
- * socket, and thus no user references at all.  Therefore we
- * can assume the socket waitqueue is inactive and nobody will
- * try to jump onto it.
- */
-void inet_csk_destroy_sock(struct sock *sk)
-{
-	BUG_TRAP(sk->sk_state == TCP_CLOSE);
-	BUG_TRAP(sock_flag(sk, SOCK_DEAD));
-
-	/* It cannot be in hash table! */
-	BUG_TRAP(sk_unhashed(sk));
-
-	/* If it has not 0 inet_sk(sk)->num, it must be bound */
-	BUG_TRAP(!inet_sk(sk)->num || inet_csk(sk)->icsk_bind_hash);
-
-	sk->sk_prot->destroy(sk);
-
-	sk_stream_kill_queues(sk);
-
-	xfrm_sk_free_policy(sk);
-
-	sk_refcnt_debug_release(sk);
-
-	atomic_dec(sk->sk_prot->orphan_count);
-	sock_put(sk);
-}
-
 void tcp_close(struct sock *sk, long timeout)
 {
 	struct sk_buff *skb;
@@ -2258,7 +2139,6 @@ void __init tcp_init(void)
 }
 
 EXPORT_SYMBOL(tcp_close);
-EXPORT_SYMBOL(inet_csk_destroy_sock);
 EXPORT_SYMBOL(tcp_disconnect);
 EXPORT_SYMBOL(tcp_getsockopt);
 EXPORT_SYMBOL(tcp_ioctl);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index b614ad4d30c9..72cec6981830 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -424,103 +424,14 @@ out_unlock:
 	sock_put(sk);
 }
 
-void reqsk_queue_prune(struct request_sock_queue *queue, struct sock *parent,
-		       const unsigned long interval, const unsigned long timeout,
-		       const unsigned long max_rto, int max_retries)
-{
-	struct inet_connection_sock *icsk = inet_csk(parent);
-	struct listen_sock *lopt = queue->listen_opt;
-	int thresh = max_retries;
-	unsigned long now = jiffies;
-	struct request_sock **reqp, *req;
-	int i, budget;
-
-	if (lopt == NULL || lopt->qlen == 0)
-		return;
-
-	/* Normally all the openreqs are young and become mature
-	 * (i.e. converted to established socket) for first timeout.
-	 * If synack was not acknowledged for 3 seconds, it means
-	 * one of the following things: synack was lost, ack was lost,
-	 * rtt is high or nobody planned to ack (i.e. synflood).
-	 * When server is a bit loaded, queue is populated with old
-	 * open requests, reducing effective size of queue.
-	 * When server is well loaded, queue size reduces to zero
-	 * after several minutes of work. It is not synflood,
-	 * it is normal operation. The solution is pruning
-	 * too old entries overriding normal timeout, when
-	 * situation becomes dangerous.
-	 *
-	 * Essentially, we reserve half of room for young
-	 * embrions; and abort old ones without pity, if old
-	 * ones are about to clog our table.
-	 */
-	if (lopt->qlen >> (lopt->max_qlen_log - 1)) {
-		int young = (lopt->qlen_young << 1);
-
-		while (thresh > 2) {
-			if (lopt->qlen < young)
-				break;
-			thresh--;
-			young <<= 1;
-		}
-	}
-
-	if (queue->rskq_defer_accept)
-		max_retries = queue->rskq_defer_accept;
-
-	budget = 2 * (lopt->nr_table_entries / (timeout / interval));
-	i = lopt->clock_hand;
-
-	do {
-		reqp = &lopt->syn_table[i];
-		while ((req = *reqp) != NULL) {
-			if (time_after_eq(now, req->expires)) {
-				if ((req->retrans < thresh ||
-				     (inet_rsk(req)->acked && req->retrans < max_retries))
-				    && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) {
-					unsigned long timeo;
-
-					if (req->retrans++ == 0)
-						lopt->qlen_young--;
-					timeo = min((timeout << req->retrans), max_rto);
-					req->expires = now + timeo;
-					reqp = &req->dl_next;
-					continue;
-				}
-
-				/* Drop this request */
-				inet_csk_reqsk_queue_unlink(parent, req, reqp);
-				reqsk_queue_removed(&icsk->icsk_accept_queue, req);
-				reqsk_free(req);
-				continue;
-			}
-			reqp = &req->dl_next;
-		}
-
-		i = (i + 1) & (lopt->nr_table_entries - 1);
-
-	} while (--budget > 0);
-
-	lopt->clock_hand = i;
-
-	if (lopt->qlen)
-		inet_csk_reset_keepalive_timer(parent, interval);
-}
-
-EXPORT_SYMBOL_GPL(reqsk_queue_prune);
-
 /*
  *	Timer for listening sockets
  */
 
 static void tcp_synack_timer(struct sock *sk)
 {
-	struct inet_connection_sock *icsk = inet_csk(sk);
-	const int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
-
-	reqsk_queue_prune(&icsk->icsk_accept_queue, sk, TCP_SYNQ_INTERVAL,
-			  TCP_TIMEOUT_INIT, TCP_RTO_MAX, max_retries);
+	inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL,
+				   TCP_TIMEOUT_INIT, TCP_RTO_MAX);
 }
 
 void tcp_set_keepalive(struct sock *sk, int val)
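
[Editor's note] On the timing arguments TCP passes here: with
budget = 2 * (nr_table_entries / (timeout / interval)), each timer tick
scans only a slice of the SYN table, and the clock hand sweeps the whole
table roughly twice per initial-timeout period. A userspace sketch of the
arithmetic; it assumes the 2.6-era values TCP_SYNQ_INTERVAL = HZ/5 and
TCP_TIMEOUT_INIT = 3*HZ, and a 512-slot table for illustration — check
include/net/tcp.h in your tree for the exact definitions:

#include <stdio.h>

#define HZ 1000
#define TCP_SYNQ_INTERVAL (HZ / 5)	/* assumed: prune timer fires 5x/s */
#define TCP_TIMEOUT_INIT  (3 * HZ)	/* assumed: 3s initial SYN-ACK timeout */

int main(void)
{
	unsigned long interval = TCP_SYNQ_INTERVAL;
	unsigned long timeout = TCP_TIMEOUT_INIT;
	unsigned long nr_table_entries = 512;	/* example syn_table size */

	/* timeout/interval = timer ticks per initial timeout; scanning
	 * budget buckets per tick covers the table twice per timeout. */
	unsigned long budget = 2 * (nr_table_entries / (timeout / interval));

	printf("ticks per timeout: %lu\n", timeout / interval);	/* 15 */
	printf("buckets per tick:  %lu\n", budget);		/* 68 */
	return 0;
}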
