author     Arnaldo Carvalho de Melo <acme@ghostprotocols.net>    2005-08-09 23:15:09 -0400
committer  David S. Miller <davem@sunset.davemloft.net>          2005-08-29 18:49:50 -0400
commit     a019d6fe2b9da68ea4ba6cf3c4e86fc1dbf554c3 (patch)
tree       f82f0523c313228d64998fac30790edcfd0785c3 /net/ipv4/inet_connection_sock.c
parent     7c657876b63cb1d8a2ec06f8fc6c37bb8412e66c (diff)
[ICSK]: Move generalised functions from tcp to inet_connection_sock
This also improves reqsk_queue_prune and renames it to
inet_csk_reqsk_queue_prune, as it deals with both inet_connection_sock
and inet_request_sock objects, not just with request_sock ones, thus
belonging to inet_request_sock.
Signed-off-by: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
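For context on how the generalised helper is meant to be driven: after this move, a protocol's SYN-ACK timer is expected to delegate to inet_csk_reqsk_queue_prune(). A minimal caller sketch follows; the constants (TCP_SYNQ_INTERVAL, TCP_TIMEOUT_INIT, TCP_RTO_MAX) are assumed from the contemporary TCP code and are not part of this diff.

/* Hypothetical caller sketch (not part of this commit): how a protocol's
 * SYN-ACK timer would hand off to the generalised helper. The three
 * constants are assumptions taken from the TCP code of the same era.
 */
static void tcp_synack_timer(struct sock *sk)
{
	inet_csk_reqsk_queue_prune(sk, TCP_SYNQ_INTERVAL,
				   TCP_TIMEOUT_INIT, TCP_RTO_MAX);
}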
Diffstat (limited to 'net/ipv4/inet_connection_sock.c')
-rw-r--r--   net/ipv4/inet_connection_sock.c   214
1 file changed, 214 insertions(+), 0 deletions(-)
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 136ada050b63..026630a15ea0 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -23,6 +23,7 @@
 #include <net/ip.h>
 #include <net/route.h>
 #include <net/tcp_states.h>
+#include <net/xfrm.h>
 
 #ifdef INET_CSK_DEBUG
 const char inet_csk_timer_bug_msg[] = "inet_csk BUG: unknown timer value\n";
@@ -398,8 +399,100 @@ void inet_csk_reqsk_queue_hash_add(struct sock *sk, struct request_sock *req,
 	inet_csk_reqsk_queue_added(sk, timeout);
 }
 
+/* Only thing we need from tcp.h */
+extern int sysctl_tcp_synack_retries;
+
 EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_hash_add);
 
+void inet_csk_reqsk_queue_prune(struct sock *parent,
+				const unsigned long interval,
+				const unsigned long timeout,
+				const unsigned long max_rto)
+{
+	struct inet_connection_sock *icsk = inet_csk(parent);
+	struct request_sock_queue *queue = &icsk->icsk_accept_queue;
+	struct listen_sock *lopt = queue->listen_opt;
+	int max_retries = icsk->icsk_syn_retries ? : sysctl_tcp_synack_retries;
+	int thresh = max_retries;
+	unsigned long now = jiffies;
+	struct request_sock **reqp, *req;
+	int i, budget;
+
+	if (lopt == NULL || lopt->qlen == 0)
+		return;
+
+	/* Normally all the openreqs are young and become mature
+	 * (i.e. converted to established socket) for first timeout.
+	 * If synack was not acknowledged for 3 seconds, it means
+	 * one of the following things: synack was lost, ack was lost,
+	 * rtt is high or nobody planned to ack (i.e. synflood).
+	 * When server is a bit loaded, queue is populated with old
+	 * open requests, reducing effective size of queue.
+	 * When server is well loaded, queue size reduces to zero
+	 * after several minutes of work. It is not synflood,
+	 * it is normal operation. The solution is pruning
+	 * too old entries overriding normal timeout, when
+	 * situation becomes dangerous.
+	 *
+	 * Essentially, we reserve half of room for young
+	 * embrions; and abort old ones without pity, if old
+	 * ones are about to clog our table.
+	 */
+	if (lopt->qlen>>(lopt->max_qlen_log-1)) {
+		int young = (lopt->qlen_young<<1);
+
+		while (thresh > 2) {
+			if (lopt->qlen < young)
+				break;
+			thresh--;
+			young <<= 1;
+		}
+	}
+
+	if (queue->rskq_defer_accept)
+		max_retries = queue->rskq_defer_accept;
+
+	budget = 2 * (lopt->nr_table_entries / (timeout / interval));
+	i = lopt->clock_hand;
+
+	do {
+		reqp=&lopt->syn_table[i];
+		while ((req = *reqp) != NULL) {
+			if (time_after_eq(now, req->expires)) {
+				if ((req->retrans < thresh ||
+				     (inet_rsk(req)->acked && req->retrans < max_retries))
+				    && !req->rsk_ops->rtx_syn_ack(parent, req, NULL)) {
+					unsigned long timeo;
+
+					if (req->retrans++ == 0)
+						lopt->qlen_young--;
+					timeo = min((timeout << req->retrans), max_rto);
+					req->expires = now + timeo;
+					reqp = &req->dl_next;
+					continue;
+				}
+
+				/* Drop this request */
+				inet_csk_reqsk_queue_unlink(parent, req, reqp);
+				reqsk_queue_removed(queue, req);
+				reqsk_free(req);
+				continue;
+			}
+			reqp = &req->dl_next;
+		}
+
+		i = (i + 1) & (lopt->nr_table_entries - 1);
+
+	} while (--budget > 0);
+
+	lopt->clock_hand = i;
+
+	if (lopt->qlen)
+		inet_csk_reset_keepalive_timer(parent, interval);
+}
+
+EXPORT_SYMBOL_GPL(inet_csk_reqsk_queue_prune);
+
 struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
 			    const unsigned int __nocast priority)
 {
@@ -424,3 +517,124 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req,
 }
 
 EXPORT_SYMBOL_GPL(inet_csk_clone);
+
+/*
+ * At this point, there should be no process reference to this
+ * socket, and thus no user references at all. Therefore we
+ * can assume the socket waitqueue is inactive and nobody will
+ * try to jump onto it.
+ */
+void inet_csk_destroy_sock(struct sock *sk)
+{
+	BUG_TRAP(sk->sk_state == TCP_CLOSE);
+	BUG_TRAP(sock_flag(sk, SOCK_DEAD));
+
+	/* It cannot be in hash table! */
+	BUG_TRAP(sk_unhashed(sk));
+
+	/* If it has not 0 inet_sk(sk)->num, it must be bound */
+	BUG_TRAP(!inet_sk(sk)->num || inet_csk(sk)->icsk_bind_hash);
+
+	sk->sk_prot->destroy(sk);
+
+	sk_stream_kill_queues(sk);
+
+	xfrm_sk_free_policy(sk);
+
+	sk_refcnt_debug_release(sk);
+
+	atomic_dec(sk->sk_prot->orphan_count);
+	sock_put(sk);
+}
+
+EXPORT_SYMBOL(inet_csk_destroy_sock);
+
+int inet_csk_listen_start(struct sock *sk, const int nr_table_entries)
+{
+	struct inet_sock *inet = inet_sk(sk);
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	int rc = reqsk_queue_alloc(&icsk->icsk_accept_queue, nr_table_entries);
+
+	if (rc != 0)
+		return rc;
+
+	sk->sk_max_ack_backlog = 0;
+	sk->sk_ack_backlog = 0;
+	inet_csk_delack_init(sk);
+
+	/* There is race window here: we announce ourselves listening,
+	 * but this transition is still not validated by get_port().
+	 * It is OK, because this socket enters to hash table only
+	 * after validation is complete.
+	 */
+	sk->sk_state = TCP_LISTEN;
+	if (!sk->sk_prot->get_port(sk, inet->num)) {
+		inet->sport = htons(inet->num);
+
+		sk_dst_reset(sk);
+		sk->sk_prot->hash(sk);
+
+		return 0;
+	}
+
+	sk->sk_state = TCP_CLOSE;
+	__reqsk_queue_destroy(&icsk->icsk_accept_queue);
+	return -EADDRINUSE;
+}
+
+EXPORT_SYMBOL_GPL(inet_csk_listen_start);
+
+/*
+ * This routine closes sockets which have been at least partially
+ * opened, but not yet accepted.
+ */
+void inet_csk_listen_stop(struct sock *sk)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct request_sock *acc_req;
+	struct request_sock *req;
+
+	inet_csk_delete_keepalive_timer(sk);
+
+	/* make all the listen_opt local to us */
+	acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue);
+
+	/* Following specs, it would be better either to send FIN
+	 * (and enter FIN-WAIT-1, it is normal close)
+	 * or to send active reset (abort).
+	 * Certainly, it is pretty dangerous while synflood, but it is
+	 * bad justification for our negligence 8)
+	 * To be honest, we are not able to make either
+	 * of the variants now. --ANK
+	 */
+	reqsk_queue_destroy(&icsk->icsk_accept_queue);
+
+	while ((req = acc_req) != NULL) {
+		struct sock *child = req->sk;
+
+		acc_req = req->dl_next;
+
+		local_bh_disable();
+		bh_lock_sock(child);
+		BUG_TRAP(!sock_owned_by_user(child));
+		sock_hold(child);
+
+		sk->sk_prot->disconnect(child, O_NONBLOCK);
+
+		sock_orphan(child);
+
+		atomic_inc(sk->sk_prot->orphan_count);
+
+		inet_csk_destroy_sock(child);
+
+		bh_unlock_sock(child);
+		local_bh_enable();
+		sock_put(child);
+
+		sk_acceptq_removed(sk);
+		__reqsk_free(req);
+	}
+	BUG_TRAP(!sk->sk_ack_backlog);
+}
+
+EXPORT_SYMBOL_GPL(inet_csk_listen_stop);
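As a rough, self-contained illustration of the clock-hand budget computed in inet_csk_reqsk_queue_prune() above: the sketch below plugs in assumed values (a 512-entry SYN table, interval = HZ/5 and timeout = 3*HZ, matching a typical TCP listener of this era); none of these numbers appear in the diff itself.

/* Standalone sketch of the per-tick scan budget; all constants below are
 * illustrative assumptions, not values taken from this commit. */
#include <stdio.h>

int main(void)
{
	const unsigned long hz = 1000;           /* assumed timer tick rate */
	const unsigned long interval = hz / 5;   /* e.g. TCP_SYNQ_INTERVAL  */
	const unsigned long timeout = 3 * hz;    /* e.g. TCP_TIMEOUT_INIT   */
	const unsigned long nr_table_entries = 512;

	/* Same formula as the kernel function above. */
	unsigned long budget = 2 * (nr_table_entries / (timeout / interval));

	printf("buckets scanned per tick: %lu\n", budget);  /* -> 68 */
	printf("ticks to cover the table: ~%lu\n",
	       (nr_table_entries + budget - 1) / budget);   /* -> 8  */
	return 0;
}

With these assumed numbers the whole hash table is swept in about eight intervals (~1.6 s), i.e. within roughly half a timeout period, and the routine only re-arms itself via inet_csk_reset_keepalive_timer(parent, interval) while requests remain queued.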