aboutsummaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp_minisocks.c
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2008-06-12 19:31:35 -0400
committerDavid S. Miller <davem@davemloft.net>2008-06-12 19:34:35 -0400
commitec0a196626bd12e0ba108d7daa6d95a4fb25c2c5 (patch)
tree68d9c2923765e12853368e8edb27b241142e0c48 /net/ipv4/tcp_minisocks.c
parentf23d60de719e639690b2dc5c2d0e4243ff614b7a (diff)
tcp: Revert 'process defer accept as established' changes.
This reverts two changesets, ec3c0982a2dd1e671bad8e9d26c28dcba0039d87 ("[TCP]: TCP_DEFER_ACCEPT updates - process as established") and the follow-on bug fix 9ae27e0adbf471c7a6b80102e38e1d5a346b3b38 ("tcp: Fix slab corruption with ipv6 and tcp6fuzz"). This change causes several problems, first reported by Ingo Molnar as a distcc-over-loopback regression where connections were getting stuck. Ilpo Järvinen first spotted the locking problems. The new function added by this code, tcp_defer_accept_check(), only has the child socket locked, yet it is modifying state of the parent listening socket. Fixing that is non-trivial at best, because we can't simply just grab the parent listening socket lock at this point, because it would create an ABBA deadlock. The normal ordering is parent listening socket --> child socket, but this code path would require the reverse lock ordering. Next is a problem noticed by Vitaliy Gusev, he noted: ---------------------------------------- >--- a/net/ipv4/tcp_timer.c >+++ b/net/ipv4/tcp_timer.c >@@ -481,6 +481,11 @@ static void tcp_keepalive_timer (unsigned long data) > goto death; > } > >+ if (tp->defer_tcp_accept.request && sk->sk_state == TCP_ESTABLISHED) { >+ tcp_send_active_reset(sk, GFP_ATOMIC); >+ goto death; Here socket sk is not attached to listening socket's request queue. tcp_done() will not call inet_csk_destroy_sock() (and tcp_v4_destroy_sock() which should release this sk) as socket is not DEAD. Therefore socket sk will be lost for freeing. ---------------------------------------- Finally, Alexey Kuznetsov argues that there might not even be any real value or advantage to these new semantics even if we fix all of the bugs: ---------------------------------------- Hiding from accept() sockets with only out-of-order data only is the only thing which is impossible with old approach. Is this really so valuable? My opinion: no, this is nothing but a new loophole to consume memory without control. ---------------------------------------- So revert this thing for now. Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4/tcp_minisocks.c')
-rw-r--r--net/ipv4/tcp_minisocks.c32
1 files changed, 12 insertions, 20 deletions
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 019c8c16e5cc..8245247a6ceb 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -571,8 +571,10 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
571 does sequence test, SYN is truncated, and thus we consider 571 does sequence test, SYN is truncated, and thus we consider
572 it a bare ACK. 572 it a bare ACK.
573 573
574 Both ends (listening sockets) accept the new incoming 574 If icsk->icsk_accept_queue.rskq_defer_accept, we silently drop this
575 connection and try to talk to each other. 8-) 575 bare ACK. Otherwise, we create an established connection. Both
576 ends (listening sockets) accept the new incoming connection and try
577 to talk to each other. 8-)
576 578
577 Note: This case is both harmless, and rare. Possibility is about the 579 Note: This case is both harmless, and rare. Possibility is about the
578 same as us discovering intelligent life on another plant tomorrow. 580 same as us discovering intelligent life on another plant tomorrow.
@@ -640,6 +642,13 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
640 if (!(flg & TCP_FLAG_ACK)) 642 if (!(flg & TCP_FLAG_ACK))
641 return NULL; 643 return NULL;
642 644
645 /* If TCP_DEFER_ACCEPT is set, drop bare ACK. */
646 if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept &&
647 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
648 inet_rsk(req)->acked = 1;
649 return NULL;
650 }
651
643 /* OK, ACK is valid, create big socket and 652 /* OK, ACK is valid, create big socket and
644 * feed this segment to it. It will repeat all 653 * feed this segment to it. It will repeat all
645 * the tests. THIS SEGMENT MUST MOVE SOCKET TO 654 * the tests. THIS SEGMENT MUST MOVE SOCKET TO
@@ -678,24 +687,7 @@ struct sock *tcp_check_req(struct sock *sk,struct sk_buff *skb,
678 inet_csk_reqsk_queue_unlink(sk, req, prev); 687 inet_csk_reqsk_queue_unlink(sk, req, prev);
679 inet_csk_reqsk_queue_removed(sk, req); 688 inet_csk_reqsk_queue_removed(sk, req);
680 689
681 if (inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && 690 inet_csk_reqsk_queue_add(sk, req, child);
682 TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) {
683
684 /* the accept queue handling is done is est recv slow
685 * path so lets make sure to start there
686 */
687 tcp_sk(child)->pred_flags = 0;
688 sock_hold(sk);
689 sock_hold(child);
690 tcp_sk(child)->defer_tcp_accept.listen_sk = sk;
691 tcp_sk(child)->defer_tcp_accept.request = req;
692
693 inet_csk_reset_keepalive_timer(child,
694 inet_csk(sk)->icsk_accept_queue.rskq_defer_accept * HZ);
695 } else {
696 inet_csk_reqsk_queue_add(sk, req, child);
697 }
698
699 return child; 691 return child;
700 692
701 listen_overflow: 693 listen_overflow: