aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHerbert Xu <herbert@gondor.apana.org.au>2006-05-04 02:31:35 -0400
committerDavid S. Miller <davem@davemloft.net>2006-05-04 02:31:35 -0400
commit75c2d9077c63ac21488129cc23561d4f4fd0f5e5 (patch)
treef32af17fdadc749f899f66371981055fa8eab35a
parent82e84249f0ee098e004c8bd6d90a1640bd56cfbb (diff)
[TCP]: Fix sock_orphan dead lock
Calling sock_orphan inside bh_lock_sock in tcp_close can lead to dead locks. For example, the inet_diag code holds sk_callback_lock without disabling BH. If an inbound packet arrives during that admittedly tiny window, it will cause a dead lock on bh_lock_sock. Another possible path would be through sock_wfree if the network device driver frees the tx skb in process context with BH enabled. We can fix this by moving sock_orphan out of bh_lock_sock. The tricky bit is to work out when we need to destroy the socket ourselves and when it has already been destroyed by someone else. By moving sock_orphan before the release_sock we can solve this problem. This is because as long as we own the socket lock its state cannot change. So we simply record the socket state before the release_sock and then check the state again after we regain the socket lock. If the socket state has transitioned to TCP_CLOSE in the time being, we know that the socket has been destroyed. Otherwise the socket is still ours to keep. Note that I've also moved the increment on the orphan count forward. This may look like a problem as we're increasing it even if the socket is just about to be destroyed where it'll be decreased again. However, this simply enlarges a window that already exists. This also changes the orphan count test by one. Considering what the orphan count is meant to do this is no big deal. This problem was discoverd by Ingo Molnar using his lock validator. Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--net/ipv4/tcp.c13
1 files changed, 9 insertions, 4 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 87f68e787d0c..e2b7b8055037 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1468,6 +1468,7 @@ void tcp_close(struct sock *sk, long timeout)
1468{ 1468{
1469 struct sk_buff *skb; 1469 struct sk_buff *skb;
1470 int data_was_unread = 0; 1470 int data_was_unread = 0;
1471 int state;
1471 1472
1472 lock_sock(sk); 1473 lock_sock(sk);
1473 sk->sk_shutdown = SHUTDOWN_MASK; 1474 sk->sk_shutdown = SHUTDOWN_MASK;
@@ -1544,6 +1545,11 @@ void tcp_close(struct sock *sk, long timeout)
1544 sk_stream_wait_close(sk, timeout); 1545 sk_stream_wait_close(sk, timeout);
1545 1546
1546adjudge_to_death: 1547adjudge_to_death:
1548 state = sk->sk_state;
1549 sock_hold(sk);
1550 sock_orphan(sk);
1551 atomic_inc(sk->sk_prot->orphan_count);
1552
1547 /* It is the last release_sock in its life. It will remove backlog. */ 1553 /* It is the last release_sock in its life. It will remove backlog. */
1548 release_sock(sk); 1554 release_sock(sk);
1549 1555
@@ -1555,8 +1561,9 @@ adjudge_to_death:
1555 bh_lock_sock(sk); 1561 bh_lock_sock(sk);
1556 BUG_TRAP(!sock_owned_by_user(sk)); 1562 BUG_TRAP(!sock_owned_by_user(sk));
1557 1563
1558 sock_hold(sk); 1564 /* Have we already been destroyed by a softirq or backlog? */
1559 sock_orphan(sk); 1565 if (state != TCP_CLOSE && sk->sk_state == TCP_CLOSE)
1566 goto out;
1560 1567
1561 /* This is a (useful) BSD violating of the RFC. There is a 1568 /* This is a (useful) BSD violating of the RFC. There is a
1562 * problem with TCP as specified in that the other end could 1569 * problem with TCP as specified in that the other end could
@@ -1584,7 +1591,6 @@ adjudge_to_death:
1584 if (tmo > TCP_TIMEWAIT_LEN) { 1591 if (tmo > TCP_TIMEWAIT_LEN) {
1585 inet_csk_reset_keepalive_timer(sk, tcp_fin_time(sk)); 1592 inet_csk_reset_keepalive_timer(sk, tcp_fin_time(sk));
1586 } else { 1593 } else {
1587 atomic_inc(sk->sk_prot->orphan_count);
1588 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo); 1594 tcp_time_wait(sk, TCP_FIN_WAIT2, tmo);
1589 goto out; 1595 goto out;
1590 } 1596 }
@@ -1603,7 +1609,6 @@ adjudge_to_death:
1603 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY); 1609 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONMEMORY);
1604 } 1610 }
1605 } 1611 }
1606 atomic_inc(sk->sk_prot->orphan_count);
1607 1612
1608 if (sk->sk_state == TCP_CLOSE) 1613 if (sk->sk_state == TCP_CLOSE)
1609 inet_csk_destroy_sock(sk); 1614 inet_csk_destroy_sock(sk);