diff options
author | Herton R. Krzesinski <herton@redhat.com> | 2014-10-01 17:49:54 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-10-03 15:52:00 -0400 |
commit | 593cbb3ec6a3f2424966832727f394b1696d0d72 (patch) | |
tree | 5ce5ea7eed46aa59ff245a02ababb35a6b2b1d44 /net | |
parent | eb74cc97b830c1e438dc1d6b049f17bdb2b9aae5 (diff) |
net/rds: fix possible double free on sock tear down
I got a report of a double free happening at RDS slab cache. One
suspicion was that may be somewhere we were doing a sock_hold/sock_put
on an already freed sock. Thus after providing a kernel with the
following change:
static inline void sock_hold(struct sock *sk)
{
- atomic_inc(&sk->sk_refcnt);
+ if (!atomic_inc_not_zero(&sk->sk_refcnt))
+ WARN(1, "Trying to hold sock already gone: %p (family: %hd)\n",
+ sk, sk->sk_family);
}
The warning successfuly triggered:
Trying to hold sock already gone: ffff81f6dda61280 (family: 21)
WARNING: at include/net/sock.h:350 sock_hold()
Call Trace:
<IRQ> [<ffffffff8adac135>] :rds:rds_send_remove_from_sock+0xf0/0x21b
[<ffffffff8adad35c>] :rds:rds_send_drop_acked+0xbf/0xcf
[<ffffffff8addf546>] :rds_rdma:rds_ib_recv_tasklet_fn+0x256/0x2dc
[<ffffffff8009899a>] tasklet_action+0x8f/0x12b
[<ffffffff800125a2>] __do_softirq+0x89/0x133
[<ffffffff8005f30c>] call_softirq+0x1c/0x28
[<ffffffff8006e644>] do_softirq+0x2c/0x7d
[<ffffffff8006e4d4>] do_IRQ+0xee/0xf7
[<ffffffff8005e625>] ret_from_intr+0x0/0xa
<EOI>
Looking at the call chain above, the only way I think this would be
possible is if somewhere we already released the same socket->sock which
is assigned to the rds_message at rds_send_remove_from_sock. Which seems
only possible to happen after the tear down done on rds_release.
rds_release properly calls rds_send_drop_to to drop the socket from any
rds_message, and some proper synchronization is in place to avoid race
with rds_send_drop_acked/rds_send_remove_from_sock. However, I still see
a very narrow window where it may be possible we touch a sock already
released: when rds_release races with rds_send_drop_acked, we check
RDS_MSG_ON_CONN to avoid cleanup on the same rds_message, but in this
specific case we don't clear rm->m_rs. In this case, it seems we could
then go on at rds_send_drop_to and after it returns, the sock is freed
by last sock_put on rds_release, with concurrently we being at
rds_send_remove_from_sock; then at some point in the loop at
rds_send_remove_from_sock we process an rds_message which didn't have
rm->m_rs unset for a freed sock, and a possible sock_hold on an sock
already gone at rds_release happens.
This hopefully address the described condition above and avoids a double
free on "second last" sock_put. In addition, I removed the comment about
socket destruction on top of rds_send_drop_acked: we call rds_send_drop_to
in rds_release and we should have things properly serialized there, thus
I can't see the comment being accurate there.
Signed-off-by: Herton R. Krzesinski <herton@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/rds/send.c | 11 |
1 files changed, 7 insertions, 4 deletions
diff --git a/net/rds/send.c b/net/rds/send.c index 23718160d71e..0a64541020b0 100644 --- a/net/rds/send.c +++ b/net/rds/send.c | |||
@@ -593,8 +593,11 @@ static void rds_send_remove_from_sock(struct list_head *messages, int status) | |||
593 | sock_put(rds_rs_to_sk(rs)); | 593 | sock_put(rds_rs_to_sk(rs)); |
594 | } | 594 | } |
595 | rs = rm->m_rs; | 595 | rs = rm->m_rs; |
596 | sock_hold(rds_rs_to_sk(rs)); | 596 | if (rs) |
597 | sock_hold(rds_rs_to_sk(rs)); | ||
597 | } | 598 | } |
599 | if (!rs) | ||
600 | goto unlock_and_drop; | ||
598 | spin_lock(&rs->rs_lock); | 601 | spin_lock(&rs->rs_lock); |
599 | 602 | ||
600 | if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) { | 603 | if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) { |
@@ -638,9 +641,6 @@ unlock_and_drop: | |||
638 | * queue. This means that in the TCP case, the message may not have been | 641 | * queue. This means that in the TCP case, the message may not have been |
639 | * assigned the m_ack_seq yet - but that's fine as long as tcp_is_acked | 642 | * assigned the m_ack_seq yet - but that's fine as long as tcp_is_acked |
640 | * checks the RDS_MSG_HAS_ACK_SEQ bit. | 643 | * checks the RDS_MSG_HAS_ACK_SEQ bit. |
641 | * | ||
642 | * XXX It's not clear to me how this is safely serialized with socket | ||
643 | * destruction. Maybe it should bail if it sees SOCK_DEAD. | ||
644 | */ | 644 | */ |
645 | void rds_send_drop_acked(struct rds_connection *conn, u64 ack, | 645 | void rds_send_drop_acked(struct rds_connection *conn, u64 ack, |
646 | is_acked_func is_acked) | 646 | is_acked_func is_acked) |
@@ -711,6 +711,9 @@ void rds_send_drop_to(struct rds_sock *rs, struct sockaddr_in *dest) | |||
711 | */ | 711 | */ |
712 | if (!test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) { | 712 | if (!test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags)) { |
713 | spin_unlock_irqrestore(&conn->c_lock, flags); | 713 | spin_unlock_irqrestore(&conn->c_lock, flags); |
714 | spin_lock_irqsave(&rm->m_rs_lock, flags); | ||
715 | rm->m_rs = NULL; | ||
716 | spin_unlock_irqrestore(&rm->m_rs_lock, flags); | ||
714 | continue; | 717 | continue; |
715 | } | 718 | } |
716 | list_del_init(&rm->m_conn_item); | 719 | list_del_init(&rm->m_conn_item); |