diff options
author | Eric Dumazet <eric.dumazet@gmail.com> | 2014-03-10 12:50:11 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2014-03-11 16:45:59 -0400 |
commit | c3f9b01849ef3bc69024990092b9f42e20df7797 (patch) | |
tree | 444bad5f01a2505544c8d2b78bd01059b8cacdc3 /net | |
parent | c7b76f854e01cfdfdfaeb22b2ba1350be5d09ad7 (diff) |
tcp: tcp_release_cb() should release socket ownership
Lars Persson reported following deadlock :
-000 |M:0x0:0x802B6AF8(asm) <-- arch_spin_lock
-001 |tcp_v4_rcv(skb = 0x8BD527A0) <-- sk = 0x8BE6B2A0
-002 |ip_local_deliver_finish(skb = 0x8BD527A0)
-003 |__netif_receive_skb_core(skb = 0x8BD527A0, ?)
-004 |netif_receive_skb(skb = 0x8BD527A0)
-005 |elk_poll(napi = 0x8C770500, budget = 64)
-006 |net_rx_action(?)
-007 |__do_softirq()
-008 |do_softirq()
-009 |local_bh_enable()
-010 |tcp_rcv_established(sk = 0x8BE6B2A0, skb = 0x87D3A9E0, th = 0x814EBE14, ?)
-011 |tcp_v4_do_rcv(sk = 0x8BE6B2A0, skb = 0x87D3A9E0)
-012 |tcp_delack_timer_handler(sk = 0x8BE6B2A0)
-013 |tcp_release_cb(sk = 0x8BE6B2A0)
-014 |release_sock(sk = 0x8BE6B2A0)
-015 |tcp_sendmsg(?, sk = 0x8BE6B2A0, ?, ?)
-016 |sock_sendmsg(sock = 0x8518C4C0, msg = 0x87D8DAA8, size = 4096)
-017 |kernel_sendmsg(?, ?, ?, ?, size = 4096)
-018 |smb_send_kvec()
-019 |smb_send_rqst(server = 0x87C4D400, rqst = 0x87D8DBA0)
-020 |cifs_call_async()
-021 |cifs_async_writev(wdata = 0x87FD6580)
-022 |cifs_writepages(mapping = 0x852096E4, wbc = 0x87D8DC88)
-023 |__writeback_single_inode(inode = 0x852095D0, wbc = 0x87D8DC88)
-024 |writeback_sb_inodes(sb = 0x87D6D800, wb = 0x87E4A9C0, work = 0x87D8DD88)
-025 |__writeback_inodes_wb(wb = 0x87E4A9C0, work = 0x87D8DD88)
-026 |wb_writeback(wb = 0x87E4A9C0, work = 0x87D8DD88)
-027 |wb_do_writeback(wb = 0x87E4A9C0, force_wait = 0)
-028 |bdi_writeback_workfn(work = 0x87E4A9CC)
-029 |process_one_work(worker = 0x8B045880, work = 0x87E4A9CC)
-030 |worker_thread(__worker = 0x8B045880)
-031 |kthread(_create = 0x87CADD90)
-032 |ret_from_kernel_thread(asm)
Bug occurs because __tcp_checksum_complete_user() enables BH, assuming
it is running from softirq context.
Lars trace involved a NIC without RX checksum support but other points
are problematic as well, like the prequeue stuff.
Problem is triggered by a timer, that found socket being owned by user.
tcp_release_cb() should call tcp_write_timer_handler() or
tcp_delack_timer_handler() in the appropriate context :
BH disabled and socket lock held, but 'owned' field cleared,
as if they were running from timer handlers.
Fixes: 6f458dfb4092 ("tcp: improve latencies of timer triggered events")
Reported-by: Lars Persson <lars.persson@axis.com>
Tested-by: Lars Persson <lars.persson@axis.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/core/sock.c | 5 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 11 |
2 files changed, 15 insertions, 1 deletions
diff --git a/net/core/sock.c b/net/core/sock.c index 5b6a9431b017..c0fc6bdad1e3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c | |||
@@ -2357,10 +2357,13 @@ void release_sock(struct sock *sk) | |||
2357 | if (sk->sk_backlog.tail) | 2357 | if (sk->sk_backlog.tail) |
2358 | __release_sock(sk); | 2358 | __release_sock(sk); |
2359 | 2359 | ||
2360 | /* Warning : release_cb() might need to release sk ownership, | ||
2361 | * ie call sock_release_ownership(sk) before us. | ||
2362 | */ | ||
2360 | if (sk->sk_prot->release_cb) | 2363 | if (sk->sk_prot->release_cb) |
2361 | sk->sk_prot->release_cb(sk); | 2364 | sk->sk_prot->release_cb(sk); |
2362 | 2365 | ||
2363 | sk->sk_lock.owned = 0; | 2366 | sock_release_ownership(sk); |
2364 | if (waitqueue_active(&sk->sk_lock.wq)) | 2367 | if (waitqueue_active(&sk->sk_lock.wq)) |
2365 | wake_up(&sk->sk_lock.wq); | 2368 | wake_up(&sk->sk_lock.wq); |
2366 | spin_unlock_bh(&sk->sk_lock.slock); | 2369 | spin_unlock_bh(&sk->sk_lock.slock); |
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f0eb4e337ec8..17a11e65e57f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c | |||
@@ -767,6 +767,17 @@ void tcp_release_cb(struct sock *sk) | |||
767 | if (flags & (1UL << TCP_TSQ_DEFERRED)) | 767 | if (flags & (1UL << TCP_TSQ_DEFERRED)) |
768 | tcp_tsq_handler(sk); | 768 | tcp_tsq_handler(sk); |
769 | 769 | ||
770 | /* Here begins the tricky part : | ||
771 | * We are called from release_sock() with : | ||
772 | * 1) BH disabled | ||
773 | * 2) sk_lock.slock spinlock held | ||
774 | * 3) socket owned by us (sk->sk_lock.owned == 1) | ||
775 | * | ||
776 | * But following code is meant to be called from BH handlers, | ||
777 | * so we should keep BH disabled, but early release socket ownership | ||
778 | */ | ||
779 | sock_release_ownership(sk); | ||
780 | |||
770 | if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) { | 781 | if (flags & (1UL << TCP_WRITE_TIMER_DEFERRED)) { |
771 | tcp_write_timer_handler(sk); | 782 | tcp_write_timer_handler(sk); |
772 | __sock_put(sk); | 783 | __sock_put(sk); |