diff options
author | Karl Heiss <kheiss@gmail.com> | 2015-09-24 12:15:07 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-09-29 00:03:40 -0400 |
commit | 635682a14427d241bab7bbdeebb48a7d7b91638e (patch) | |
tree | 43ead3fc2c0b7395b6cb4ac5bf0aa7b92a8eea85 /net | |
parent | f05940e61845951517eda02a28ccc091888aaab9 (diff) |
sctp: Prevent soft lockup when sctp_accept() is called during a timeout event
A case can occur when sctp_accept() is called by the user during
a heartbeat timeout event after the 4-way handshake. Since
sctp_assoc_migrate() changes both assoc->base.sk and assoc->ep, the
bh_sock_lock in sctp_generate_heartbeat_event() will be taken with
the listening socket but released with the new association socket.
The result is a deadlock on any future attempts to take the listening
socket lock.
Note that this race can occur with other SCTP timeouts that take
the bh_lock_sock() in the event sctp_accept() is called.
BUG: soft lockup - CPU#9 stuck for 67s! [swapper:0]
...
RIP: 0010:[<ffffffff8152d48e>] [<ffffffff8152d48e>] _spin_lock+0x1e/0x30
RSP: 0018:ffff880028323b20 EFLAGS: 00000206
RAX: 0000000000000002 RBX: ffff880028323b20 RCX: 0000000000000000
RDX: 0000000000000000 RSI: ffff880028323be0 RDI: ffff8804632c4b48
RBP: ffffffff8100bb93 R08: 0000000000000000 R09: 0000000000000000
R10: ffff880610662280 R11: 0000000000000100 R12: ffff880028323aa0
R13: ffff8804383c3880 R14: ffff880028323a90 R15: ffffffff81534225
FS: 0000000000000000(0000) GS:ffff880028320000(0000) knlGS:0000000000000000
CS: 0010 DS: 0018 ES: 0018 CR0: 000000008005003b
CR2: 00000000006df528 CR3: 0000000001a85000 CR4: 00000000000006e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400
Process swapper (pid: 0, threadinfo ffff880616b70000, task ffff880616b6cab0)
Stack:
ffff880028323c40 ffffffffa01c2582 ffff880614cfb020 0000000000000000
<d> 0100000000000000 00000014383a6c44 ffff8804383c3880 ffff880614e93c00
<d> ffff880614e93c00 0000000000000000 ffff8804632c4b00 ffff8804383c38b8
Call Trace:
<IRQ>
[<ffffffffa01c2582>] ? sctp_rcv+0x492/0xa10 [sctp]
[<ffffffff8148c559>] ? nf_iterate+0x69/0xb0
[<ffffffff814974a0>] ? ip_local_deliver_finish+0x0/0x2d0
[<ffffffff8148c716>] ? nf_hook_slow+0x76/0x120
[<ffffffff814974a0>] ? ip_local_deliver_finish+0x0/0x2d0
[<ffffffff8149757d>] ? ip_local_deliver_finish+0xdd/0x2d0
[<ffffffff81497808>] ? ip_local_deliver+0x98/0xa0
[<ffffffff81496ccd>] ? ip_rcv_finish+0x12d/0x440
[<ffffffff81497255>] ? ip_rcv+0x275/0x350
[<ffffffff8145cfeb>] ? __netif_receive_skb+0x4ab/0x750
...
With lockdep debugging:
=====================================
[ BUG: bad unlock balance detected! ]
-------------------------------------
CslRx/12087 is trying to release lock (slock-AF_INET) at:
[<ffffffffa01bcae0>] sctp_generate_timeout_event+0x40/0xe0 [sctp]
but there are no more locks to release!
other info that might help us debug this:
2 locks held by CslRx/12087:
#0: (&asoc->timers[i]){+.-...}, at: [<ffffffff8108ce1f>] run_timer_softirq+0x16f/0x3e0
#1: (slock-AF_INET){+.-...}, at: [<ffffffffa01bcac3>] sctp_generate_timeout_event+0x23/0xe0 [sctp]
Ensure the socket taken is also the same one that is released by
saving a copy of the socket before entering the timeout event
critical section.
Signed-off-by: Karl Heiss <kheiss@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r-- | net/sctp/sm_sideeffect.c | 42 |
1 file changed, 23 insertions(+), 19 deletions(-)
diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c
index f554b9a96e07..6098d4c42fa9 100644
--- a/net/sctp/sm_sideeffect.c
+++ b/net/sctp/sm_sideeffect.c
@@ -244,12 +244,13 @@ void sctp_generate_t3_rtx_event(unsigned long peer) | |||
244 | int error; | 244 | int error; |
245 | struct sctp_transport *transport = (struct sctp_transport *) peer; | 245 | struct sctp_transport *transport = (struct sctp_transport *) peer; |
246 | struct sctp_association *asoc = transport->asoc; | 246 | struct sctp_association *asoc = transport->asoc; |
247 | struct net *net = sock_net(asoc->base.sk); | 247 | struct sock *sk = asoc->base.sk; |
248 | struct net *net = sock_net(sk); | ||
248 | 249 | ||
249 | /* Check whether a task is in the sock. */ | 250 | /* Check whether a task is in the sock. */ |
250 | 251 | ||
251 | bh_lock_sock(asoc->base.sk); | 252 | bh_lock_sock(sk); |
252 | if (sock_owned_by_user(asoc->base.sk)) { | 253 | if (sock_owned_by_user(sk)) { |
253 | pr_debug("%s: sock is busy\n", __func__); | 254 | pr_debug("%s: sock is busy\n", __func__); |
254 | 255 | ||
255 | /* Try again later. */ | 256 | /* Try again later. */ |
@@ -272,10 +273,10 @@ void sctp_generate_t3_rtx_event(unsigned long peer) | |||
272 | transport, GFP_ATOMIC); | 273 | transport, GFP_ATOMIC); |
273 | 274 | ||
274 | if (error) | 275 | if (error) |
275 | asoc->base.sk->sk_err = -error; | 276 | sk->sk_err = -error; |
276 | 277 | ||
277 | out_unlock: | 278 | out_unlock: |
278 | bh_unlock_sock(asoc->base.sk); | 279 | bh_unlock_sock(sk); |
279 | sctp_transport_put(transport); | 280 | sctp_transport_put(transport); |
280 | } | 281 | } |
281 | 282 | ||
@@ -285,11 +286,12 @@ out_unlock: | |||
285 | static void sctp_generate_timeout_event(struct sctp_association *asoc, | 286 | static void sctp_generate_timeout_event(struct sctp_association *asoc, |
286 | sctp_event_timeout_t timeout_type) | 287 | sctp_event_timeout_t timeout_type) |
287 | { | 288 | { |
288 | struct net *net = sock_net(asoc->base.sk); | 289 | struct sock *sk = asoc->base.sk; |
290 | struct net *net = sock_net(sk); | ||
289 | int error = 0; | 291 | int error = 0; |
290 | 292 | ||
291 | bh_lock_sock(asoc->base.sk); | 293 | bh_lock_sock(sk); |
292 | if (sock_owned_by_user(asoc->base.sk)) { | 294 | if (sock_owned_by_user(sk)) { |
293 | pr_debug("%s: sock is busy: timer %d\n", __func__, | 295 | pr_debug("%s: sock is busy: timer %d\n", __func__, |
294 | timeout_type); | 296 | timeout_type); |
295 | 297 | ||
@@ -312,10 +314,10 @@ static void sctp_generate_timeout_event(struct sctp_association *asoc, | |||
312 | (void *)timeout_type, GFP_ATOMIC); | 314 | (void *)timeout_type, GFP_ATOMIC); |
313 | 315 | ||
314 | if (error) | 316 | if (error) |
315 | asoc->base.sk->sk_err = -error; | 317 | sk->sk_err = -error; |
316 | 318 | ||
317 | out_unlock: | 319 | out_unlock: |
318 | bh_unlock_sock(asoc->base.sk); | 320 | bh_unlock_sock(sk); |
319 | sctp_association_put(asoc); | 321 | sctp_association_put(asoc); |
320 | } | 322 | } |
321 | 323 | ||
@@ -365,10 +367,11 @@ void sctp_generate_heartbeat_event(unsigned long data) | |||
365 | int error = 0; | 367 | int error = 0; |
366 | struct sctp_transport *transport = (struct sctp_transport *) data; | 368 | struct sctp_transport *transport = (struct sctp_transport *) data; |
367 | struct sctp_association *asoc = transport->asoc; | 369 | struct sctp_association *asoc = transport->asoc; |
368 | struct net *net = sock_net(asoc->base.sk); | 370 | struct sock *sk = asoc->base.sk; |
371 | struct net *net = sock_net(sk); | ||
369 | 372 | ||
370 | bh_lock_sock(asoc->base.sk); | 373 | bh_lock_sock(sk); |
371 | if (sock_owned_by_user(asoc->base.sk)) { | 374 | if (sock_owned_by_user(sk)) { |
372 | pr_debug("%s: sock is busy\n", __func__); | 375 | pr_debug("%s: sock is busy\n", __func__); |
373 | 376 | ||
374 | /* Try again later. */ | 377 | /* Try again later. */ |
@@ -389,10 +392,10 @@ void sctp_generate_heartbeat_event(unsigned long data) | |||
389 | transport, GFP_ATOMIC); | 392 | transport, GFP_ATOMIC); |
390 | 393 | ||
391 | if (error) | 394 | if (error) |
392 | asoc->base.sk->sk_err = -error; | 395 | sk->sk_err = -error; |
393 | 396 | ||
394 | out_unlock: | 397 | out_unlock: |
395 | bh_unlock_sock(asoc->base.sk); | 398 | bh_unlock_sock(sk); |
396 | sctp_transport_put(transport); | 399 | sctp_transport_put(transport); |
397 | } | 400 | } |
398 | 401 | ||
@@ -403,10 +406,11 @@ void sctp_generate_proto_unreach_event(unsigned long data) | |||
403 | { | 406 | { |
404 | struct sctp_transport *transport = (struct sctp_transport *) data; | 407 | struct sctp_transport *transport = (struct sctp_transport *) data; |
405 | struct sctp_association *asoc = transport->asoc; | 408 | struct sctp_association *asoc = transport->asoc; |
406 | struct net *net = sock_net(asoc->base.sk); | 409 | struct sock *sk = asoc->base.sk; |
410 | struct net *net = sock_net(sk); | ||
407 | 411 | ||
408 | bh_lock_sock(asoc->base.sk); | 412 | bh_lock_sock(sk); |
409 | if (sock_owned_by_user(asoc->base.sk)) { | 413 | if (sock_owned_by_user(sk)) { |
410 | pr_debug("%s: sock is busy\n", __func__); | 414 | pr_debug("%s: sock is busy\n", __func__); |
411 | 415 | ||
412 | /* Try again later. */ | 416 | /* Try again later. */ |
@@ -427,7 +431,7 @@ void sctp_generate_proto_unreach_event(unsigned long data) | |||
427 | asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); | 431 | asoc->state, asoc->ep, asoc, transport, GFP_ATOMIC); |
428 | 432 | ||
429 | out_unlock: | 433 | out_unlock: |
430 | bh_unlock_sock(asoc->base.sk); | 434 | bh_unlock_sock(sk); |
431 | sctp_association_put(asoc); | 435 | sctp_association_put(asoc); |
432 | } | 436 | } |
433 | 437 | ||