commit 0470eb99b4721586ccac954faac3fa4472da0845
Author:    Florian Westphal <fw@strlen.de>        2015-07-21 10:33:50 -0400
Committer: David S. Miller <davem@davemloft.net>  2015-07-22 01:22:56 -0400
Tree:      1b0c260ac166160fddccb4e23ad3fd218b11e9d1 /net
Parent:    7c8cbacab1d5768f070fde86d4d552c2e1984431
netlink: don't hold mutex in rcu callback when releasing mmapd ring
Kirill A. Shutemov says:
This simple test case triggers a few locking asserts in the kernel:
#include <stdlib.h>
#include <sys/socket.h>
#include <sys/mman.h>
#include <linux/netlink.h>

#ifndef SOL_NETLINK
#define SOL_NETLINK 270	/* older libc headers lack this */
#endif

int main(int argc, char **argv)
{
	unsigned int block_size = 16 * 4096;
	struct nl_mmap_req req = {
		.nm_block_size	= block_size,
		.nm_block_nr	= 64,
		.nm_frame_size	= 16384,
		.nm_frame_nr	= 64 * block_size / 16384,
	};
	unsigned int ring_size;
	int fd;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);

	if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0)
		exit(1);
	if (setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req)) < 0)
		exit(1);

	ring_size = req.nm_block_nr * req.nm_block_size;
	mmap(NULL, 2 * ring_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	return 0;
}
+++ exited with 0 +++
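(Reproducer notes: the test needs a kernel built with CONFIG_NETLINK_MMAP, otherwise both setsockopt() calls fail and it exits early. The `+++ exited with 0 +++` line is the tail of an strace run; the splat below lands in the kernel log — here while init was rebooting, since the deferred socket free ran from an RCU callback.)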
BUG: sleeping function called from invalid context at /home/kas/git/public/linux-mm/kernel/locking/mutex.c:616
in_atomic(): 1, irqs_disabled(): 0, pid: 1, name: init
3 locks held by init/1:
#0: (reboot_mutex){+.+...}, at: [<ffffffff81080959>] SyS_reboot+0xa9/0x220
#1: ((reboot_notifier_list).rwsem){.+.+..}, at: [<ffffffff8107f379>] __blocking_notifier_call_chain+0x39/0x70
#2: (rcu_callback){......}, at: [<ffffffff810d32e0>] rcu_do_batch.isra.49+0x160/0x10c0
Preemption disabled at:[<ffffffff8145365f>] __delay+0xf/0x20
CPU: 1 PID: 1 Comm: init Not tainted 4.1.0-00009-gbddf4c4818e0 #253
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS Debian-1.8.2-1 04/01/2014
ffff88017b3d8000 ffff88027bc03c38 ffffffff81929ceb 0000000000000102
0000000000000000 ffff88027bc03c68 ffffffff81085a9d 0000000000000002
ffffffff81ca2a20 0000000000000268 0000000000000000 ffff88027bc03c98
Call Trace:
<IRQ> [<ffffffff81929ceb>] dump_stack+0x4f/0x7b
[<ffffffff81085a9d>] ___might_sleep+0x16d/0x270
[<ffffffff81085bed>] __might_sleep+0x4d/0x90
[<ffffffff8192e96f>] mutex_lock_nested+0x2f/0x430
[<ffffffff81932fed>] ? _raw_spin_unlock_irqrestore+0x5d/0x80
[<ffffffff81464143>] ? __this_cpu_preempt_check+0x13/0x20
[<ffffffff8182fc3d>] netlink_set_ring+0x1ed/0x350
[<ffffffff8182e000>] ? netlink_undo_bind+0x70/0x70
[<ffffffff8182fe20>] netlink_sock_destruct+0x80/0x150
[<ffffffff817e484d>] __sk_free+0x1d/0x160
[<ffffffff817e49a9>] sk_free+0x19/0x20
[..]
Cong Wang says:
We can't hold a mutex lock in an rcu callback, [..]
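To make the constraint concrete, here is a minimal, hypothetical module sketch (not part of this patch; demo_mutex, demo_obj and demo_rcu_free are invented names): RCU callbacks run from softirq context via rcu_do_batch(), where sleeping is forbidden, so a mutex_lock() inside one trips the might_sleep() check exactly as in the splat above.

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

static DEFINE_MUTEX(demo_mutex);

struct demo_obj {
	struct rcu_head rcu;
};

/* Runs from softirq context once a grace period elapses. */
static void demo_rcu_free(struct rcu_head *head)
{
	struct demo_obj *obj = container_of(head, struct demo_obj, rcu);

	mutex_lock(&demo_mutex);	/* BUG: mutex_lock() may sleep,
					 * but we are in atomic context */
	mutex_unlock(&demo_mutex);
	kfree(obj);
}

static int __init demo_init(void)
{
	struct demo_obj *obj = kzalloc(sizeof(*obj), GFP_KERNEL);

	if (!obj)
		return -ENOMEM;
	call_rcu(&obj->rcu, demo_rcu_free);
	return 0;
}
module_init(demo_init);

MODULE_LICENSE("GPL");

With CONFIG_DEBUG_ATOMIC_SLEEP enabled, loading such a module produces the same "sleeping function called from invalid context" report.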
Thomas Graf says:
The socket should be dead at this point. It might be simpler to
add a netlink_release_ring() function which doesn't require
locking at all.
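Following that suggestion, the patch below splits the ring teardown out of netlink_set_ring(): a new helper __netlink_set_ring() performs the swap and purge under the queue spinlock only (spin_lock_bh() is safe from softirq context, unlike a mutex), while netlink_set_ring() keeps pg_vec_lock for the setsockopt() path. The socket destructor, which can run from an RCU callback, now calls the helper directly, and the now-unused `closing` argument disappears.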
Reported-by: "Kirill A. Shutemov" <kirill@shutemov.name>
Diagnosed-by: Cong Wang <cwang@twopensource.com>
Suggested-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')

 net/netlink/af_netlink.c | 79 ++++++++++++++++++++++++++++++-----------------
 1 file changed, 47 insertions(+), 32 deletions(-)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 9a0ae7172f92..d8e2e3918ce2 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -357,25 +357,52 @@ err1:
 	return NULL;
 }
 
+
+static void
+__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec,
+		   unsigned int order)
+{
+	struct netlink_sock *nlk = nlk_sk(sk);
+	struct sk_buff_head *queue;
+	struct netlink_ring *ring;
+
+	queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
+	ring  = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
+
+	spin_lock_bh(&queue->lock);
+
+	ring->frame_max		= req->nm_frame_nr - 1;
+	ring->head		= 0;
+	ring->frame_size	= req->nm_frame_size;
+	ring->pg_vec_pages	= req->nm_block_size / PAGE_SIZE;
+
+	swap(ring->pg_vec_len, req->nm_block_nr);
+	swap(ring->pg_vec_order, order);
+	swap(ring->pg_vec, pg_vec);
+
+	__skb_queue_purge(queue);
+	spin_unlock_bh(&queue->lock);
+
+	WARN_ON(atomic_read(&nlk->mapped));
+
+	if (pg_vec)
+		free_pg_vec(pg_vec, order, req->nm_block_nr);
+}
+
 static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
-			    bool closing, bool tx_ring)
+			    bool tx_ring)
 {
 	struct netlink_sock *nlk = nlk_sk(sk);
 	struct netlink_ring *ring;
-	struct sk_buff_head *queue;
 	void **pg_vec = NULL;
 	unsigned int order = 0;
-	int err;
 
 	ring  = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
-	queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
 
-	if (!closing) {
-		if (atomic_read(&nlk->mapped))
-			return -EBUSY;
-		if (atomic_read(&ring->pending))
-			return -EBUSY;
-	}
+	if (atomic_read(&nlk->mapped))
+		return -EBUSY;
+	if (atomic_read(&ring->pending))
+		return -EBUSY;
 
 	if (req->nm_block_nr) {
 		if (ring->pg_vec != NULL)
@@ -407,31 +434,19 @@ static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
 			return -EINVAL;
 	}
 
-	err = -EBUSY;
 	mutex_lock(&nlk->pg_vec_lock);
-	if (closing || atomic_read(&nlk->mapped) == 0) {
-		err = 0;
-		spin_lock_bh(&queue->lock);
-
-		ring->frame_max		= req->nm_frame_nr - 1;
-		ring->head		= 0;
-		ring->frame_size	= req->nm_frame_size;
-		ring->pg_vec_pages	= req->nm_block_size / PAGE_SIZE;
-
-		swap(ring->pg_vec_len, req->nm_block_nr);
-		swap(ring->pg_vec_order, order);
-		swap(ring->pg_vec, pg_vec);
-
-		__skb_queue_purge(queue);
-		spin_unlock_bh(&queue->lock);
-
-		WARN_ON(atomic_read(&nlk->mapped));
+	if (atomic_read(&nlk->mapped) == 0) {
+		__netlink_set_ring(sk, req, tx_ring, pg_vec, order);
+		mutex_unlock(&nlk->pg_vec_lock);
+		return 0;
 	}
+
 	mutex_unlock(&nlk->pg_vec_lock);
 
 	if (pg_vec)
 		free_pg_vec(pg_vec, order, req->nm_block_nr);
-	return err;
+
+	return -EBUSY;
 }
 
 static void netlink_mm_open(struct vm_area_struct *vma)
@@ -900,10 +915,10 @@ static void netlink_sock_destruct(struct sock *sk)
 
 		memset(&req, 0, sizeof(req));
 		if (nlk->rx_ring.pg_vec)
-			netlink_set_ring(sk, &req, true, false);
+			__netlink_set_ring(sk, &req, false, NULL, 0);
 		memset(&req, 0, sizeof(req));
 		if (nlk->tx_ring.pg_vec)
-			netlink_set_ring(sk, &req, true, true);
+			__netlink_set_ring(sk, &req, true, NULL, 0);
 	}
 #endif /* CONFIG_NETLINK_MMAP */
 
@@ -2223,7 +2238,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
 			return -EINVAL;
 		if (copy_from_user(&req, optval, sizeof(req)))
 			return -EFAULT;
-		err = netlink_set_ring(sk, &req, false,
+		err = netlink_set_ring(sk, &req,
 				       optname == NETLINK_TX_RING);
 		break;
 	}