author     Florian Westphal <fw@strlen.de>        2015-07-21 10:33:50 -0400
committer  David S. Miller <davem@davemloft.net>  2015-07-22 01:22:56 -0400
commit     0470eb99b4721586ccac954faac3fa4472da0845
tree       1b0c260ac166160fddccb4e23ad3fd218b11e9d1 /net
parent     7c8cbacab1d5768f070fde86d4d552c2e1984431
netlink: don't hold mutex in rcu callback when releasing mmap'd ring
Kirill A. Shutemov says:

This simple test case triggers a few locking asserts in the kernel:

#include <stdlib.h>
#include <sys/mman.h>
#include <sys/socket.h>
#include <linux/netlink.h>

#ifndef SOL_NETLINK
#define SOL_NETLINK 270	/* from <linux/socket.h>; not all libc headers define it */
#endif

int main(int argc, char **argv)
{
	unsigned int block_size = 16 * 4096;
	struct nl_mmap_req req = {
		.nm_block_size = block_size,
		.nm_block_nr = 64,
		.nm_frame_size = 16384,
		.nm_frame_nr = 64 * block_size / 16384,
	};
	unsigned int ring_size;
	int fd;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);

	/* ring setup requires a kernel built with CONFIG_NETLINK_MMAP */
	if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0)
		exit(1);
	if (setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req)) < 0)
		exit(1);

	ring_size = req.nm_block_nr * req.nm_block_size;
	mmap(NULL, 2 * ring_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	return 0;
}

+++ exited with 0 +++
BUG: sleeping function called from invalid context at /home/kas/git/public/linux-mm/kernel/locking/mutex.c:616
in_atomic(): 1, irqs_disabled(): 0, pid: 1, name: init
3 locks held by init/1:
 #0:  (reboot_mutex){+.+...}, at: [<ffffffff81080959>] SyS_reboot+0xa9/0x220
 #1:  ((reboot_notifier_list).rwsem){.+.+..}, at: [<ffffffff8107f379>] __blocking_notifier_call_chain+0x39/0x70
 #2:  (rcu_callback){......}, at: [<ffffffff810d32e0>] rcu_do_batch.isra.49+0x160/0x10c0
Preemption disabled at: [<ffffffff8145365f>] __delay+0xf/0x20
CPU: 1 PID: 1 Comm: init Not tainted 4.1.0-00009-gbddf4c4818e0 #253
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS Debian-1.8.2-1 04/01/2014
 ffff88017b3d8000 ffff88027bc03c38 ffffffff81929ceb 0000000000000102
 0000000000000000 ffff88027bc03c68 ffffffff81085a9d 0000000000000002
 ffffffff81ca2a20 0000000000000268 0000000000000000 ffff88027bc03c98
Call Trace:
 <IRQ>  [<ffffffff81929ceb>] dump_stack+0x4f/0x7b
 [<ffffffff81085a9d>] ___might_sleep+0x16d/0x270
 [<ffffffff81085bed>] __might_sleep+0x4d/0x90
 [<ffffffff8192e96f>] mutex_lock_nested+0x2f/0x430
 [<ffffffff81932fed>] ? _raw_spin_unlock_irqrestore+0x5d/0x80
 [<ffffffff81464143>] ? __this_cpu_preempt_check+0x13/0x20
 [<ffffffff8182fc3d>] netlink_set_ring+0x1ed/0x350
 [<ffffffff8182e000>] ? netlink_undo_bind+0x70/0x70
 [<ffffffff8182fe20>] netlink_sock_destruct+0x80/0x150
 [<ffffffff817e484d>] __sk_free+0x1d/0x160
 [<ffffffff817e49a9>] sk_free+0x19/0x20
 [..]

Cong Wang says: We can't hold mutex lock in a rcu callback, [..]

Thomas Graf says: The socket should be dead at this point. It might be
simpler to add a netlink_release_ring() function which doesn't require
locking at all.

Reported-by: "Kirill A. Shutemov" <kirill@shutemov.name>
Diagnosed-by: Cong Wang <cwang@twopensource.com>
Suggested-by: Thomas Graf <tgraf@suug.ch>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
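For context on the rule being violated: an RCU callback runs from softirq
context once the grace period ends, so in_atomic() is true there and
anything that may sleep, including mutex_lock(), is forbidden; spinlocks
are fine because they never sleep. A minimal sketch of the broken versus
safe pattern, using hypothetical example_* names, not code from this patch:

#include <linux/mutex.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

static DEFINE_MUTEX(example_mutex);	/* sleeping lock */
static DEFINE_SPINLOCK(example_lock);	/* non-sleeping lock */

struct example_obj {
	struct rcu_head rcu;
};

/* Runs from softirq context after all RCU readers are done. */
static void example_reclaim(struct rcu_head *head)
{
	struct example_obj *obj = container_of(head, struct example_obj, rcu);

	mutex_lock(&example_mutex);	/* BUG: may sleep in atomic context;
					 * this is what netlink_sock_destruct()
					 * did via netlink_set_ring() */
	mutex_unlock(&example_mutex);

	spin_lock_bh(&example_lock);	/* OK: never sleeps */
	spin_unlock_bh(&example_lock);

	kfree(obj);
}

static void example_free(struct example_obj *obj)
{
	call_rcu(&obj->rcu, example_reclaim);	/* deferred reclaim */
}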
Diffstat (limited to 'net')
-rw-r--r--  net/netlink/af_netlink.c | 79
1 file changed, 47 insertions(+), 32 deletions(-)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 9a0ae7172f92..d8e2e3918ce2 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -357,25 +357,52 @@ err1:
 	return NULL;
 }
 
+
+static void
+__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec,
+		   unsigned int order)
+{
+	struct netlink_sock *nlk = nlk_sk(sk);
+	struct sk_buff_head *queue;
+	struct netlink_ring *ring;
+
+	queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
+	ring  = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
+
+	spin_lock_bh(&queue->lock);
+
+	ring->frame_max		= req->nm_frame_nr - 1;
+	ring->head		= 0;
+	ring->frame_size	= req->nm_frame_size;
+	ring->pg_vec_pages	= req->nm_block_size / PAGE_SIZE;
+
+	swap(ring->pg_vec_len, req->nm_block_nr);
+	swap(ring->pg_vec_order, order);
+	swap(ring->pg_vec, pg_vec);
+
+	__skb_queue_purge(queue);
+	spin_unlock_bh(&queue->lock);
+
+	WARN_ON(atomic_read(&nlk->mapped));
+
+	if (pg_vec)
+		free_pg_vec(pg_vec, order, req->nm_block_nr);
+}
+
 static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
-			    bool closing, bool tx_ring)
+			    bool tx_ring)
 {
 	struct netlink_sock *nlk = nlk_sk(sk);
 	struct netlink_ring *ring;
-	struct sk_buff_head *queue;
 	void **pg_vec = NULL;
 	unsigned int order = 0;
-	int err;
 
 	ring  = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
-	queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
 
-	if (!closing) {
-		if (atomic_read(&nlk->mapped))
-			return -EBUSY;
-		if (atomic_read(&ring->pending))
-			return -EBUSY;
-	}
+	if (atomic_read(&nlk->mapped))
+		return -EBUSY;
+	if (atomic_read(&ring->pending))
+		return -EBUSY;
 
 	if (req->nm_block_nr) {
 		if (ring->pg_vec != NULL)
@@ -407,31 +434,19 @@ static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
 			return -EINVAL;
 	}
 
-	err = -EBUSY;
 	mutex_lock(&nlk->pg_vec_lock);
-	if (closing || atomic_read(&nlk->mapped) == 0) {
-		err = 0;
-		spin_lock_bh(&queue->lock);
-
-		ring->frame_max		= req->nm_frame_nr - 1;
-		ring->head		= 0;
-		ring->frame_size	= req->nm_frame_size;
-		ring->pg_vec_pages	= req->nm_block_size / PAGE_SIZE;
-
-		swap(ring->pg_vec_len, req->nm_block_nr);
-		swap(ring->pg_vec_order, order);
-		swap(ring->pg_vec, pg_vec);
-
-		__skb_queue_purge(queue);
-		spin_unlock_bh(&queue->lock);
-
-		WARN_ON(atomic_read(&nlk->mapped));
+	if (atomic_read(&nlk->mapped) == 0) {
+		__netlink_set_ring(sk, req, tx_ring, pg_vec, order);
+		mutex_unlock(&nlk->pg_vec_lock);
+		return 0;
 	}
+
 	mutex_unlock(&nlk->pg_vec_lock);
 
 	if (pg_vec)
 		free_pg_vec(pg_vec, order, req->nm_block_nr);
-	return err;
+
+	return -EBUSY;
 }
 
 static void netlink_mm_open(struct vm_area_struct *vma)
@@ -900,10 +915,10 @@ static void netlink_sock_destruct(struct sock *sk)
 
 		memset(&req, 0, sizeof(req));
 		if (nlk->rx_ring.pg_vec)
-			netlink_set_ring(sk, &req, true, false);
+			__netlink_set_ring(sk, &req, false, NULL, 0);
 		memset(&req, 0, sizeof(req));
 		if (nlk->tx_ring.pg_vec)
-			netlink_set_ring(sk, &req, true, true);
+			__netlink_set_ring(sk, &req, true, NULL, 0);
 	}
 #endif /* CONFIG_NETLINK_MMAP */
 
@@ -2223,7 +2238,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
 			return -EINVAL;
 		if (copy_from_user(&req, optval, sizeof(req)))
 			return -EFAULT;
-		err = netlink_set_ring(sk, &req, false,
-				       optname == NETLINK_TX_RING);
+		err = netlink_set_ring(sk, &req,
+				       optname == NETLINK_TX_RING);
 		break;
 	}
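To summarize the resulting split (both call patterns appear verbatim in the
diff above): the setsockopt() path runs in process context and keeps taking
the pg_vec_lock mutex, while the destructor path calls the new helper
directly, so only the queue spinlock is taken there:

	/* process context (netlink_setsockopt): may sleep, takes pg_vec_lock */
	err = netlink_set_ring(sk, &req, optname == NETLINK_TX_RING);

	/* destructor (netlink_sock_destruct, possibly inside an RCU callback):
	 * mutex-free; a NULL pg_vec with order 0 tears the ring down under
	 * the queue spinlock only
	 */
	__netlink_set_ring(sk, &req, false, NULL, 0);	/* rx ring */
	__netlink_set_ring(sk, &req, true, NULL, 0);	/* tx ring */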