path: root/net/netlink
author		Patrick McHardy <kaber@trash.net>	2013-04-17 02:47:04 -0400
committer	David S. Miller <davem@davemloft.net>	2013-04-19 14:57:58 -0400
commit		f9c2288837ba072b21dba955f04a4c97eaa77b1e (patch)
tree		c02c28e33176fc82b8fd560b3a850351b28617e2 /net/netlink
parent		5fd96123ee19b96be7d7b57fd42227e1a146ef05 (diff)
netlink: implement memory mapped recvmsg()
Add support for mmap'ed recvmsg(). To allow the kernel to construct messages
into the mapped area, a dataless skb is allocated and the data pointer is set
to point into the ring frame.

This means frames will be delivered to userspace in order of allocation
instead of order of transmission. This usually doesn't matter since the order
is either not determinable by userspace or message creation/transmission is
serialized. The only case where this can have a visible difference is
nfnetlink_queue. Userspace can't assume mmap'ed messages have ordered IDs
anymore and needs to check this if using batched verdicts.

For non-mapped sockets, nothing changes.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
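As a rough illustration (not part of this patch), the receive-side behaviour
described above can be consumed from userspace roughly as sketched below. This
is a minimal sketch assuming a kernel built with CONFIG_NETLINK_MMAP and a
netlink socket whose RX ring has already been configured with
setsockopt(NETLINK_RX_RING) and mmap(), as introduced earlier in this series;
the helper name read_ring, the buffer size and the printf() reporting are
illustrative only.

#include <errno.h>
#include <poll.h>
#include <stdio.h>
#include <sys/socket.h>
#include <linux/netlink.h>

/* Drain a memory mapped netlink RX ring. Frames the kernel constructed
 * directly in the ring carry NL_MMAP_STATUS_VALID; frames that did not fit
 * a ring slot (or were otherwise queued conventionally) are flagged
 * NL_MMAP_STATUS_COPY and must be fetched with recv(). As the commit message
 * notes, mmap'ed frames appear in allocation order, not necessarily
 * transmission order.
 */
static void read_ring(int fd, char *rx_ring, unsigned int frame_size,
		      unsigned int ring_size)
{
	unsigned int frame_offset = 0;
	char copy_buf[16384];

	for (;;) {
		struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLERR };

		if (poll(&pfd, 1, -1) < 0 && errno != EINTR)
			return;
		if (pfd.revents & POLLERR)
			return;		/* e.g. ENOBUFS reported via sk_err */
		if (!(pfd.revents & POLLIN))
			continue;

		for (;;) {
			struct nl_mmap_hdr *hdr;
			struct nlmsghdr *nlh;
			ssize_t len;

			hdr = (struct nl_mmap_hdr *)(rx_ring + frame_offset);
			if (hdr->nm_status == NL_MMAP_STATUS_VALID) {
				/* message sits directly in the ring frame */
				nlh = (struct nlmsghdr *)((char *)hdr + NL_MMAP_HDRLEN);
				len = hdr->nm_len;
				printf("ring frame: type %u, %zd bytes\n",
				       (unsigned int)nlh->nlmsg_type, len);
			} else if (hdr->nm_status == NL_MMAP_STATUS_COPY) {
				/* frame was queued to the socket instead */
				len = recv(fd, copy_buf, sizeof(copy_buf), MSG_DONTWAIT);
				if (len > 0)
					printf("copied frame: %zd bytes\n", len);
			} else {
				break;	/* no more pending frames */
			}

			/* release the frame back to the kernel and advance */
			hdr->nm_status = NL_MMAP_STATUS_UNUSED;
			frame_offset = (frame_offset + frame_size) % ring_size;
		}
	}
}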
Diffstat (limited to 'net/netlink')
-rw-r--r--	net/netlink/af_netlink.c	144
1 file changed, 141 insertions(+), 3 deletions(-)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 90504a0e42ab..d120b5d4d86a 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -116,6 +116,11 @@ static bool netlink_skb_is_mmaped(const struct sk_buff *skb)
 	return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
 }
 
+static bool netlink_rx_is_mmaped(struct sock *sk)
+{
+	return nlk_sk(sk)->rx_ring.pg_vec != NULL;
+}
+
 static bool netlink_tx_is_mmaped(struct sock *sk)
 {
 	return nlk_sk(sk)->tx_ring.pg_vec != NULL;
@@ -589,8 +594,54 @@ out:
 	mutex_unlock(&nlk->pg_vec_lock);
 	return err;
 }
+
+static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb)
+{
+	struct nl_mmap_hdr *hdr;
+
+	hdr = netlink_mmap_hdr(skb);
+	hdr->nm_len = skb->len;
+	hdr->nm_group = NETLINK_CB(skb).dst_group;
+	hdr->nm_pid = NETLINK_CB(skb).creds.pid;
+	hdr->nm_uid = NETLINK_CB(skb).creds.uid;
+	hdr->nm_gid = NETLINK_CB(skb).creds.gid;
+	netlink_frame_flush_dcache(hdr);
+	netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
+
+	NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;
+	kfree_skb(skb);
+}
+
+static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb)
+{
+	struct netlink_sock *nlk = nlk_sk(sk);
+	struct netlink_ring *ring = &nlk->rx_ring;
+	struct nl_mmap_hdr *hdr;
+
+	spin_lock_bh(&sk->sk_receive_queue.lock);
+	hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
+	if (hdr == NULL) {
+		spin_unlock_bh(&sk->sk_receive_queue.lock);
+		kfree_skb(skb);
+		sk->sk_err = ENOBUFS;
+		sk->sk_error_report(sk);
+		return;
+	}
+	netlink_increment_head(ring);
+	__skb_queue_tail(&sk->sk_receive_queue, skb);
+	spin_unlock_bh(&sk->sk_receive_queue.lock);
+
+	hdr->nm_len = skb->len;
+	hdr->nm_group = NETLINK_CB(skb).dst_group;
+	hdr->nm_pid = NETLINK_CB(skb).creds.pid;
+	hdr->nm_uid = NETLINK_CB(skb).creds.uid;
+	hdr->nm_gid = NETLINK_CB(skb).creds.gid;
+	netlink_set_status(hdr, NL_MMAP_STATUS_COPY);
+}
+
 #else /* CONFIG_NETLINK_MMAP */
 #define netlink_skb_is_mmaped(skb)	false
+#define netlink_rx_is_mmaped(sk)	false
 #define netlink_tx_is_mmaped(sk)	false
 #define netlink_mmap			sock_no_mmap
 #define netlink_poll			datagram_poll
@@ -1381,7 +1432,14 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
 {
 	int len = skb->len;
 
-	skb_queue_tail(&sk->sk_receive_queue, skb);
+#ifdef CONFIG_NETLINK_MMAP
+	if (netlink_skb_is_mmaped(skb))
+		netlink_queue_mmaped_skb(sk, skb);
+	else if (netlink_rx_is_mmaped(sk))
+		netlink_ring_set_copied(sk, skb);
+	else
+#endif /* CONFIG_NETLINK_MMAP */
+		skb_queue_tail(&sk->sk_receive_queue, skb);
 	sk->sk_data_ready(sk, len);
 	return len;
 }
@@ -1492,6 +1550,68 @@ retry:
 }
 EXPORT_SYMBOL(netlink_unicast);
 
+struct sk_buff *netlink_alloc_skb(struct sock *ssk, unsigned int size,
+				  u32 dst_portid, gfp_t gfp_mask)
+{
+#ifdef CONFIG_NETLINK_MMAP
+	struct sock *sk = NULL;
+	struct sk_buff *skb;
+	struct netlink_ring *ring;
+	struct nl_mmap_hdr *hdr;
+	unsigned int maxlen;
+
+	sk = netlink_getsockbyportid(ssk, dst_portid);
+	if (IS_ERR(sk))
+		goto out;
+
+	ring = &nlk_sk(sk)->rx_ring;
+	/* fast-path without atomic ops for common case: non-mmaped receiver */
+	if (ring->pg_vec == NULL)
+		goto out_put;
+
+	skb = alloc_skb_head(gfp_mask);
+	if (skb == NULL)
+		goto err1;
+
+	spin_lock_bh(&sk->sk_receive_queue.lock);
+	/* check again under lock */
+	if (ring->pg_vec == NULL)
+		goto out_free;
+
+	maxlen = ring->frame_size - NL_MMAP_HDRLEN;
+	if (maxlen < size)
+		goto out_free;
+
+	netlink_forward_ring(ring);
+	hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
+	if (hdr == NULL)
+		goto err2;
+	netlink_ring_setup_skb(skb, sk, ring, hdr);
+	netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
+	atomic_inc(&ring->pending);
+	netlink_increment_head(ring);
+
+	spin_unlock_bh(&sk->sk_receive_queue.lock);
+	return skb;
+
+err2:
+	kfree_skb(skb);
+	spin_unlock_bh(&sk->sk_receive_queue.lock);
+err1:
+	sock_put(sk);
+	return NULL;
+
+out_free:
+	kfree_skb(skb);
+	spin_unlock_bh(&sk->sk_receive_queue.lock);
+out_put:
+	sock_put(sk);
+out:
+#endif
+	return alloc_skb(size, gfp_mask);
+}
+EXPORT_SYMBOL_GPL(netlink_alloc_skb);
+
 int netlink_has_listeners(struct sock *sk, unsigned int group)
 {
 	int res = 0;
@@ -2270,9 +2390,13 @@ static int netlink_dump(struct sock *sk)
 
 	alloc_size = max_t(int, cb->min_dump_alloc, NLMSG_GOODSIZE);
 
-	skb = sock_rmalloc(sk, alloc_size, 0, GFP_KERNEL);
+	if (!netlink_rx_is_mmaped(sk) &&
+	    atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
+		goto errout_skb;
+	skb = netlink_alloc_skb(sk, alloc_size, nlk->portid, GFP_KERNEL);
 	if (!skb)
 		goto errout_skb;
+	netlink_skb_set_owner_r(skb, sk);
 
 	len = cb->dump(skb, cb);
 
@@ -2327,6 +2451,19 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 	if (cb == NULL)
 		return -ENOBUFS;
 
+	/* Memory mapped dump requests need to be copied to avoid looping
+	 * on the pending state in netlink_mmap_sendmsg() while the CB hold
+	 * a reference to the skb.
+	 */
+	if (netlink_skb_is_mmaped(skb)) {
+		skb = skb_copy(skb, GFP_KERNEL);
+		if (skb == NULL) {
+			kfree(cb);
+			return -ENOBUFS;
+		}
+	} else
+		atomic_inc(&skb->users);
+
 	cb->dump = control->dump;
 	cb->done = control->done;
 	cb->nlh = nlh;
@@ -2387,7 +2524,8 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
 	if (err)
 		payload += nlmsg_len(nlh);
 
-	skb = nlmsg_new(payload, GFP_KERNEL);
+	skb = netlink_alloc_skb(in_skb->sk, nlmsg_total_size(payload),
+				NETLINK_CB(in_skb).portid, GFP_KERNEL);
 	if (!skb) {
 		struct sock *sk;
 
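The last three hunks convert netlink_dump() and netlink_ack() from
sock_rmalloc()/nlmsg_new() to the new netlink_alloc_skb() helper. A
hypothetical producer elsewhere in the kernel would follow the same pattern;
the sketch below is not part of this patch, and EXAMPLE_MSG_TYPE, the u32
payload and example_notify() are placeholders for illustration only.

#include <linux/netlink.h>
#include <net/netlink.h>
#include <net/sock.h>

/* Hypothetical kernel-side sketch: allocate via netlink_alloc_skb() so the
 * message is built directly in the receiver's mmap'ed RX ring frame when one
 * is configured, and in an ordinary skb otherwise (the function falls back to
 * alloc_skb() for non-mapped receivers, as shown in the hunk above).
 */
static int example_notify(struct sock *nl_sk, u32 dst_portid, u32 value)
{
	struct sk_buff *skb;
	struct nlmsghdr *nlh;

	skb = netlink_alloc_skb(nl_sk, nlmsg_total_size(sizeof(u32)),
				dst_portid, GFP_KERNEL);
	if (skb == NULL)
		return -ENOMEM;

	nlh = nlmsg_put(skb, 0, 0, EXAMPLE_MSG_TYPE, sizeof(u32), 0);
	if (nlh == NULL) {
		kfree_skb(skb);
		return -EMSGSIZE;
	}
	*(u32 *)nlmsg_data(nlh) = value;

	/* delivery goes through __netlink_sendskb(), which picks the mmap,
	 * copy-to-ring or regular receive queue path shown above */
	return netlink_unicast(nl_sk, skb, dst_portid, MSG_DONTWAIT);
}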