author	Patrick McHardy <kaber@trash.net>	2013-04-17 02:47:03 -0400
committer	David S. Miller <davem@davemloft.net>	2013-04-19 14:57:57 -0400
commit	5fd96123ee19b96be7d7b57fd42227e1a146ef05
tree	291383cb7d7b8b719bb370fe8483c8109a93aaff /net/netlink
parent	9652e931e73be7e54a9c40e9bcd4bbdafe92a406
netlink: implement memory mapped sendmsg()
Add support for mmap'ed sendmsg() to netlink. Since the kernel validates
received messages before processing them, the code makes sure userspace
can't modify the message contents after invoking sendmsg(). To do that,
only a single mapping of the TX ring may exist and the socket must not be
shared. If either of these conditions does not hold, it falls back to
copying.

Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/netlink')
-rw-r--r--	net/netlink/af_netlink.c	135
1 file changed, 129 insertions(+), 6 deletions(-)
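For context, here is a minimal userspace sketch (hypothetical, not part of the patch) of how a sender might exercise the new path: configure a TX ring with setsockopt(NETLINK_TX_RING), mmap() it, fill a frame, mark it NL_MMAP_STATUS_VALID, and call sendto() with a NULL buffer so that netlink_sendmsg() takes the new netlink_mmap_sendmsg() branch. The ring geometry, the NETLINK_GENERIC protocol and the NLMSG_NOOP test message are arbitrary assumptions; the nl_mmap_req/nl_mmap_hdr definitions come from the linux/netlink.h additions earlier in this patch series.

/* mmap_tx_demo.c - hypothetical example, not part of this patch.
 * Requires a linux/netlink.h that carries the NETLINK_TX_RING additions
 * from the earlier patches in this series.
 */
#include <sys/socket.h>
#include <sys/mman.h>
#include <linux/netlink.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>

#ifndef SOL_NETLINK
#define SOL_NETLINK 270
#endif

int main(void)
{
	struct nl_mmap_req req = {
		.nm_block_size	= 4096,			/* assumed geometry */
		.nm_block_nr	= 16,
		.nm_frame_size	= 1024,
		.nm_frame_nr	= 16 * 4096 / 1024,	/* 64 frames total */
	};
	struct nl_mmap_hdr *hdr;
	struct nlmsghdr *nlh;
	void *ring;
	int fd;

	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
	if (fd < 0)
		return 1;

	/* Configure a TX ring only; the mmap() length must then match the
	 * TX ring size and the ring starts at offset 0 of the mapping. */
	if (setsockopt(fd, SOL_NETLINK, NETLINK_TX_RING, &req, sizeof(req)) < 0)
		return 1;
	ring = mmap(NULL, req.nm_block_size * req.nm_block_nr,
		    PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (ring == MAP_FAILED)
		return 1;

	/* Fill the first frame: nl_mmap_hdr, then the netlink message. */
	hdr = ring;
	nlh = (struct nlmsghdr *)((char *)hdr + NL_MMAP_HDRLEN);
	memset(nlh, 0, sizeof(*nlh));
	nlh->nlmsg_len	 = NLMSG_HDRLEN;	/* empty NOOP request (made up) */
	nlh->nlmsg_type	 = NLMSG_NOOP;
	nlh->nlmsg_flags = NLM_F_REQUEST;

	hdr->nm_len    = nlh->nlmsg_len;
	hdr->nm_status = NL_MMAP_STATUS_VALID;	/* hand the frame to the kernel */

	/* NULL buffer: this is what makes netlink_sendmsg() pick the
	 * mmap'ed path (msg->msg_iov->iov_base == NULL). */
	if (sendto(fd, NULL, 0, 0, NULL, 0) < 0)
		perror("sendto");

	munmap(ring, req.nm_block_size * req.nm_block_nr);
	close(fd);
	return 0;
}

The NULL buffer passed to sendto() is the key detail: the check added near the end of this patch only diverts into netlink_mmap_sendmsg() when msg->msg_iov->iov_base is NULL, so the same socket can still be used for ordinary copied sends.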
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 6560635fd25c..90504a0e42ab 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -116,6 +116,11 @@ static bool netlink_skb_is_mmaped(const struct sk_buff *skb)
 	return NETLINK_CB(skb).flags & NETLINK_SKB_MMAPED;
 }
 
+static bool netlink_tx_is_mmaped(struct sock *sk)
+{
+	return nlk_sk(sk)->tx_ring.pg_vec != NULL;
+}
+
 static __pure struct page *pgvec_to_page(const void *addr)
 {
 	if (is_vmalloc_addr(addr))
@@ -438,6 +443,9 @@ static unsigned int netlink_poll(struct file *file, struct socket *sock,
 	struct netlink_sock *nlk = nlk_sk(sk);
 	unsigned int mask;
 
+	if (nlk->cb != NULL && nlk->rx_ring.pg_vec != NULL)
+		netlink_dump(sk);
+
 	mask = datagram_poll(file, sock, wait);
 
 	spin_lock_bh(&sk->sk_receive_queue.lock);
@@ -483,10 +491,110 @@ static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk,
 	NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED;
 	NETLINK_CB(skb).sk = sk;
 }
+
+static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
+				u32 dst_portid, u32 dst_group,
+				struct sock_iocb *siocb)
+{
+	struct netlink_sock *nlk = nlk_sk(sk);
+	struct netlink_ring *ring;
+	struct nl_mmap_hdr *hdr;
+	struct sk_buff *skb;
+	unsigned int maxlen;
+	bool excl = true;
+	int err = 0, len = 0;
+
+	/* Netlink messages are validated by the receiver before processing.
+	 * In order to avoid userspace changing the contents of the message
+	 * after validation, the socket and the ring may only be used by a
+	 * single process, otherwise we fall back to copying.
+	 */
+	if (atomic_long_read(&sk->sk_socket->file->f_count) > 2 ||
+	    atomic_read(&nlk->mapped) > 1)
+		excl = false;
+
+	mutex_lock(&nlk->pg_vec_lock);
+
+	ring = &nlk->tx_ring;
+	maxlen = ring->frame_size - NL_MMAP_HDRLEN;
+
+	do {
+		hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID);
+		if (hdr == NULL) {
+			if (!(msg->msg_flags & MSG_DONTWAIT) &&
+			    atomic_read(&nlk->tx_ring.pending))
+				schedule();
+			continue;
+		}
+		if (hdr->nm_len > maxlen) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		netlink_frame_flush_dcache(hdr);
+
+		if (likely(dst_portid == 0 && dst_group == 0 && excl)) {
+			skb = alloc_skb_head(GFP_KERNEL);
+			if (skb == NULL) {
+				err = -ENOBUFS;
+				goto out;
+			}
+			sock_hold(sk);
+			netlink_ring_setup_skb(skb, sk, ring, hdr);
+			NETLINK_CB(skb).flags |= NETLINK_SKB_TX;
+			__skb_put(skb, hdr->nm_len);
+			netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
+			atomic_inc(&ring->pending);
+		} else {
+			skb = alloc_skb(hdr->nm_len, GFP_KERNEL);
+			if (skb == NULL) {
+				err = -ENOBUFS;
+				goto out;
+			}
+			__skb_put(skb, hdr->nm_len);
+			memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, hdr->nm_len);
+			netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
+		}
+
+		netlink_increment_head(ring);
+
+		NETLINK_CB(skb).portid = nlk->portid;
+		NETLINK_CB(skb).dst_group = dst_group;
+		NETLINK_CB(skb).creds = siocb->scm->creds;
+
+		err = security_netlink_send(sk, skb);
+		if (err) {
+			kfree_skb(skb);
+			goto out;
+		}
+
+		if (unlikely(dst_group)) {
+			atomic_inc(&skb->users);
+			netlink_broadcast(sk, skb, dst_portid, dst_group,
+					  GFP_KERNEL);
+		}
+		err = netlink_unicast(sk, skb, dst_portid,
+				      msg->msg_flags & MSG_DONTWAIT);
+		if (err < 0)
+			goto out;
+		len += err;
+
+	} while (hdr != NULL ||
+		 (!(msg->msg_flags & MSG_DONTWAIT) &&
+		  atomic_read(&nlk->tx_ring.pending)));
+
+	if (len > 0)
+		err = len;
+out:
+	mutex_unlock(&nlk->pg_vec_lock);
+	return err;
+}
 #else /* CONFIG_NETLINK_MMAP */
 #define netlink_skb_is_mmaped(skb)	false
+#define netlink_tx_is_mmaped(sk)	false
 #define netlink_mmap			sock_no_mmap
 #define netlink_poll			datagram_poll
+#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb)	0
 #endif /* CONFIG_NETLINK_MMAP */
 
 static void netlink_destroy_callback(struct netlink_callback *cb)
@@ -517,11 +625,16 @@ static void netlink_skb_destructor(struct sk_buff *skb)
 		hdr = netlink_mmap_hdr(skb);
 		sk = NETLINK_CB(skb).sk;
 
-		if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) {
-			hdr->nm_len = 0;
-			netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
+		if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) {
+			netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
+			ring = &nlk_sk(sk)->tx_ring;
+		} else {
+			if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) {
+				hdr->nm_len = 0;
+				netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
+			}
+			ring = &nlk_sk(sk)->rx_ring;
 		}
-		ring = &nlk_sk(sk)->rx_ring;
 
 		WARN_ON(atomic_read(&ring->pending) == 0);
 		atomic_dec(&ring->pending);
@@ -1230,8 +1343,9 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
 
 	nlk = nlk_sk(sk);
 
-	if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-	    test_bit(NETLINK_CONGESTED, &nlk->state)) {
+	if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
+	     test_bit(NETLINK_CONGESTED, &nlk->state)) &&
+	    !netlink_skb_is_mmaped(skb)) {
 		DECLARE_WAITQUEUE(wait, current);
 		if (!*timeo) {
 			if (!ssk || netlink_is_kernel(ssk))
@@ -1291,6 +1405,8 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
 	int delta;
 
 	WARN_ON(skb->sk != NULL);
+	if (netlink_skb_is_mmaped(skb))
+		return skb;
 
 	delta = skb->end - skb->tail;
 	if (delta * 2 < skb->truesize)
@@ -1815,6 +1931,13 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
 			goto out;
 	}
 
+	if (netlink_tx_is_mmaped(sk) &&
+	    msg->msg_iov->iov_base == NULL) {
+		err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group,
+					   siocb);
+		goto out;
+	}
+
 	err = -EMSGSIZE;
 	if (len > sk->sk_sndbuf - 32)
 		goto out;