Diffstat (limited to 'net/netlink/af_netlink.c')
-rw-r--r--	net/netlink/af_netlink.c	786
1 file changed, 23 insertions(+), 763 deletions(-)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index f1ffb34e253f..330ebd600f25 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -225,7 +225,7 @@ static int __netlink_deliver_tap_skb(struct sk_buff *skb,
 
 	dev_hold(dev);
 
-	if (netlink_skb_is_mmaped(skb) || is_vmalloc_addr(skb->head))
+	if (is_vmalloc_addr(skb->head))
 		nskb = netlink_to_full_skb(skb, GFP_ATOMIC);
 	else
 		nskb = skb_clone(skb, GFP_ATOMIC);
@@ -300,610 +300,8 @@ static void netlink_rcv_wake(struct sock *sk)
 	wake_up_interruptible(&nlk->wait);
 }
 
-#ifdef CONFIG_NETLINK_MMAP
-static bool netlink_rx_is_mmaped(struct sock *sk)
-{
-	return nlk_sk(sk)->rx_ring.pg_vec != NULL;
-}
-
-static bool netlink_tx_is_mmaped(struct sock *sk)
-{
-	return nlk_sk(sk)->tx_ring.pg_vec != NULL;
-}
-
-static __pure struct page *pgvec_to_page(const void *addr)
-{
-	if (is_vmalloc_addr(addr))
-		return vmalloc_to_page(addr);
-	else
-		return virt_to_page(addr);
-}
-
-static void free_pg_vec(void **pg_vec, unsigned int order, unsigned int len)
-{
-	unsigned int i;
-
-	for (i = 0; i < len; i++) {
-		if (pg_vec[i] != NULL) {
-			if (is_vmalloc_addr(pg_vec[i]))
-				vfree(pg_vec[i]);
-			else
-				free_pages((unsigned long)pg_vec[i], order);
-		}
-	}
-	kfree(pg_vec);
-}
-
-static void *alloc_one_pg_vec_page(unsigned long order)
-{
-	void *buffer;
-	gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP | __GFP_ZERO |
-			  __GFP_NOWARN | __GFP_NORETRY;
-
-	buffer = (void *)__get_free_pages(gfp_flags, order);
-	if (buffer != NULL)
-		return buffer;
-
-	buffer = vzalloc((1 << order) * PAGE_SIZE);
-	if (buffer != NULL)
-		return buffer;
-
-	gfp_flags &= ~__GFP_NORETRY;
-	return (void *)__get_free_pages(gfp_flags, order);
-}
-
-static void **alloc_pg_vec(struct netlink_sock *nlk,
-			   struct nl_mmap_req *req, unsigned int order)
-{
-	unsigned int block_nr = req->nm_block_nr;
-	unsigned int i;
-	void **pg_vec;
-
-	pg_vec = kcalloc(block_nr, sizeof(void *), GFP_KERNEL);
-	if (pg_vec == NULL)
-		return NULL;
-
-	for (i = 0; i < block_nr; i++) {
-		pg_vec[i] = alloc_one_pg_vec_page(order);
-		if (pg_vec[i] == NULL)
-			goto err1;
-	}
-
-	return pg_vec;
-err1:
-	free_pg_vec(pg_vec, order, block_nr);
-	return NULL;
-}
-
-
-static void
-__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec,
-		   unsigned int order)
-{
-	struct netlink_sock *nlk = nlk_sk(sk);
-	struct sk_buff_head *queue;
-	struct netlink_ring *ring;
-
-	queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
-	ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
-
-	spin_lock_bh(&queue->lock);
-
-	ring->frame_max = req->nm_frame_nr - 1;
-	ring->head = 0;
-	ring->frame_size = req->nm_frame_size;
-	ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE;
-
-	swap(ring->pg_vec_len, req->nm_block_nr);
-	swap(ring->pg_vec_order, order);
-	swap(ring->pg_vec, pg_vec);
-
-	__skb_queue_purge(queue);
-	spin_unlock_bh(&queue->lock);
-
-	WARN_ON(atomic_read(&nlk->mapped));
-
-	if (pg_vec)
-		free_pg_vec(pg_vec, order, req->nm_block_nr);
-}
-
-static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
-			    bool tx_ring)
-{
-	struct netlink_sock *nlk = nlk_sk(sk);
-	struct netlink_ring *ring;
-	void **pg_vec = NULL;
-	unsigned int order = 0;
-
-	ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
-
-	if (atomic_read(&nlk->mapped))
-		return -EBUSY;
-	if (atomic_read(&ring->pending))
-		return -EBUSY;
-
-	if (req->nm_block_nr) {
-		if (ring->pg_vec != NULL)
-			return -EBUSY;
-
-		if ((int)req->nm_block_size <= 0)
-			return -EINVAL;
-		if (!PAGE_ALIGNED(req->nm_block_size))
-			return -EINVAL;
-		if (req->nm_frame_size < NL_MMAP_HDRLEN)
-			return -EINVAL;
-		if (!IS_ALIGNED(req->nm_frame_size, NL_MMAP_MSG_ALIGNMENT))
-			return -EINVAL;
-
-		ring->frames_per_block = req->nm_block_size /
-					 req->nm_frame_size;
-		if (ring->frames_per_block == 0)
-			return -EINVAL;
-		if (ring->frames_per_block * req->nm_block_nr !=
-		    req->nm_frame_nr)
-			return -EINVAL;
-
-		order = get_order(req->nm_block_size);
-		pg_vec = alloc_pg_vec(nlk, req, order);
-		if (pg_vec == NULL)
-			return -ENOMEM;
-	} else {
-		if (req->nm_frame_nr)
-			return -EINVAL;
-	}
-
-	mutex_lock(&nlk->pg_vec_lock);
-	if (atomic_read(&nlk->mapped) == 0) {
-		__netlink_set_ring(sk, req, tx_ring, pg_vec, order);
-		mutex_unlock(&nlk->pg_vec_lock);
-		return 0;
-	}
-
-	mutex_unlock(&nlk->pg_vec_lock);
-
-	if (pg_vec)
-		free_pg_vec(pg_vec, order, req->nm_block_nr);
-
-	return -EBUSY;
-}
-
-static void netlink_mm_open(struct vm_area_struct *vma)
-{
-	struct file *file = vma->vm_file;
-	struct socket *sock = file->private_data;
-	struct sock *sk = sock->sk;
-
-	if (sk)
-		atomic_inc(&nlk_sk(sk)->mapped);
-}
-
-static void netlink_mm_close(struct vm_area_struct *vma)
-{
-	struct file *file = vma->vm_file;
-	struct socket *sock = file->private_data;
-	struct sock *sk = sock->sk;
-
-	if (sk)
-		atomic_dec(&nlk_sk(sk)->mapped);
-}
-
-static const struct vm_operations_struct netlink_mmap_ops = {
-	.open = netlink_mm_open,
-	.close = netlink_mm_close,
-};
-
-static int netlink_mmap(struct file *file, struct socket *sock,
-			struct vm_area_struct *vma)
-{
-	struct sock *sk = sock->sk;
-	struct netlink_sock *nlk = nlk_sk(sk);
-	struct netlink_ring *ring;
-	unsigned long start, size, expected;
-	unsigned int i;
-	int err = -EINVAL;
-
-	if (vma->vm_pgoff)
-		return -EINVAL;
-
-	mutex_lock(&nlk->pg_vec_lock);
-
-	expected = 0;
-	for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
-		if (ring->pg_vec == NULL)
-			continue;
-		expected += ring->pg_vec_len * ring->pg_vec_pages * PAGE_SIZE;
-	}
-
-	if (expected == 0)
-		goto out;
-
-	size = vma->vm_end - vma->vm_start;
-	if (size != expected)
-		goto out;
-
-	start = vma->vm_start;
-	for (ring = &nlk->rx_ring; ring <= &nlk->tx_ring; ring++) {
-		if (ring->pg_vec == NULL)
-			continue;
-
-		for (i = 0; i < ring->pg_vec_len; i++) {
-			struct page *page;
-			void *kaddr = ring->pg_vec[i];
-			unsigned int pg_num;
-
-			for (pg_num = 0; pg_num < ring->pg_vec_pages; pg_num++) {
-				page = pgvec_to_page(kaddr);
-				err = vm_insert_page(vma, start, page);
-				if (err < 0)
-					goto out;
-				start += PAGE_SIZE;
-				kaddr += PAGE_SIZE;
-			}
-		}
-	}
-
-	atomic_inc(&nlk->mapped);
-	vma->vm_ops = &netlink_mmap_ops;
-	err = 0;
-out:
-	mutex_unlock(&nlk->pg_vec_lock);
-	return err;
-}
-
-static void netlink_frame_flush_dcache(const struct nl_mmap_hdr *hdr, unsigned int nm_len)
-{
-#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE == 1
-	struct page *p_start, *p_end;
-
-	/* First page is flushed through netlink_{get,set}_status */
-	p_start = pgvec_to_page(hdr + PAGE_SIZE);
-	p_end = pgvec_to_page((void *)hdr + NL_MMAP_HDRLEN + nm_len - 1);
-	while (p_start <= p_end) {
-		flush_dcache_page(p_start);
-		p_start++;
-	}
-#endif
-}
-
-static enum nl_mmap_status netlink_get_status(const struct nl_mmap_hdr *hdr)
-{
-	smp_rmb();
-	flush_dcache_page(pgvec_to_page(hdr));
-	return hdr->nm_status;
-}
-
-static void netlink_set_status(struct nl_mmap_hdr *hdr,
-			       enum nl_mmap_status status)
-{
-	smp_mb();
-	hdr->nm_status = status;
-	flush_dcache_page(pgvec_to_page(hdr));
-}
-
-static struct nl_mmap_hdr *
-__netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos)
-{
-	unsigned int pg_vec_pos, frame_off;
-
-	pg_vec_pos = pos / ring->frames_per_block;
-	frame_off = pos % ring->frames_per_block;
-
-	return ring->pg_vec[pg_vec_pos] + (frame_off * ring->frame_size);
-}
-
-static struct nl_mmap_hdr *
-netlink_lookup_frame(const struct netlink_ring *ring, unsigned int pos,
-		     enum nl_mmap_status status)
-{
-	struct nl_mmap_hdr *hdr;
-
-	hdr = __netlink_lookup_frame(ring, pos);
-	if (netlink_get_status(hdr) != status)
-		return NULL;
-
-	return hdr;
-}
-
-static struct nl_mmap_hdr *
-netlink_current_frame(const struct netlink_ring *ring,
-		      enum nl_mmap_status status)
-{
-	return netlink_lookup_frame(ring, ring->head, status);
-}
-
-static void netlink_increment_head(struct netlink_ring *ring)
-{
-	ring->head = ring->head != ring->frame_max ? ring->head + 1 : 0;
-}
-
-static void netlink_forward_ring(struct netlink_ring *ring)
-{
-	unsigned int head = ring->head;
-	const struct nl_mmap_hdr *hdr;
-
-	do {
-		hdr = __netlink_lookup_frame(ring, ring->head);
-		if (hdr->nm_status == NL_MMAP_STATUS_UNUSED)
-			break;
-		if (hdr->nm_status != NL_MMAP_STATUS_SKIP)
-			break;
-		netlink_increment_head(ring);
-	} while (ring->head != head);
-}
-
-static bool netlink_has_valid_frame(struct netlink_ring *ring)
-{
-	unsigned int head = ring->head, pos = head;
-	const struct nl_mmap_hdr *hdr;
-
-	do {
-		hdr = __netlink_lookup_frame(ring, pos);
-		if (hdr->nm_status == NL_MMAP_STATUS_VALID)
-			return true;
-		pos = pos != 0 ? pos - 1 : ring->frame_max;
-	} while (pos != head);
-
-	return false;
-}
-
-static bool netlink_dump_space(struct netlink_sock *nlk)
-{
-	struct netlink_ring *ring = &nlk->rx_ring;
-	struct nl_mmap_hdr *hdr;
-	unsigned int n;
-
-	hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
-	if (hdr == NULL)
-		return false;
-
-	n = ring->head + ring->frame_max / 2;
-	if (n > ring->frame_max)
-		n -= ring->frame_max;
-
-	hdr = __netlink_lookup_frame(ring, n);
-
-	return hdr->nm_status == NL_MMAP_STATUS_UNUSED;
-}
-
-static unsigned int netlink_poll(struct file *file, struct socket *sock,
-				 poll_table *wait)
-{
-	struct sock *sk = sock->sk;
-	struct netlink_sock *nlk = nlk_sk(sk);
-	unsigned int mask;
-	int err;
-
-	if (nlk->rx_ring.pg_vec != NULL) {
-		/* Memory mapped sockets don't call recvmsg(), so flow control
-		 * for dumps is performed here. A dump is allowed to continue
-		 * if at least half the ring is unused.
-		 */
-		while (nlk->cb_running && netlink_dump_space(nlk)) {
-			err = netlink_dump(sk);
-			if (err < 0) {
-				sk->sk_err = -err;
-				sk->sk_error_report(sk);
-				break;
-			}
-		}
-		netlink_rcv_wake(sk);
-	}
-
-	mask = datagram_poll(file, sock, wait);
-
-	/* We could already have received frames in the normal receive
-	 * queue, that will show up as NL_MMAP_STATUS_COPY in the ring,
-	 * so if mask contains pollin/etc already, there's no point
-	 * walking the ring.
-	 */
-	if ((mask & (POLLIN | POLLRDNORM)) != (POLLIN | POLLRDNORM)) {
-		spin_lock_bh(&sk->sk_receive_queue.lock);
-		if (nlk->rx_ring.pg_vec) {
-			if (netlink_has_valid_frame(&nlk->rx_ring))
-				mask |= POLLIN | POLLRDNORM;
-		}
-		spin_unlock_bh(&sk->sk_receive_queue.lock);
-	}
-
-	spin_lock_bh(&sk->sk_write_queue.lock);
-	if (nlk->tx_ring.pg_vec) {
-		if (netlink_current_frame(&nlk->tx_ring, NL_MMAP_STATUS_UNUSED))
-			mask |= POLLOUT | POLLWRNORM;
-	}
-	spin_unlock_bh(&sk->sk_write_queue.lock);
-
-	return mask;
-}
-
-static struct nl_mmap_hdr *netlink_mmap_hdr(struct sk_buff *skb)
-{
-	return (struct nl_mmap_hdr *)(skb->head - NL_MMAP_HDRLEN);
-}
-
-static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk,
-				   struct netlink_ring *ring,
-				   struct nl_mmap_hdr *hdr)
-{
-	unsigned int size;
-	void *data;
-
-	size = ring->frame_size - NL_MMAP_HDRLEN;
-	data = (void *)hdr + NL_MMAP_HDRLEN;
-
-	skb->head = data;
-	skb->data = data;
-	skb_reset_tail_pointer(skb);
-	skb->end = skb->tail + size;
-	skb->len = 0;
-
-	skb->destructor = netlink_skb_destructor;
-	NETLINK_CB(skb).flags |= NETLINK_SKB_MMAPED;
-	NETLINK_CB(skb).sk = sk;
-}
-
-static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg,
-				u32 dst_portid, u32 dst_group,
-				struct scm_cookie *scm)
-{
-	struct netlink_sock *nlk = nlk_sk(sk);
-	struct netlink_ring *ring;
-	struct nl_mmap_hdr *hdr;
-	struct sk_buff *skb;
-	unsigned int maxlen;
-	int err = 0, len = 0;
-
-	mutex_lock(&nlk->pg_vec_lock);
-
-	ring = &nlk->tx_ring;
-	maxlen = ring->frame_size - NL_MMAP_HDRLEN;
-
-	do {
-		unsigned int nm_len;
-
-		hdr = netlink_current_frame(ring, NL_MMAP_STATUS_VALID);
-		if (hdr == NULL) {
-			if (!(msg->msg_flags & MSG_DONTWAIT) &&
-			    atomic_read(&nlk->tx_ring.pending))
-				schedule();
-			continue;
-		}
-
-		nm_len = ACCESS_ONCE(hdr->nm_len);
-		if (nm_len > maxlen) {
-			err = -EINVAL;
-			goto out;
-		}
-
-		netlink_frame_flush_dcache(hdr, nm_len);
-
-		skb = alloc_skb(nm_len, GFP_KERNEL);
-		if (skb == NULL) {
-			err = -ENOBUFS;
-			goto out;
-		}
-		__skb_put(skb, nm_len);
-		memcpy(skb->data, (void *)hdr + NL_MMAP_HDRLEN, nm_len);
-		netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
-
-		netlink_increment_head(ring);
-
-		NETLINK_CB(skb).portid = nlk->portid;
-		NETLINK_CB(skb).dst_group = dst_group;
-		NETLINK_CB(skb).creds = scm->creds;
-
-		err = security_netlink_send(sk, skb);
-		if (err) {
-			kfree_skb(skb);
-			goto out;
-		}
-
-		if (unlikely(dst_group)) {
-			atomic_inc(&skb->users);
-			netlink_broadcast(sk, skb, dst_portid, dst_group,
-					  GFP_KERNEL);
-		}
-		err = netlink_unicast(sk, skb, dst_portid,
-				      msg->msg_flags & MSG_DONTWAIT);
-		if (err < 0)
-			goto out;
-		len += err;
-
-	} while (hdr != NULL ||
-		 (!(msg->msg_flags & MSG_DONTWAIT) &&
-		  atomic_read(&nlk->tx_ring.pending)));
-
-	if (len > 0)
-		err = len;
-out:
-	mutex_unlock(&nlk->pg_vec_lock);
-	return err;
-}
-
-static void netlink_queue_mmaped_skb(struct sock *sk, struct sk_buff *skb)
-{
-	struct nl_mmap_hdr *hdr;
-
-	hdr = netlink_mmap_hdr(skb);
-	hdr->nm_len = skb->len;
-	hdr->nm_group = NETLINK_CB(skb).dst_group;
-	hdr->nm_pid = NETLINK_CB(skb).creds.pid;
-	hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
-	hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
-	netlink_frame_flush_dcache(hdr, hdr->nm_len);
-	netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
-
-	NETLINK_CB(skb).flags |= NETLINK_SKB_DELIVERED;
-	kfree_skb(skb);
-}
-
-static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb)
-{
-	struct netlink_sock *nlk = nlk_sk(sk);
-	struct netlink_ring *ring = &nlk->rx_ring;
-	struct nl_mmap_hdr *hdr;
-
-	spin_lock_bh(&sk->sk_receive_queue.lock);
-	hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
-	if (hdr == NULL) {
-		spin_unlock_bh(&sk->sk_receive_queue.lock);
-		kfree_skb(skb);
-		netlink_overrun(sk);
-		return;
-	}
-	netlink_increment_head(ring);
-	__skb_queue_tail(&sk->sk_receive_queue, skb);
-	spin_unlock_bh(&sk->sk_receive_queue.lock);
-
-	hdr->nm_len = skb->len;
-	hdr->nm_group = NETLINK_CB(skb).dst_group;
-	hdr->nm_pid = NETLINK_CB(skb).creds.pid;
-	hdr->nm_uid = from_kuid(sk_user_ns(sk), NETLINK_CB(skb).creds.uid);
-	hdr->nm_gid = from_kgid(sk_user_ns(sk), NETLINK_CB(skb).creds.gid);
-	netlink_set_status(hdr, NL_MMAP_STATUS_COPY);
-}
-
-#else /* CONFIG_NETLINK_MMAP */
-#define netlink_rx_is_mmaped(sk) false
-#define netlink_tx_is_mmaped(sk) false
-#define netlink_mmap sock_no_mmap
-#define netlink_poll datagram_poll
-#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, scm) 0
-#endif /* CONFIG_NETLINK_MMAP */
-
 static void netlink_skb_destructor(struct sk_buff *skb)
 {
-#ifdef CONFIG_NETLINK_MMAP
-	struct nl_mmap_hdr *hdr;
-	struct netlink_ring *ring;
-	struct sock *sk;
-
-	/* If a packet from the kernel to userspace was freed because of an
-	 * error without being delivered to userspace, the kernel must reset
-	 * the status. In the direction userspace to kernel, the status is
-	 * always reset here after the packet was processed and freed.
-	 */
-	if (netlink_skb_is_mmaped(skb)) {
-		hdr = netlink_mmap_hdr(skb);
-		sk = NETLINK_CB(skb).sk;
-
-		if (NETLINK_CB(skb).flags & NETLINK_SKB_TX) {
-			netlink_set_status(hdr, NL_MMAP_STATUS_UNUSED);
-			ring = &nlk_sk(sk)->tx_ring;
-		} else {
-			if (!(NETLINK_CB(skb).flags & NETLINK_SKB_DELIVERED)) {
-				hdr->nm_len = 0;
-				netlink_set_status(hdr, NL_MMAP_STATUS_VALID);
-			}
-			ring = &nlk_sk(sk)->rx_ring;
-		}
-
-		WARN_ON(atomic_read(&ring->pending) == 0);
-		atomic_dec(&ring->pending);
-		sock_put(sk);
-
-		skb->head = NULL;
-	}
-#endif
 	if (is_vmalloc_addr(skb->head)) {
 		if (!skb->cloned ||
 		    !atomic_dec_return(&(skb_shinfo(skb)->dataref)))
@@ -937,18 +335,6 @@ static void netlink_sock_destruct(struct sock *sk)
 	}
 
 	skb_queue_purge(&sk->sk_receive_queue);
-#ifdef CONFIG_NETLINK_MMAP
-	if (1) {
-		struct nl_mmap_req req;
-
-		memset(&req, 0, sizeof(req));
-		if (nlk->rx_ring.pg_vec)
-			__netlink_set_ring(sk, &req, false, NULL, 0);
-		memset(&req, 0, sizeof(req));
-		if (nlk->tx_ring.pg_vec)
-			__netlink_set_ring(sk, &req, true, NULL, 0);
-	}
-#endif /* CONFIG_NETLINK_MMAP */
 
 	if (!sock_flag(sk, SOCK_DEAD)) {
 		printk(KERN_ERR "Freeing alive netlink socket %p\n", sk);
@@ -1194,9 +580,6 @@ static int __netlink_create(struct net *net, struct socket *sock,
 		mutex_init(nlk->cb_mutex);
 	}
 	init_waitqueue_head(&nlk->wait);
-#ifdef CONFIG_NETLINK_MMAP
-	mutex_init(&nlk->pg_vec_lock);
-#endif
 
 	sk->sk_destruct = netlink_sock_destruct;
 	sk->sk_protocol = protocol;
@@ -1305,7 +688,7 @@ static int netlink_release(struct socket *sock)
 
 	skb_queue_purge(&sk->sk_write_queue);
 
-	if (nlk->portid) {
+	if (nlk->portid && nlk->bound) {
 		struct netlink_notify n = {
 			.net = sock_net(sk),
 			.protocol = sk->sk_protocol,
@@ -1650,6 +1033,14 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
 	return 0;
 }
 
+static int netlink_ioctl(struct socket *sock, unsigned int cmd,
+			 unsigned long arg)
+{
+	/* try to hand this ioctl down to the NIC drivers.
+	 */
+	return -ENOIOCTLCMD;
+}
+
 static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
 {
 	struct sock *sock;
@@ -1728,8 +1119,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
 	nlk = nlk_sk(sk);
 
 	if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
-	     test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
-	    !netlink_skb_is_mmaped(skb)) {
+	     test_bit(NETLINK_S_CONGESTED, &nlk->state))) {
 		DECLARE_WAITQUEUE(wait, current);
 		if (!*timeo) {
 			if (!ssk || netlink_is_kernel(ssk))
@@ -1767,14 +1157,7 @@ static int __netlink_sendskb(struct sock *sk, struct sk_buff *skb)
 
 	netlink_deliver_tap(skb);
 
-#ifdef CONFIG_NETLINK_MMAP
-	if (netlink_skb_is_mmaped(skb))
-		netlink_queue_mmaped_skb(sk, skb);
-	else if (netlink_rx_is_mmaped(sk))
-		netlink_ring_set_copied(sk, skb);
-	else
-#endif /* CONFIG_NETLINK_MMAP */
-		skb_queue_tail(&sk->sk_receive_queue, skb);
+	skb_queue_tail(&sk->sk_receive_queue, skb);
 	sk->sk_data_ready(sk);
 	return len;
 }
@@ -1798,9 +1181,6 @@ static struct sk_buff *netlink_trim(struct sk_buff *skb, gfp_t allocation)
 	int delta;
 
 	WARN_ON(skb->sk != NULL);
-	if (netlink_skb_is_mmaped(skb))
-		return skb;
-
 	delta = skb->end - skb->tail;
 	if (is_vmalloc_addr(skb->head) || delta * 2 < skb->truesize)
 		return skb;
@@ -1876,79 +1256,6 @@ retry:
 }
 EXPORT_SYMBOL(netlink_unicast);
 
-struct sk_buff *__netlink_alloc_skb(struct sock *ssk, unsigned int size,
-				    unsigned int ldiff, u32 dst_portid,
-				    gfp_t gfp_mask)
-{
-#ifdef CONFIG_NETLINK_MMAP
-	unsigned int maxlen, linear_size;
-	struct sock *sk = NULL;
-	struct sk_buff *skb;
-	struct netlink_ring *ring;
-	struct nl_mmap_hdr *hdr;
-
-	sk = netlink_getsockbyportid(ssk, dst_portid);
-	if (IS_ERR(sk))
-		goto out;
-
-	ring = &nlk_sk(sk)->rx_ring;
-	/* fast-path without atomic ops for common case: non-mmaped receiver */
-	if (ring->pg_vec == NULL)
-		goto out_put;
-
-	/* We need to account the full linear size needed as a ring
-	 * slot cannot have non-linear parts.
-	 */
-	linear_size = size + ldiff;
-	if (ring->frame_size - NL_MMAP_HDRLEN < linear_size)
-		goto out_put;
-
-	skb = alloc_skb_head(gfp_mask);
-	if (skb == NULL)
-		goto err1;
-
-	spin_lock_bh(&sk->sk_receive_queue.lock);
-	/* check again under lock */
-	if (ring->pg_vec == NULL)
-		goto out_free;
-
-	/* check again under lock */
-	maxlen = ring->frame_size - NL_MMAP_HDRLEN;
-	if (maxlen < linear_size)
-		goto out_free;
-
-	netlink_forward_ring(ring);
-	hdr = netlink_current_frame(ring, NL_MMAP_STATUS_UNUSED);
-	if (hdr == NULL)
-		goto err2;
-
-	netlink_ring_setup_skb(skb, sk, ring, hdr);
-	netlink_set_status(hdr, NL_MMAP_STATUS_RESERVED);
-	atomic_inc(&ring->pending);
-	netlink_increment_head(ring);
-
-	spin_unlock_bh(&sk->sk_receive_queue.lock);
-	return skb;
-
-err2:
-	kfree_skb(skb);
-	spin_unlock_bh(&sk->sk_receive_queue.lock);
-	netlink_overrun(sk);
-err1:
-	sock_put(sk);
-	return NULL;
-
-out_free:
-	kfree_skb(skb);
-	spin_unlock_bh(&sk->sk_receive_queue.lock);
-out_put:
-	sock_put(sk);
-out:
-#endif
-	return alloc_skb(size, gfp_mask);
-}
-EXPORT_SYMBOL_GPL(__netlink_alloc_skb);
-
 int netlink_has_listeners(struct sock *sk, unsigned int group)
 {
 	int res = 0;
@@ -2225,8 +1532,7 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
 	if (level != SOL_NETLINK)
 		return -ENOPROTOOPT;
 
-	if (optname != NETLINK_RX_RING && optname != NETLINK_TX_RING &&
-	    optlen >= sizeof(int) &&
+	if (optlen >= sizeof(int) &&
 	    get_user(val, (unsigned int __user *)optval))
 		return -EFAULT;
 
@@ -2279,25 +1585,6 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
 		}
 		err = 0;
 		break;
-#ifdef CONFIG_NETLINK_MMAP
-	case NETLINK_RX_RING:
-	case NETLINK_TX_RING: {
-		struct nl_mmap_req req;
-
-		/* Rings might consume more memory than queue limits, require
-		 * CAP_NET_ADMIN.
-		 */
-		if (!capable(CAP_NET_ADMIN))
-			return -EPERM;
-		if (optlen < sizeof(req))
-			return -EINVAL;
-		if (copy_from_user(&req, optval, sizeof(req)))
-			return -EFAULT;
-		err = netlink_set_ring(sk, &req,
-				       optname == NETLINK_TX_RING);
-		break;
-	}
-#endif /* CONFIG_NETLINK_MMAP */
 	case NETLINK_LISTEN_ALL_NSID:
 		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST))
 			return -EPERM;
@@ -2467,18 +1754,6 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
 		smp_rmb();
 	}
 
-	/* It's a really convoluted way for userland to ask for mmaped
-	 * sendmsg(), but that's what we've got...
-	 */
-	if (netlink_tx_is_mmaped(sk) &&
-	    iter_is_iovec(&msg->msg_iter) &&
-	    msg->msg_iter.nr_segs == 1 &&
-	    msg->msg_iter.iov->iov_base == NULL) {
-		err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group,
-					   &scm);
-		goto out;
-	}
-
 	err = -EMSGSIZE;
 	if (len > sk->sk_sndbuf - 32)
 		goto out;
@@ -2794,8 +2069,7 @@ static int netlink_dump(struct sock *sk)
 		goto errout_skb;
 	}
 
-	if (!netlink_rx_is_mmaped(sk) &&
-	    atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
+	if (atomic_read(&sk->sk_rmem_alloc) >= sk->sk_rcvbuf)
 		goto errout_skb;
 
 	/* NLMSG_GOODSIZE is small to avoid high order allocations being
@@ -2808,15 +2082,12 @@ static int netlink_dump(struct sock *sk)
 
 	if (alloc_min_size < nlk->max_recvmsg_len) {
 		alloc_size = nlk->max_recvmsg_len;
-		skb = netlink_alloc_skb(sk, alloc_size, nlk->portid,
-					GFP_KERNEL |
-					__GFP_NOWARN |
-					__GFP_NORETRY);
+		skb = alloc_skb(alloc_size, GFP_KERNEL |
+				__GFP_NOWARN | __GFP_NORETRY);
 	}
 	if (!skb) {
 		alloc_size = alloc_min_size;
-		skb = netlink_alloc_skb(sk, alloc_size, nlk->portid,
-					GFP_KERNEL);
+		skb = alloc_skb(alloc_size, GFP_KERNEL);
 	}
 	if (!skb)
 		goto errout_skb;
@@ -2831,8 +2102,7 @@ static int netlink_dump(struct sock *sk)
 	 * reasonable static buffer based on the expected largest dump of a
 	 * single netdev. The outcome is MSG_TRUNC error.
 	 */
-	if (!netlink_rx_is_mmaped(sk))
-		skb_reserve(skb, skb_tailroom(skb) - alloc_size);
+	skb_reserve(skb, skb_tailroom(skb) - alloc_size);
 	netlink_skb_set_owner_r(skb, sk);
 
 	len = cb->dump(skb, cb);
@@ -2884,16 +2154,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb,
 	struct netlink_sock *nlk;
 	int ret;
 
-	/* Memory mapped dump requests need to be copied to avoid looping
-	 * on the pending state in netlink_mmap_sendmsg() while the CB hold
-	 * a reference to the skb.
-	 */
-	if (netlink_skb_is_mmaped(skb)) {
-		skb = skb_copy(skb, GFP_KERNEL);
-		if (skb == NULL)
-			return -ENOBUFS;
-	} else
-		atomic_inc(&skb->users);
+	atomic_inc(&skb->users);
 
 	sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid);
 	if (sk == NULL) {
@@ -2966,8 +2227,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err)
 	if (!(nlk->flags & NETLINK_F_CAP_ACK) && err)
 		payload += nlmsg_len(nlh);
 
-	skb = netlink_alloc_skb(in_skb->sk, nlmsg_total_size(payload),
-				NETLINK_CB(in_skb).portid, GFP_KERNEL);
+	skb = nlmsg_new(payload, GFP_KERNEL);
 	if (!skb) {
 		struct sock *sk;
 
@@ -3241,15 +2501,15 @@ static const struct proto_ops netlink_ops = {
 	.socketpair = sock_no_socketpair,
 	.accept = sock_no_accept,
 	.getname = netlink_getname,
-	.poll = netlink_poll,
-	.ioctl = sock_no_ioctl,
+	.poll = datagram_poll,
+	.ioctl = netlink_ioctl,
 	.listen = sock_no_listen,
 	.shutdown = sock_no_shutdown,
 	.setsockopt = netlink_setsockopt,
 	.getsockopt = netlink_getsockopt,
 	.sendmsg = netlink_sendmsg,
 	.recvmsg = netlink_recvmsg,
-	.mmap = netlink_mmap,
+	.mmap = sock_no_mmap,
 	.sendpage = sock_no_sendpage,
 };
 
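Editor's note (illustrative, not part of the patch or the cgit page): the userspace sequence that depended on the removed code looked roughly like the sketch below. The option names NETLINK_RX_RING/NETLINK_TX_RING and the nl_mmap_req fields come from the removed kernel code above; the fallback defines, the option values, and the concrete block/frame sizes are assumptions for illustration only. After this patch the ring setup is rejected and mmap() on a netlink socket fails, so applications must use ordinary sendmsg()/recvmsg().

	/* Sketch of the (now removed) netlink mmap-ring setup, for context.
	 * Assumes uapi headers that still carry the old definitions; the
	 * #ifndef fallback below is purely illustrative.
	 */
	#include <stdio.h>
	#include <sys/mman.h>
	#include <sys/socket.h>
	#include <unistd.h>
	#include <linux/netlink.h>

	#ifndef NETLINK_RX_RING			/* may be absent from newer headers */
	#define NETLINK_RX_RING 6
	#define NETLINK_TX_RING 7
	struct nl_mmap_req {
		unsigned int nm_block_size;
		unsigned int nm_block_nr;
		unsigned int nm_frame_size;
		unsigned int nm_frame_nr;
	};
	#endif

	int main(void)
	{
		int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
		struct nl_mmap_req req = {
			.nm_block_size = 4096,	/* had to be PAGE_ALIGNED (see netlink_set_ring) */
			.nm_block_nr   = 64,
			.nm_frame_size = 2048,	/* >= NL_MMAP_HDRLEN, NL_MMAP_MSG_ALIGNMENT aligned */
			.nm_frame_nr   = 64 * (4096 / 2048),
		};

		/* With this patch applied, the setsockopt() is rejected and
		 * mmap() returns an error, so the only remaining path is the
		 * regular sendmsg()/recvmsg() socket API.
		 */
		if (setsockopt(fd, SOL_NETLINK, NETLINK_RX_RING, &req, sizeof(req)) < 0)
			perror("NETLINK_RX_RING (expected to fail after this patch)");
		else if (mmap(NULL, (size_t)req.nm_block_size * req.nm_block_nr,
			      PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0) == MAP_FAILED)
			perror("mmap");

		close(fd);
		return 0;
	}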