Diffstat (limited to 'drivers/net/tun.c')
 drivers/net/tun.c | 160
 1 file changed, 144 insertions(+), 16 deletions(-)
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 987aeefbc774..3a16d4fdaa05 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -22,7 +22,7 @@
  *    Add TUNSETLINK ioctl to set the link encapsulation
  *
  *  Mark Smith <markzzzsmith@yahoo.com.au>
- *    Use random_ether_addr() for tap MAC address.
+ *    Use eth_random_addr() for tap MAC address.
  *
  *  Harald Roelle <harald.roelle@ifi.lmu.de>  2004/04/20
  *    Fixes in packet dropping, queue length setting and queue wakeup.
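The only change in this hunk tracks the rename of random_ether_addr() to eth_random_addr(); both produce a random unicast MAC with the locally-administered bit set. A standalone userspace sketch of the address class it generates (illustration only, not part of this patch):

/* Demo of what eth_random_addr() produces: six random bytes with the
 * multicast bit cleared and the locally-administered bit set. */
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

int main(void)
{
	unsigned char addr[6];
	int i;

	srand(time(NULL));
	for (i = 0; i < 6; i++)
		addr[i] = rand() & 0xff;
	addr[0] &= 0xfe;	/* clear multicast bit */
	addr[0] |= 0x02;	/* set local assignment bit (IEEE802) */

	for (i = 0; i < 6; i++)
		printf("%02x%s", addr[i], i < 5 ? ":" : "\n");
	return 0;
}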
@@ -100,6 +100,8 @@ do { \
 } while (0)
 #endif
 
+#define GOODCOPY_LEN 128
+
 #define FLT_EXACT_COUNT 8
 struct tap_filter {
 	unsigned int    count;    /* Number of addrs. Zero means disabled */
@@ -185,7 +187,6 @@ static void __tun_detach(struct tun_struct *tun)
 	netif_tx_lock_bh(tun->dev);
 	netif_carrier_off(tun->dev);
 	tun->tfile = NULL;
-	tun->socket.file = NULL;
 	netif_tx_unlock_bh(tun->dev);
 
 	/* Drop read queue */
@@ -358,6 +359,8 @@ static void tun_free_netdev(struct net_device *dev)
 {
 	struct tun_struct *tun = netdev_priv(dev);
 
+	BUG_ON(!test_bit(SOCK_EXTERNALLY_ALLOCATED, &tun->socket.flags));
+
 	sk_release_kernel(tun->socket.sk);
 }
 
@@ -414,6 +417,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	/* Orphan the skb - required as we might hang on to it
 	 * for indefinite time. */
+	if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC)))
+		goto drop;
 	skb_orphan(skb);
 
 	/* Enqueue packet */
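The new skb_orphan_frags() call prepares for the zerocopy path added below: a zerocopy skb carries frags that pin userspace pages, and since tun may hold a packet in its queue indefinitely, such frags must be copied to kernel-owned memory (notifying their owner) before queueing, or the packet dropped. A rough userspace model of that contract, an assumption for illustration rather than kernel code:

/* Model: "orphaning" copies borrowed caller memory into a private
 * buffer and fires the completion callback, so the caller may reuse
 * its buffer even though the packet lingers in a queue. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct zc_buf {
	const char *user_mem;              /* borrowed, caller-owned */
	size_t len;
	char *own_mem;                     /* private copy after orphaning */
	void (*complete)(struct zc_buf *); /* models the zerocopy callback */
};

static void tx_done(struct zc_buf *b)
{
	printf("completion fired: caller may reuse its %zu-byte buffer\n",
	       b->len);
}

static int orphan_frags(struct zc_buf *b)
{
	b->own_mem = malloc(b->len);	/* like skb_orphan_frags() copying */
	if (!b->own_mem)
		return -1;		/* caller would drop the packet */
	memcpy(b->own_mem, b->user_mem, b->len);
	b->complete(b);			/* release the user pages early */
	b->user_mem = NULL;
	return 0;
}

int main(void)
{
	char payload[] = "packet bytes";
	struct zc_buf b = { payload, sizeof(payload), NULL, tx_done };

	if (orphan_frags(&b))
		return 1;
	printf("queued private copy: %s\n", b.own_mem);
	free(b.own_mem);
	return 0;
}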
@@ -600,19 +605,100 @@ static struct sk_buff *tun_alloc_skb(struct tun_struct *tun,
 	return skb;
 }
 
+/* set skb frags from iovec, this can move to core network code for reuse */
+static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
+				  int offset, size_t count)
+{
+	int len = iov_length(from, count) - offset;
+	int copy = skb_headlen(skb);
+	int size, offset1 = 0;
+	int i = 0;
+
+	/* Skip over from offset */
+	while (count && (offset >= from->iov_len)) {
+		offset -= from->iov_len;
+		++from;
+		--count;
+	}
+
+	/* copy up to skb headlen */
+	while (count && (copy > 0)) {
+		size = min_t(unsigned int, copy, from->iov_len - offset);
+		if (copy_from_user(skb->data + offset1, from->iov_base + offset,
+				   size))
+			return -EFAULT;
+		if (copy > size) {
+			++from;
+			--count;
+			offset = 0;
+		} else
+			offset += size;
+		copy -= size;
+		offset1 += size;
+	}
+
+	if (len == offset1)
+		return 0;
+
+	while (count--) {
+		struct page *page[MAX_SKB_FRAGS];
+		int num_pages;
+		unsigned long base;
+		unsigned long truesize;
+
+		len = from->iov_len - offset;
+		if (!len) {
+			offset = 0;
+			++from;
+			continue;
+		}
+		base = (unsigned long)from->iov_base + offset;
+		size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
+		if (i + size > MAX_SKB_FRAGS)
+			return -EMSGSIZE;
+		num_pages = get_user_pages_fast(base, size, 0, &page[i]);
+		if (num_pages != size) {
+			for (i = 0; i < num_pages; i++)
+				put_page(page[i]);
+			return -EFAULT;
+		}
+		truesize = size * PAGE_SIZE;
+		skb->data_len += len;
+		skb->len += len;
+		skb->truesize += truesize;
+		atomic_add(truesize, &skb->sk->sk_wmem_alloc);
+		while (len) {
+			int off = base & ~PAGE_MASK;
+			int size = min_t(int, len, PAGE_SIZE - off);
+			__skb_fill_page_desc(skb, i, page[i], off, size);
+			skb_shinfo(skb)->nr_frags++;
+			/* increase sk_wmem_alloc */
+			base += size;
+			len -= size;
+			i++;
+		}
+		offset = 0;
+		++from;
+	}
+	return 0;
+}
+
 /* Get packet from user space buffer */
-static ssize_t tun_get_user(struct tun_struct *tun,
-			    const struct iovec *iv, size_t count,
-			    int noblock)
+static ssize_t tun_get_user(struct tun_struct *tun, void *msg_control,
+			    const struct iovec *iv, size_t total_len,
+			    size_t count, int noblock)
 {
 	struct tun_pi pi = { 0, cpu_to_be16(ETH_P_IP) };
 	struct sk_buff *skb;
-	size_t len = count, align = NET_SKB_PAD;
+	size_t len = total_len, align = NET_SKB_PAD;
 	struct virtio_net_hdr gso = { 0 };
 	int offset = 0;
+	int copylen;
+	bool zerocopy = false;
+	int err;
 
 	if (!(tun->flags & TUN_NO_PI)) {
-		if ((len -= sizeof(pi)) > count)
+		if ((len -= sizeof(pi)) > total_len)
 			return -EINVAL;
 
 		if (memcpy_fromiovecend((void *)&pi, iv, 0, sizeof(pi)))
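The least obvious line in zerocopy_sg_from_iovec() is the page count: since ~PAGE_MASK equals PAGE_SIZE - 1, the expression rounds (offset-into-first-page + len) up to the number of pages the range touches. A standalone demo of that arithmetic, assuming 4 KiB pages (illustration only, not part of this patch):

/* Page-count arithmetic: (base & ~PAGE_MASK) is the offset into the
 * first page; adding len plus (PAGE_SIZE - 1) and shifting rounds up
 * to the number of pages spanned by [base, base + len). */
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

static unsigned long pages_spanned(unsigned long base, unsigned long len)
{
	return ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
}

int main(void)
{
	/* 100 bytes entirely inside one page */
	printf("%lu\n", pages_spanned(0x1000, 100));       /* 1 */
	/* 100 bytes straddling a page boundary */
	printf("%lu\n", pages_spanned(0x1fc0, 100));       /* 2 */
	/* exactly one page, page aligned */
	printf("%lu\n", pages_spanned(0x2000, PAGE_SIZE)); /* 1 */
	return 0;
}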
@@ -621,7 +707,7 @@ static ssize_t tun_get_user(struct tun_struct *tun,
 	}
 
 	if (tun->flags & TUN_VNET_HDR) {
-		if ((len -= tun->vnet_hdr_sz) > count)
+		if ((len -= tun->vnet_hdr_sz) > total_len)
 			return -EINVAL;
 
 		if (memcpy_fromiovecend((void *)&gso, iv, offset, sizeof(gso)))
@@ -643,14 +729,46 @@ static ssize_t tun_get_user(struct tun_struct *tun,
 			return -EINVAL;
 	}
 
-	skb = tun_alloc_skb(tun, align, len, gso.hdr_len, noblock);
+	if (msg_control)
+		zerocopy = true;
+
+	if (zerocopy) {
+		/* Userspace may produce vectors with count greater than
+		 * MAX_SKB_FRAGS, so we need to linearize parts of the skb
+		 * to let the rest of data to be fit in the frags.
+		 */
+		if (count > MAX_SKB_FRAGS) {
+			copylen = iov_length(iv, count - MAX_SKB_FRAGS);
+			if (copylen < offset)
+				copylen = 0;
+			else
+				copylen -= offset;
+		} else
+			copylen = 0;
+		/* There are 256 bytes to be copied in skb, so there is enough
+		 * room for skb expand head in case it is used.
+		 * The rest of the buffer is mapped from userspace.
+		 */
+		if (copylen < gso.hdr_len)
+			copylen = gso.hdr_len;
+		if (!copylen)
+			copylen = GOODCOPY_LEN;
+	} else
+		copylen = len;
+
+	skb = tun_alloc_skb(tun, align, copylen, gso.hdr_len, noblock);
 	if (IS_ERR(skb)) {
 		if (PTR_ERR(skb) != -EAGAIN)
 			tun->dev->stats.rx_dropped++;
 		return PTR_ERR(skb);
 	}
 
-	if (skb_copy_datagram_from_iovec(skb, 0, iv, offset, len)) {
+	if (zerocopy)
+		err = zerocopy_sg_from_iovec(skb, iv, offset, count);
+	else
+		err = skb_copy_datagram_from_iovec(skb, 0, iv, offset, len);
+
+	if (err) {
 		tun->dev->stats.rx_dropped++;
 		kfree_skb(skb);
 		return -EFAULT;
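The copylen logic above decides how much of the user buffer is linearized into the skb head: everything beyond the last MAX_SKB_FRAGS iovec segments, but never less than the virtio-net header, with GOODCOPY_LEN as the floor when nothing else applies. A standalone sketch of that choice (MAX_SKB_FRAGS assumed to be 18, its usual value with 4 KiB pages; illustration only):

/* Mirrors the copylen selection in tun_get_user() for zerocopy skbs. */
#include <stdio.h>
#include <sys/uio.h>

#define MAX_SKB_FRAGS 18	/* assumed typical value for 4 KiB pages */
#define GOODCOPY_LEN  128

static size_t iov_len_sum(const struct iovec *iv, unsigned long count)
{
	size_t len = 0;
	while (count--)
		len += (iv++)->iov_len;
	return len;
}

static size_t pick_copylen(const struct iovec *iv, unsigned long count,
			   size_t offset, size_t hdr_len)
{
	size_t copylen = 0;

	if (count > MAX_SKB_FRAGS) {
		copylen = iov_len_sum(iv, count - MAX_SKB_FRAGS);
		copylen = copylen < offset ? 0 : copylen - offset;
	}
	if (copylen < hdr_len)
		copylen = hdr_len;	/* header must stay linear */
	if (!copylen)
		copylen = GOODCOPY_LEN;	/* room to expand the skb head */
	return copylen;
}

int main(void)
{
	struct iovec iv[20];
	int i;

	for (i = 0; i < 20; i++) {
		iv[i].iov_base = NULL;	/* lengths only in this demo */
		iv[i].iov_len = 1024;
	}

	/* 2 leading segments (2048 bytes) minus a 12-byte header offset */
	printf("copylen = %zu\n", pick_copylen(iv, 20, 12, 0)); /* 2036 */
	/* few segments: fall back to GOODCOPY_LEN */
	printf("copylen = %zu\n", pick_copylen(iv, 4, 12, 0));  /* 128 */
	return 0;
}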
@@ -724,12 +842,18 @@ static ssize_t tun_get_user(struct tun_struct *tun,
 		skb_shinfo(skb)->gso_segs = 0;
 	}
 
+	/* copy skb_ubuf_info for callback when skb has no error */
+	if (zerocopy) {
+		skb_shinfo(skb)->destructor_arg = msg_control;
+		skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+	}
+
 	netif_rx_ni(skb);
 
 	tun->dev->stats.rx_packets++;
 	tun->dev->stats.rx_bytes += len;
 
-	return count;
+	return total_len;
 }
 
 static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
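Once the packet is built without error, the opaque msg_control pointer rides along as destructor_arg with SKBTX_DEV_ZEROCOPY set, so the producer's completion callback fires when the last reference to its user pages is dropped. A userspace model of that hand-off, an assumption for illustration rather than the kernel's actual completion structure:

/* Model: whoever finally frees the "skb" invokes the callback stashed
 * in destructor_arg, telling the zerocopy producer its pages are free. */
#include <stdio.h>

struct ubuf_model {
	void (*callback)(struct ubuf_model *);
	unsigned long desc;	/* producer's cookie, e.g. a ring index */
};

struct skb_model {
	int zerocopy;		/* models SKBTX_DEV_ZEROCOPY */
	struct ubuf_model *destructor_arg;
};

static void producer_done(struct ubuf_model *u)
{
	printf("tx completion for descriptor %lu\n", u->desc);
}

static void free_skb_model(struct skb_model *skb)
{
	if (skb->zerocopy && skb->destructor_arg)
		skb->destructor_arg->callback(skb->destructor_arg);
}

int main(void)
{
	struct ubuf_model u = { producer_done, 42 };
	struct skb_model skb = { 1, &u };

	free_skb_model(&skb);	/* releasing the frags fires the callback */
	return 0;
}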
@@ -744,7 +868,7 @@ static ssize_t tun_chr_aio_write(struct kiocb *iocb, const struct iovec *iv,
 
 	tun_debug(KERN_INFO, tun, "tun_chr_write %ld\n", count);
 
-	result = tun_get_user(tun, iv, iov_length(iv, count),
+	result = tun_get_user(tun, NULL, iv, iov_length(iv, count), count,
 			      file->f_flags & O_NONBLOCK);
 
 	tun_put(tun);
@@ -958,8 +1082,8 @@ static int tun_sendmsg(struct kiocb *iocb, struct socket *sock,
 		       struct msghdr *m, size_t total_len)
 {
 	struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
-	return tun_get_user(tun, m->msg_iov, total_len,
-			    m->msg_flags & MSG_DONTWAIT);
+	return tun_get_user(tun, m->msg_control, m->msg_iov, total_len,
+			    m->msg_iovlen, m->msg_flags & MSG_DONTWAIT);
 }
 
 static int tun_recvmsg(struct kiocb *iocb, struct socket *sock,
@@ -1115,6 +1239,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 		tun->flags = flags;
 		tun->txflt.count = 0;
 		tun->vnet_hdr_sz = sizeof(struct virtio_net_hdr);
+		set_bit(SOCK_EXTERNALLY_ALLOCATED, &tun->socket.flags);
 
 		err = -ENOMEM;
 		sk = sk_alloc(&init_net, AF_UNSPEC, GFP_KERNEL, &tun_proto);
@@ -1128,6 +1253,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
 		sock_init_data(&tun->socket, sk);
 		sk->sk_write_space = tun_sock_write_space;
 		sk->sk_sndbuf = INT_MAX;
+		sock_set_flag(sk, SOCK_ZEROCOPY);
 
 		tun_sk(sk)->tun = tun;
 
@@ -1252,10 +1378,12 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
 	int vnet_hdr_sz;
 	int ret;
 
-	if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89)
+	if (cmd == TUNSETIFF || _IOC_TYPE(cmd) == 0x89) {
 		if (copy_from_user(&ifr, argp, ifreq_len))
 			return -EFAULT;
-
+	} else {
+		memset(&ifr, 0, sizeof(ifr));
+	}
 	if (cmd == TUNGETFEATURES) {
 		/* Currently this just means: "what IFF flags are valid?".
 		 * This is needed because we never checked for invalid flags on
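The hunk above closes a hole where ioctls other than TUNSETIFF operated on an uninitialized on-stack ifr; they now start from zeroed memory. For reference, a minimal userspace sketch of the TUNSETIFF side of this path (requires CAP_NET_ADMIN; illustration only, not part of this patch):

/* Create a tap device and obtain the fd whose write() path enters
 * tun_get_user() via tun_chr_aio_write(). */
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/if.h>
#include <linux/if_tun.h>

int main(void)
{
	struct ifreq ifr;
	int fd = open("/dev/net/tun", O_RDWR);

	if (fd < 0) {
		perror("open /dev/net/tun");
		return 1;
	}

	memset(&ifr, 0, sizeof(ifr));	/* mirror the kernel-side memset */
	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
	strncpy(ifr.ifr_name, "tap%d", IFNAMSIZ - 1);

	if (ioctl(fd, TUNSETIFF, &ifr) < 0) {	/* enters __tun_chr_ioctl */
		perror("TUNSETIFF");
		close(fd);
		return 1;
	}
	printf("created %s; write() now feeds tun_get_user()\n", ifr.ifr_name);
	close(fd);
	return 0;
}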