aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/macvtap.c
diff options
context:
space:
mode:
authorShirley Ma <mashirle@us.ibm.com>2011-07-06 08:26:11 -0400
committerDavid S. Miller <davem@davemloft.net>2011-07-07 07:41:24 -0400
commit97bc3633bec7ed0fdfbda6b9cf86c51e4f58f8e2 (patch)
tree4a7c3df9d75e1b3b635a91069cbaeb80c7a4fed6 /drivers/net/macvtap.c
parenta6686f2f382b13f8a7253401a66690c3633b6a74 (diff)
macvtap: macvtap TX zero-copy support
Only 128 bytes are copied; the rest of the data is DMA-mapped directly from userspace. Signed-off-by: Shirley Ma <xma@...ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net/macvtap.c')
-rw-r--r--drivers/net/macvtap.c132
1 files changed, 121 insertions, 11 deletions
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index ecee0fe65a97..ab96c319a240 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -60,6 +60,7 @@ static struct proto macvtap_proto = {
60 */ 60 */
61static dev_t macvtap_major; 61static dev_t macvtap_major;
62#define MACVTAP_NUM_DEVS 65536 62#define MACVTAP_NUM_DEVS 65536
63#define GOODCOPY_LEN 128
63static struct class *macvtap_class; 64static struct class *macvtap_class;
64static struct cdev macvtap_cdev; 65static struct cdev macvtap_cdev;
65 66
@@ -340,6 +341,7 @@ static int macvtap_open(struct inode *inode, struct file *file)
340{ 341{
341 struct net *net = current->nsproxy->net_ns; 342 struct net *net = current->nsproxy->net_ns;
342 struct net_device *dev = dev_get_by_index(net, iminor(inode)); 343 struct net_device *dev = dev_get_by_index(net, iminor(inode));
344 struct macvlan_dev *vlan = netdev_priv(dev);
343 struct macvtap_queue *q; 345 struct macvtap_queue *q;
344 int err; 346 int err;
345 347
@@ -369,6 +371,16 @@ static int macvtap_open(struct inode *inode, struct file *file)
369 q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP; 371 q->flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP;
370 q->vnet_hdr_sz = sizeof(struct virtio_net_hdr); 372 q->vnet_hdr_sz = sizeof(struct virtio_net_hdr);
371 373
374 /*
375 * so far only KVM virtio_net uses macvtap, enable zero copy between
376 * guest kernel and host kernel when lower device supports zerocopy
377 */
378 if (vlan) {
379 if ((vlan->lowerdev->features & NETIF_F_HIGHDMA) &&
380 (vlan->lowerdev->features & NETIF_F_SG))
381 sock_set_flag(&q->sk, SOCK_ZEROCOPY);
382 }
383
372 err = macvtap_set_queue(dev, file, q); 384 err = macvtap_set_queue(dev, file, q);
373 if (err) 385 if (err)
374 sock_put(&q->sk); 386 sock_put(&q->sk);
@@ -433,6 +445,80 @@ static inline struct sk_buff *macvtap_alloc_skb(struct sock *sk, size_t prepad,
433 return skb; 445 return skb;
434} 446}
435 447
448/* set skb frags from iovec, this can move to core network code for reuse */
449static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
450 int offset, size_t count)
451{
452 int len = iov_length(from, count) - offset;
453 int copy = skb_headlen(skb);
454 int size, offset1 = 0;
455 int i = 0;
456 skb_frag_t *f;
457
458 /* Skip over from offset */
459 while (count && (offset >= from->iov_len)) {
460 offset -= from->iov_len;
461 ++from;
462 --count;
463 }
464
465 /* copy up to skb headlen */
466 while (count && (copy > 0)) {
467 size = min_t(unsigned int, copy, from->iov_len - offset);
468 if (copy_from_user(skb->data + offset1, from->iov_base + offset,
469 size))
470 return -EFAULT;
471 if (copy > size) {
472 ++from;
473 --count;
474 }
475 copy -= size;
476 offset1 += size;
477 offset = 0;
478 }
479
480 if (len == offset1)
481 return 0;
482
483 while (count--) {
484 struct page *page[MAX_SKB_FRAGS];
485 int num_pages;
486 unsigned long base;
487
488 len = from->iov_len - offset1;
489 if (!len) {
490 offset1 = 0;
491 ++from;
492 continue;
493 }
494 base = (unsigned long)from->iov_base + offset1;
495 size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
496 num_pages = get_user_pages_fast(base, size, 0, &page[i]);
497 if ((num_pages != size) ||
498 (num_pages > MAX_SKB_FRAGS - skb_shinfo(skb)->nr_frags))
499 /* put_page is in skb free */
500 return -EFAULT;
501 skb->data_len += len;
502 skb->len += len;
503 skb->truesize += len;
504 atomic_add(len, &skb->sk->sk_wmem_alloc);
505 while (len) {
506 f = &skb_shinfo(skb)->frags[i];
507 f->page = page[i];
508 f->page_offset = base & ~PAGE_MASK;
509 f->size = min_t(int, len, PAGE_SIZE - f->page_offset);
510 skb_shinfo(skb)->nr_frags++;
511 /* increase sk_wmem_alloc */
512 base += f->size;
513 len -= f->size;
514 i++;
515 }
516 offset1 = 0;
517 ++from;
518 }
519 return 0;
520}
521
436/* 522/*
437 * macvtap_skb_from_vnet_hdr and macvtap_skb_to_vnet_hdr should 523 * macvtap_skb_from_vnet_hdr and macvtap_skb_to_vnet_hdr should
438 * be shared with the tun/tap driver. 524 * be shared with the tun/tap driver.
@@ -517,16 +603,18 @@ static int macvtap_skb_to_vnet_hdr(const struct sk_buff *skb,
517 603
518 604
519/* Get packet from user space buffer */ 605/* Get packet from user space buffer */
520static ssize_t macvtap_get_user(struct macvtap_queue *q, 606static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
521 const struct iovec *iv, size_t count, 607 const struct iovec *iv, unsigned long total_len,
522 int noblock) 608 size_t count, int noblock)
523{ 609{
524 struct sk_buff *skb; 610 struct sk_buff *skb;
525 struct macvlan_dev *vlan; 611 struct macvlan_dev *vlan;
526 size_t len = count; 612 unsigned long len = total_len;
527 int err; 613 int err;
528 struct virtio_net_hdr vnet_hdr = { 0 }; 614 struct virtio_net_hdr vnet_hdr = { 0 };
529 int vnet_hdr_len = 0; 615 int vnet_hdr_len = 0;
616 int copylen;
617 bool zerocopy = false;
530 618
531 if (q->flags & IFF_VNET_HDR) { 619 if (q->flags & IFF_VNET_HDR) {
532 vnet_hdr_len = q->vnet_hdr_sz; 620 vnet_hdr_len = q->vnet_hdr_sz;
@@ -554,12 +642,31 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q,
554 if (unlikely(len < ETH_HLEN)) 642 if (unlikely(len < ETH_HLEN))
555 goto err; 643 goto err;
556 644
557 skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, len, vnet_hdr.hdr_len, 645 if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY))
558 noblock, &err); 646 zerocopy = true;
647
648 if (zerocopy) {
649 /* Only vnet_hdr.hdr_len bytes (GOODCOPY_LEN when it is zero) are
650 * copied into the skb, so there is enough room for skb expand head
651 * in case it is used. The rest of the buffer is mapped from userspace.
652 */
653 copylen = vnet_hdr.hdr_len;
654 if (!copylen)
655 copylen = GOODCOPY_LEN;
656 } else
657 copylen = len;
658
659 skb = macvtap_alloc_skb(&q->sk, NET_IP_ALIGN, copylen,
660 vnet_hdr.hdr_len, noblock, &err);
559 if (!skb) 661 if (!skb)
560 goto err; 662 goto err;
561 663
562 err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len, len); 664 if (zerocopy) {
665 err = zerocopy_sg_from_iovec(skb, iv, vnet_hdr_len, count);
666 skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
667 } else
668 err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len,
669 len);
563 if (err) 670 if (err)
564 goto err_kfree; 671 goto err_kfree;
565 672
@@ -575,13 +682,16 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q,
575 682
576 rcu_read_lock_bh(); 683 rcu_read_lock_bh();
577 vlan = rcu_dereference_bh(q->vlan); 684 vlan = rcu_dereference_bh(q->vlan);
685 /* copy skb_ubuf_info for callback when skb has no error */
686 if (zerocopy)
687 skb_shinfo(skb)->destructor_arg = m->msg_control;
578 if (vlan) 688 if (vlan)
579 macvlan_start_xmit(skb, vlan->dev); 689 macvlan_start_xmit(skb, vlan->dev);
580 else 690 else
581 kfree_skb(skb); 691 kfree_skb(skb);
582 rcu_read_unlock_bh(); 692 rcu_read_unlock_bh();
583 693
584 return count; 694 return total_len;
585 695
586err_kfree: 696err_kfree:
587 kfree_skb(skb); 697 kfree_skb(skb);
@@ -603,8 +713,8 @@ static ssize_t macvtap_aio_write(struct kiocb *iocb, const struct iovec *iv,
603 ssize_t result = -ENOLINK; 713 ssize_t result = -ENOLINK;
604 struct macvtap_queue *q = file->private_data; 714 struct macvtap_queue *q = file->private_data;
605 715
606 result = macvtap_get_user(q, iv, iov_length(iv, count), 716 result = macvtap_get_user(q, NULL, iv, iov_length(iv, count), count,
607 file->f_flags & O_NONBLOCK); 717 file->f_flags & O_NONBLOCK);
608 return result; 718 return result;
609} 719}
610 720
@@ -817,7 +927,7 @@ static int macvtap_sendmsg(struct kiocb *iocb, struct socket *sock,
817 struct msghdr *m, size_t total_len) 927 struct msghdr *m, size_t total_len)
818{ 928{
819 struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock); 929 struct macvtap_queue *q = container_of(sock, struct macvtap_queue, sock);
820 return macvtap_get_user(q, m->msg_iov, total_len, 930 return macvtap_get_user(q, m, m->msg_iov, total_len, m->msg_iovlen,
821 m->msg_flags & MSG_DONTWAIT); 931 m->msg_flags & MSG_DONTWAIT);
822} 932}
823 933