diff options
author | David S. Miller <davem@davemloft.net> | 2012-05-07 23:05:13 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2012-05-07 23:05:26 -0400 |
commit | 6e06c0e2347ec79d0bd5702b2438fe883f784545 (patch) | |
tree | 3f55976275c94822cd406e7edc549f668901affa /drivers | |
parent | 3a084ddb4bf299a6e898a9a07c89f3917f0713f7 (diff) | |
parent | c70aa540c7a9f67add11ad3161096fb95233aa2e (diff) |
Merge branch 'vhost-net-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Michael S. Tsirkin says:
--------------------
There are mostly bugfixes here.
I hope to merge some more patches by 3.5, in particular
vlan support fixes are waiting for Eric's ack,
and a version of tracepoint patch might be
ready in time, but let's merge what's ready so it's testable.
This includes a ton of zerocopy fixes by Jason -
good stuff but too intrusive for 3.4 and zerocopy is experimental
anyway.
virtio has supported delayed interrupts for a while now,
so adding support to the virtio tool made sense.
--------------------
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/net/macvtap.c | 57 | ||||
-rw-r--r-- | drivers/vhost/net.c | 7 | ||||
-rw-r--r-- | drivers/vhost/vhost.c | 1 |
3 files changed, 46 insertions, 19 deletions
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 0427c6561c84..163559c16988 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c | |||
@@ -505,10 +505,11 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, | |||
505 | if (copy > size) { | 505 | if (copy > size) { |
506 | ++from; | 506 | ++from; |
507 | --count; | 507 | --count; |
508 | } | 508 | offset = 0; |
509 | } else | ||
510 | offset += size; | ||
509 | copy -= size; | 511 | copy -= size; |
510 | offset1 += size; | 512 | offset1 += size; |
511 | offset = 0; | ||
512 | } | 513 | } |
513 | 514 | ||
514 | if (len == offset1) | 515 | if (len == offset1) |
@@ -518,24 +519,29 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, | |||
518 | struct page *page[MAX_SKB_FRAGS]; | 519 | struct page *page[MAX_SKB_FRAGS]; |
519 | int num_pages; | 520 | int num_pages; |
520 | unsigned long base; | 521 | unsigned long base; |
522 | unsigned long truesize; | ||
521 | 523 | ||
522 | len = from->iov_len - offset1; | 524 | len = from->iov_len - offset; |
523 | if (!len) { | 525 | if (!len) { |
524 | offset1 = 0; | 526 | offset = 0; |
525 | ++from; | 527 | ++from; |
526 | continue; | 528 | continue; |
527 | } | 529 | } |
528 | base = (unsigned long)from->iov_base + offset1; | 530 | base = (unsigned long)from->iov_base + offset; |
529 | size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT; | 531 | size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT; |
532 | if (i + size > MAX_SKB_FRAGS) | ||
533 | return -EMSGSIZE; | ||
530 | num_pages = get_user_pages_fast(base, size, 0, &page[i]); | 534 | num_pages = get_user_pages_fast(base, size, 0, &page[i]); |
531 | if ((num_pages != size) || | 535 | if (num_pages != size) { |
532 | (num_pages > MAX_SKB_FRAGS - skb_shinfo(skb)->nr_frags)) | 536 | for (i = 0; i < num_pages; i++) |
533 | /* put_page is in skb free */ | 537 | put_page(page[i]); |
534 | return -EFAULT; | 538 | return -EFAULT; |
539 | } | ||
540 | truesize = size * PAGE_SIZE; | ||
535 | skb->data_len += len; | 541 | skb->data_len += len; |
536 | skb->len += len; | 542 | skb->len += len; |
537 | skb->truesize += len; | 543 | skb->truesize += truesize; |
538 | atomic_add(len, &skb->sk->sk_wmem_alloc); | 544 | atomic_add(truesize, &skb->sk->sk_wmem_alloc); |
539 | while (len) { | 545 | while (len) { |
540 | int off = base & ~PAGE_MASK; | 546 | int off = base & ~PAGE_MASK; |
541 | int size = min_t(int, len, PAGE_SIZE - off); | 547 | int size = min_t(int, len, PAGE_SIZE - off); |
@@ -546,7 +552,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, | |||
546 | len -= size; | 552 | len -= size; |
547 | i++; | 553 | i++; |
548 | } | 554 | } |
549 | offset1 = 0; | 555 | offset = 0; |
550 | ++from; | 556 | ++from; |
551 | } | 557 | } |
552 | return 0; | 558 | return 0; |
@@ -646,7 +652,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, | |||
646 | int err; | 652 | int err; |
647 | struct virtio_net_hdr vnet_hdr = { 0 }; | 653 | struct virtio_net_hdr vnet_hdr = { 0 }; |
648 | int vnet_hdr_len = 0; | 654 | int vnet_hdr_len = 0; |
649 | int copylen; | 655 | int copylen = 0; |
650 | bool zerocopy = false; | 656 | bool zerocopy = false; |
651 | 657 | ||
652 | if (q->flags & IFF_VNET_HDR) { | 658 | if (q->flags & IFF_VNET_HDR) { |
@@ -675,15 +681,31 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, | |||
675 | if (unlikely(len < ETH_HLEN)) | 681 | if (unlikely(len < ETH_HLEN)) |
676 | goto err; | 682 | goto err; |
677 | 683 | ||
684 | err = -EMSGSIZE; | ||
685 | if (unlikely(count > UIO_MAXIOV)) | ||
686 | goto err; | ||
687 | |||
678 | if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) | 688 | if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) |
679 | zerocopy = true; | 689 | zerocopy = true; |
680 | 690 | ||
681 | if (zerocopy) { | 691 | if (zerocopy) { |
692 | /* Userspace may produce vectors with count greater than | ||
693 | * MAX_SKB_FRAGS, so we need to linearize parts of the skb | ||
694 | * to let the rest of data to be fit in the frags. | ||
695 | */ | ||
696 | if (count > MAX_SKB_FRAGS) { | ||
697 | copylen = iov_length(iv, count - MAX_SKB_FRAGS); | ||
698 | if (copylen < vnet_hdr_len) | ||
699 | copylen = 0; | ||
700 | else | ||
701 | copylen -= vnet_hdr_len; | ||
702 | } | ||
682 | /* There are 256 bytes to be copied in skb, so there is enough | 703 | /* There are 256 bytes to be copied in skb, so there is enough |
683 | * room for skb expand head in case it is used. | 704 | * room for skb expand head in case it is used. |
684 | * The rest buffer is mapped from userspace. | 705 | * The rest buffer is mapped from userspace. |
685 | */ | 706 | */ |
686 | copylen = vnet_hdr.hdr_len; | 707 | if (copylen < vnet_hdr.hdr_len) |
708 | copylen = vnet_hdr.hdr_len; | ||
687 | if (!copylen) | 709 | if (!copylen) |
688 | copylen = GOODCOPY_LEN; | 710 | copylen = GOODCOPY_LEN; |
689 | } else | 711 | } else |
@@ -694,10 +716,9 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, | |||
694 | if (!skb) | 716 | if (!skb) |
695 | goto err; | 717 | goto err; |
696 | 718 | ||
697 | if (zerocopy) { | 719 | if (zerocopy) |
698 | err = zerocopy_sg_from_iovec(skb, iv, vnet_hdr_len, count); | 720 | err = zerocopy_sg_from_iovec(skb, iv, vnet_hdr_len, count); |
699 | skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; | 721 | else |
700 | } else | ||
701 | err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len, | 722 | err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len, |
702 | len); | 723 | len); |
703 | if (err) | 724 | if (err) |
@@ -716,8 +737,10 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, | |||
716 | rcu_read_lock_bh(); | 737 | rcu_read_lock_bh(); |
717 | vlan = rcu_dereference_bh(q->vlan); | 738 | vlan = rcu_dereference_bh(q->vlan); |
718 | /* copy skb_ubuf_info for callback when skb has no error */ | 739 | /* copy skb_ubuf_info for callback when skb has no error */ |
719 | if (zerocopy) | 740 | if (zerocopy) { |
720 | skb_shinfo(skb)->destructor_arg = m->msg_control; | 741 | skb_shinfo(skb)->destructor_arg = m->msg_control; |
742 | skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; | ||
743 | } | ||
721 | if (vlan) | 744 | if (vlan) |
722 | macvlan_start_xmit(skb, vlan->dev); | 745 | macvlan_start_xmit(skb, vlan->dev); |
723 | else | 746 | else |
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 1f21d2a1e528..853db7a08a26 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c | |||
@@ -166,7 +166,7 @@ static void handle_tx(struct vhost_net *net) | |||
166 | if (wmem < sock->sk->sk_sndbuf / 2) | 166 | if (wmem < sock->sk->sk_sndbuf / 2) |
167 | tx_poll_stop(net); | 167 | tx_poll_stop(net); |
168 | hdr_size = vq->vhost_hlen; | 168 | hdr_size = vq->vhost_hlen; |
169 | zcopy = vhost_sock_zcopy(sock); | 169 | zcopy = vq->ubufs; |
170 | 170 | ||
171 | for (;;) { | 171 | for (;;) { |
172 | /* Release DMAs done buffers first */ | 172 | /* Release DMAs done buffers first */ |
@@ -257,7 +257,8 @@ static void handle_tx(struct vhost_net *net) | |||
257 | UIO_MAXIOV; | 257 | UIO_MAXIOV; |
258 | } | 258 | } |
259 | vhost_discard_vq_desc(vq, 1); | 259 | vhost_discard_vq_desc(vq, 1); |
260 | tx_poll_start(net, sock); | 260 | if (err == -EAGAIN || err == -ENOBUFS) |
261 | tx_poll_start(net, sock); | ||
261 | break; | 262 | break; |
262 | } | 263 | } |
263 | if (err != len) | 264 | if (err != len) |
@@ -265,6 +266,8 @@ static void handle_tx(struct vhost_net *net) | |||
265 | " len %d != %zd\n", err, len); | 266 | " len %d != %zd\n", err, len); |
266 | if (!zcopy) | 267 | if (!zcopy) |
267 | vhost_add_used_and_signal(&net->dev, vq, head, 0); | 268 | vhost_add_used_and_signal(&net->dev, vq, head, 0); |
269 | else | ||
270 | vhost_zerocopy_signal_used(vq); | ||
268 | total_len += len; | 271 | total_len += len; |
269 | if (unlikely(total_len >= VHOST_NET_WEIGHT)) { | 272 | if (unlikely(total_len >= VHOST_NET_WEIGHT)) { |
270 | vhost_poll_queue(&vq->poll); | 273 | vhost_poll_queue(&vq->poll); |
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 51e4c1eeec4f..94dbd25caa30 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c | |||
@@ -1603,6 +1603,7 @@ void vhost_zerocopy_callback(struct ubuf_info *ubuf) | |||
1603 | struct vhost_ubuf_ref *ubufs = ubuf->ctx; | 1603 | struct vhost_ubuf_ref *ubufs = ubuf->ctx; |
1604 | struct vhost_virtqueue *vq = ubufs->vq; | 1604 | struct vhost_virtqueue *vq = ubufs->vq; |
1605 | 1605 | ||
1606 | vhost_poll_queue(&vq->poll); | ||
1606 | /* set len = 1 to mark this desc buffers done DMA */ | 1607 | /* set len = 1 to mark this desc buffers done DMA */ |
1607 | vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN; | 1608 | vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN; |
1608 | kref_put(&ubufs->kref, vhost_zerocopy_done_signal); | 1609 | kref_put(&ubufs->kref, vhost_zerocopy_done_signal); |