author    David S. Miller <davem@davemloft.net>  2012-05-07 23:05:13 -0400
committer David S. Miller <davem@davemloft.net>  2012-05-07 23:05:26 -0400
commit    6e06c0e2347ec79d0bd5702b2438fe883f784545 (patch)
tree      3f55976275c94822cd406e7edc549f668901affa /drivers
parent    3a084ddb4bf299a6e898a9a07c89f3917f0713f7 (diff)
parent    c70aa540c7a9f67add11ad3161096fb95233aa2e (diff)
Merge branch 'vhost-net-next' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Michael S. Tsirkin says:

--------------------
These are mostly bugfixes. I hope to merge some more patches by 3.5; in
particular the vlan support fixes are waiting for Eric's ack, and a version
of the tracepoint patch might be ready in time, but let's merge what's ready
so it's testable.

This includes a ton of zerocopy fixes by Jason - good stuff, but too
intrusive for 3.4, and zerocopy is experimental anyway.

virtio has supported delayed interrupts for a while now, so adding support
to the virtio tool made sense.
--------------------

Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers')
-rw-r--r--  drivers/net/macvtap.c   57
-rw-r--r--  drivers/vhost/net.c      7
-rw-r--r--  drivers/vhost/vhost.c    1
3 files changed, 46 insertions(+), 19 deletions(-)
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 0427c6561c84..163559c16988 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -505,10 +505,11 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
 		if (copy > size) {
 			++from;
 			--count;
-		}
+			offset = 0;
+		} else
+			offset += size;
 		copy -= size;
 		offset1 += size;
-		offset = 0;
 	}
 
 	if (len == offset1)
@@ -518,24 +519,29 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
 		struct page *page[MAX_SKB_FRAGS];
 		int num_pages;
 		unsigned long base;
+		unsigned long truesize;
 
-		len = from->iov_len - offset1;
+		len = from->iov_len - offset;
 		if (!len) {
-			offset1 = 0;
+			offset = 0;
 			++from;
 			continue;
 		}
-		base = (unsigned long)from->iov_base + offset1;
+		base = (unsigned long)from->iov_base + offset;
 		size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
+		if (i + size > MAX_SKB_FRAGS)
+			return -EMSGSIZE;
 		num_pages = get_user_pages_fast(base, size, 0, &page[i]);
-		if ((num_pages != size) ||
-		    (num_pages > MAX_SKB_FRAGS - skb_shinfo(skb)->nr_frags))
-			/* put_page is in skb free */
-			return -EFAULT;
+		if (num_pages != size) {
+			for (i = 0; i < num_pages; i++)
+				put_page(page[i]);
+			return -EFAULT;
+		}
+		truesize = size * PAGE_SIZE;
 		skb->data_len += len;
 		skb->len += len;
-		skb->truesize += len;
-		atomic_add(len, &skb->sk->sk_wmem_alloc);
+		skb->truesize += truesize;
+		atomic_add(truesize, &skb->sk->sk_wmem_alloc);
 		while (len) {
 			int off = base & ~PAGE_MASK;
 			int size = min_t(int, len, PAGE_SIZE - off);
@@ -546,7 +552,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
 			len -= size;
 			i++;
 		}
-		offset1 = 0;
+		offset = 0;
 		++from;
 	}
 	return 0;
@@ -646,7 +652,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
 	int err;
 	struct virtio_net_hdr vnet_hdr = { 0 };
 	int vnet_hdr_len = 0;
-	int copylen;
+	int copylen = 0;
 	bool zerocopy = false;
 
 	if (q->flags & IFF_VNET_HDR) {
@@ -675,15 +681,31 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
 	if (unlikely(len < ETH_HLEN))
 		goto err;
 
+	err = -EMSGSIZE;
+	if (unlikely(count > UIO_MAXIOV))
+		goto err;
+
 	if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY))
 		zerocopy = true;
 
 	if (zerocopy) {
+		/* Userspace may produce vectors with count greater than
+		 * MAX_SKB_FRAGS, so we need to linearize parts of the skb
+		 * to let the rest of data to be fit in the frags.
+		 */
+		if (count > MAX_SKB_FRAGS) {
+			copylen = iov_length(iv, count - MAX_SKB_FRAGS);
+			if (copylen < vnet_hdr_len)
+				copylen = 0;
+			else
+				copylen -= vnet_hdr_len;
+		}
 		/* There are 256 bytes to be copied in skb, so there is enough
 		 * room for skb expand head in case it is used.
 		 * The rest buffer is mapped from userspace.
 		 */
-		copylen = vnet_hdr.hdr_len;
+		if (copylen < vnet_hdr.hdr_len)
+			copylen = vnet_hdr.hdr_len;
 		if (!copylen)
 			copylen = GOODCOPY_LEN;
 	} else
@@ -694,10 +716,9 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
 	if (!skb)
 		goto err;
 
-	if (zerocopy) {
+	if (zerocopy)
 		err = zerocopy_sg_from_iovec(skb, iv, vnet_hdr_len, count);
-		skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
-	} else
+	else
 		err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len,
 						   len);
 	if (err)
@@ -716,8 +737,10 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
 	rcu_read_lock_bh();
 	vlan = rcu_dereference_bh(q->vlan);
 	/* copy skb_ubuf_info for callback when skb has no error */
-	if (zerocopy)
+	if (zerocopy) {
 		skb_shinfo(skb)->destructor_arg = m->msg_control;
+		skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
+	}
 	if (vlan)
 		macvlan_start_xmit(skb, vlan->dev);
 	else
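
Editor's note: the macvtap_get_user() hunks above decide how many bytes of the user iovec get copied (linearized) into the skb head when the vector has more entries than can be mapped into frags; only the tail of the vector is then pinned for zerocopy. The following is a minimal standalone userspace sketch of that copylen calculation, not kernel code; MAX_SKB_FRAGS_SKETCH, GOODCOPY_LEN_SKETCH and iov_length_sketch() are stand-in names and values invented for this illustration.

/* Build with: cc -o copylen_sketch copylen_sketch.c */
#include <stdio.h>
#include <stddef.h>
#include <sys/uio.h>

#define MAX_SKB_FRAGS_SKETCH 17   /* stand-in for the kernel's MAX_SKB_FRAGS */
#define GOODCOPY_LEN_SKETCH  128  /* stand-in for macvtap's GOODCOPY_LEN */

/* Sum of iovec segment lengths, like the kernel's iov_length() helper. */
static size_t iov_length_sketch(const struct iovec *iv, unsigned long count)
{
	size_t len = 0;
	unsigned long i;

	for (i = 0; i < count; i++)
		len += iv[i].iov_len;
	return len;
}

/*
 * Mirror of the copylen logic in the diff: if the vector has more entries
 * than the frag limit, the length of the "excess" leading entries (minus
 * the virtio-net header) is copied into the skb head; otherwise copylen
 * falls back to the requested header length or a small default.
 */
static int compute_copylen(const struct iovec *iv, unsigned long count,
			   int vnet_hdr_len, int vnet_hdr_hdr_len)
{
	int copylen = 0;

	if (count > MAX_SKB_FRAGS_SKETCH) {
		copylen = iov_length_sketch(iv, count - MAX_SKB_FRAGS_SKETCH);
		if (copylen < vnet_hdr_len)
			copylen = 0;
		else
			copylen -= vnet_hdr_len;
	}
	if (copylen < vnet_hdr_hdr_len)
		copylen = vnet_hdr_hdr_len;
	if (!copylen)
		copylen = GOODCOPY_LEN_SKETCH;
	return copylen;
}

int main(void)
{
	/* 20 segments of 512 bytes: 3 more entries than the frag limit. */
	struct iovec iv[20];
	char buf[512];
	int i;

	for (i = 0; i < 20; i++) {
		iv[i].iov_base = buf;
		iv[i].iov_len = sizeof(buf);
	}
	/* 3 * 512 - 10 = 1526 bytes would be linearized in this example. */
	printf("copylen = %d\n", compute_copylen(iv, 20, 10, 0));
	return 0;
}
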
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 1f21d2a1e528..853db7a08a26 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -166,7 +166,7 @@ static void handle_tx(struct vhost_net *net)
 	if (wmem < sock->sk->sk_sndbuf / 2)
 		tx_poll_stop(net);
 	hdr_size = vq->vhost_hlen;
-	zcopy = vhost_sock_zcopy(sock);
+	zcopy = vq->ubufs;
 
 	for (;;) {
 		/* Release DMAs done buffers first */
@@ -257,7 +257,8 @@ static void handle_tx(struct vhost_net *net)
 					UIO_MAXIOV;
 			}
 			vhost_discard_vq_desc(vq, 1);
-			tx_poll_start(net, sock);
+			if (err == -EAGAIN || err == -ENOBUFS)
+				tx_poll_start(net, sock);
 			break;
 		}
 		if (err != len)
263 if (err != len) 264 if (err != len)
@@ -265,6 +266,8 @@ static void handle_tx(struct vhost_net *net)
265 " len %d != %zd\n", err, len); 266 " len %d != %zd\n", err, len);
266 if (!zcopy) 267 if (!zcopy)
267 vhost_add_used_and_signal(&net->dev, vq, head, 0); 268 vhost_add_used_and_signal(&net->dev, vq, head, 0);
269 else
270 vhost_zerocopy_signal_used(vq);
268 total_len += len; 271 total_len += len;
269 if (unlikely(total_len >= VHOST_NET_WEIGHT)) { 272 if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
270 vhost_poll_queue(&vq->poll); 273 vhost_poll_queue(&vq->poll);
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 51e4c1eeec4f..94dbd25caa30 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1603,6 +1603,7 @@ void vhost_zerocopy_callback(struct ubuf_info *ubuf)
 	struct vhost_ubuf_ref *ubufs = ubuf->ctx;
 	struct vhost_virtqueue *vq = ubufs->vq;
 
+	vhost_poll_queue(&vq->poll);
 	/* set len = 1 to mark this desc buffers done DMA */
 	vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN;
 	kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
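
Editor's note: the zerocopy_sg_from_iovec() hunk in drivers/net/macvtap.c above also changes truesize accounting to charge the whole pinned pages rather than just the payload length. Below is a minimal userspace sketch of that page-rounding arithmetic; the 4 KiB PAGE_SIZE and the example address and length are assumptions for illustration only, on the kernel side these come from the architecture headers.

/* Build with: cc -o truesize_sketch truesize_sketch.c */
#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long base = 0x1000f00;	/* user address, not page aligned */
	unsigned long len = 6000;	/* payload bytes in this iovec entry */

	/* Number of pages spanned by [base, base + len), as in the hunk. */
	unsigned long size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;

	/* truesize now charges the full pinned pages, not just len. */
	unsigned long truesize = size * PAGE_SIZE;

	/* For this example: 3 pages pinned, truesize 12288 for 6000 payload bytes. */
	printf("pages pinned: %lu, truesize: %lu, payload: %lu\n",
	       size, truesize, len);
	return 0;
}
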