aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/net/macvtap.c57
-rw-r--r--drivers/vhost/net.c7
-rw-r--r--drivers/vhost/vhost.c1
-rw-r--r--tools/virtio/linux/virtio.h1
-rw-r--r--tools/virtio/virtio_test.c26
5 files changed, 69 insertions, 23 deletions
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c
index 0427c6561c84..163559c16988 100644
--- a/drivers/net/macvtap.c
+++ b/drivers/net/macvtap.c
@@ -505,10 +505,11 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
505 if (copy > size) { 505 if (copy > size) {
506 ++from; 506 ++from;
507 --count; 507 --count;
508 } 508 offset = 0;
509 } else
510 offset += size;
509 copy -= size; 511 copy -= size;
510 offset1 += size; 512 offset1 += size;
511 offset = 0;
512 } 513 }
513 514
514 if (len == offset1) 515 if (len == offset1)
@@ -518,24 +519,29 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
518 struct page *page[MAX_SKB_FRAGS]; 519 struct page *page[MAX_SKB_FRAGS];
519 int num_pages; 520 int num_pages;
520 unsigned long base; 521 unsigned long base;
522 unsigned long truesize;
521 523
522 len = from->iov_len - offset1; 524 len = from->iov_len - offset;
523 if (!len) { 525 if (!len) {
524 offset1 = 0; 526 offset = 0;
525 ++from; 527 ++from;
526 continue; 528 continue;
527 } 529 }
528 base = (unsigned long)from->iov_base + offset1; 530 base = (unsigned long)from->iov_base + offset;
529 size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT; 531 size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT;
532 if (i + size > MAX_SKB_FRAGS)
533 return -EMSGSIZE;
530 num_pages = get_user_pages_fast(base, size, 0, &page[i]); 534 num_pages = get_user_pages_fast(base, size, 0, &page[i]);
531 if ((num_pages != size) || 535 if (num_pages != size) {
532 (num_pages > MAX_SKB_FRAGS - skb_shinfo(skb)->nr_frags)) 536 for (i = 0; i < num_pages; i++)
533 /* put_page is in skb free */ 537 put_page(page[i]);
534 return -EFAULT; 538 return -EFAULT;
539 }
540 truesize = size * PAGE_SIZE;
535 skb->data_len += len; 541 skb->data_len += len;
536 skb->len += len; 542 skb->len += len;
537 skb->truesize += len; 543 skb->truesize += truesize;
538 atomic_add(len, &skb->sk->sk_wmem_alloc); 544 atomic_add(truesize, &skb->sk->sk_wmem_alloc);
539 while (len) { 545 while (len) {
540 int off = base & ~PAGE_MASK; 546 int off = base & ~PAGE_MASK;
541 int size = min_t(int, len, PAGE_SIZE - off); 547 int size = min_t(int, len, PAGE_SIZE - off);
@@ -546,7 +552,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from,
546 len -= size; 552 len -= size;
547 i++; 553 i++;
548 } 554 }
549 offset1 = 0; 555 offset = 0;
550 ++from; 556 ++from;
551 } 557 }
552 return 0; 558 return 0;
@@ -646,7 +652,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
646 int err; 652 int err;
647 struct virtio_net_hdr vnet_hdr = { 0 }; 653 struct virtio_net_hdr vnet_hdr = { 0 };
648 int vnet_hdr_len = 0; 654 int vnet_hdr_len = 0;
649 int copylen; 655 int copylen = 0;
650 bool zerocopy = false; 656 bool zerocopy = false;
651 657
652 if (q->flags & IFF_VNET_HDR) { 658 if (q->flags & IFF_VNET_HDR) {
@@ -675,15 +681,31 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
675 if (unlikely(len < ETH_HLEN)) 681 if (unlikely(len < ETH_HLEN))
676 goto err; 682 goto err;
677 683
684 err = -EMSGSIZE;
685 if (unlikely(count > UIO_MAXIOV))
686 goto err;
687
678 if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) 688 if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY))
679 zerocopy = true; 689 zerocopy = true;
680 690
681 if (zerocopy) { 691 if (zerocopy) {
692 /* Userspace may produce vectors with count greater than
693 * MAX_SKB_FRAGS, so we need to linearize parts of the skb
694 * to let the rest of data to be fit in the frags.
695 */
696 if (count > MAX_SKB_FRAGS) {
697 copylen = iov_length(iv, count - MAX_SKB_FRAGS);
698 if (copylen < vnet_hdr_len)
699 copylen = 0;
700 else
701 copylen -= vnet_hdr_len;
702 }
682 /* There are 256 bytes to be copied in skb, so there is enough 703 /* There are 256 bytes to be copied in skb, so there is enough
683 * room for skb expand head in case it is used. 704 * room for skb expand head in case it is used.
684 * The rest buffer is mapped from userspace. 705 * The rest buffer is mapped from userspace.
685 */ 706 */
686 copylen = vnet_hdr.hdr_len; 707 if (copylen < vnet_hdr.hdr_len)
708 copylen = vnet_hdr.hdr_len;
687 if (!copylen) 709 if (!copylen)
688 copylen = GOODCOPY_LEN; 710 copylen = GOODCOPY_LEN;
689 } else 711 } else
@@ -694,10 +716,9 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
694 if (!skb) 716 if (!skb)
695 goto err; 717 goto err;
696 718
697 if (zerocopy) { 719 if (zerocopy)
698 err = zerocopy_sg_from_iovec(skb, iv, vnet_hdr_len, count); 720 err = zerocopy_sg_from_iovec(skb, iv, vnet_hdr_len, count);
699 skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; 721 else
700 } else
701 err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len, 722 err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len,
702 len); 723 len);
703 if (err) 724 if (err)
@@ -716,8 +737,10 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m,
716 rcu_read_lock_bh(); 737 rcu_read_lock_bh();
717 vlan = rcu_dereference_bh(q->vlan); 738 vlan = rcu_dereference_bh(q->vlan);
718 /* copy skb_ubuf_info for callback when skb has no error */ 739 /* copy skb_ubuf_info for callback when skb has no error */
719 if (zerocopy) 740 if (zerocopy) {
720 skb_shinfo(skb)->destructor_arg = m->msg_control; 741 skb_shinfo(skb)->destructor_arg = m->msg_control;
742 skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY;
743 }
721 if (vlan) 744 if (vlan)
722 macvlan_start_xmit(skb, vlan->dev); 745 macvlan_start_xmit(skb, vlan->dev);
723 else 746 else
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 1f21d2a1e528..853db7a08a26 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -166,7 +166,7 @@ static void handle_tx(struct vhost_net *net)
166 if (wmem < sock->sk->sk_sndbuf / 2) 166 if (wmem < sock->sk->sk_sndbuf / 2)
167 tx_poll_stop(net); 167 tx_poll_stop(net);
168 hdr_size = vq->vhost_hlen; 168 hdr_size = vq->vhost_hlen;
169 zcopy = vhost_sock_zcopy(sock); 169 zcopy = vq->ubufs;
170 170
171 for (;;) { 171 for (;;) {
172 /* Release DMAs done buffers first */ 172 /* Release DMAs done buffers first */
@@ -257,7 +257,8 @@ static void handle_tx(struct vhost_net *net)
257 UIO_MAXIOV; 257 UIO_MAXIOV;
258 } 258 }
259 vhost_discard_vq_desc(vq, 1); 259 vhost_discard_vq_desc(vq, 1);
260 tx_poll_start(net, sock); 260 if (err == -EAGAIN || err == -ENOBUFS)
261 tx_poll_start(net, sock);
261 break; 262 break;
262 } 263 }
263 if (err != len) 264 if (err != len)
@@ -265,6 +266,8 @@ static void handle_tx(struct vhost_net *net)
265 " len %d != %zd\n", err, len); 266 " len %d != %zd\n", err, len);
266 if (!zcopy) 267 if (!zcopy)
267 vhost_add_used_and_signal(&net->dev, vq, head, 0); 268 vhost_add_used_and_signal(&net->dev, vq, head, 0);
269 else
270 vhost_zerocopy_signal_used(vq);
268 total_len += len; 271 total_len += len;
269 if (unlikely(total_len >= VHOST_NET_WEIGHT)) { 272 if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
270 vhost_poll_queue(&vq->poll); 273 vhost_poll_queue(&vq->poll);
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 51e4c1eeec4f..94dbd25caa30 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -1603,6 +1603,7 @@ void vhost_zerocopy_callback(struct ubuf_info *ubuf)
1603 struct vhost_ubuf_ref *ubufs = ubuf->ctx; 1603 struct vhost_ubuf_ref *ubufs = ubuf->ctx;
1604 struct vhost_virtqueue *vq = ubufs->vq; 1604 struct vhost_virtqueue *vq = ubufs->vq;
1605 1605
1606 vhost_poll_queue(&vq->poll);
1606 /* set len = 1 to mark this desc buffers done DMA */ 1607 /* set len = 1 to mark this desc buffers done DMA */
1607 vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN; 1608 vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN;
1608 kref_put(&ubufs->kref, vhost_zerocopy_done_signal); 1609 kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h
index 7579f19e61e0..81847dd08bd0 100644
--- a/tools/virtio/linux/virtio.h
+++ b/tools/virtio/linux/virtio.h
@@ -203,6 +203,7 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
203void virtqueue_disable_cb(struct virtqueue *vq); 203void virtqueue_disable_cb(struct virtqueue *vq);
204 204
205bool virtqueue_enable_cb(struct virtqueue *vq); 205bool virtqueue_enable_cb(struct virtqueue *vq);
206bool virtqueue_enable_cb_delayed(struct virtqueue *vq);
206 207
207void *virtqueue_detach_unused_buf(struct virtqueue *vq); 208void *virtqueue_detach_unused_buf(struct virtqueue *vq);
208struct virtqueue *vring_new_virtqueue(unsigned int num, 209struct virtqueue *vring_new_virtqueue(unsigned int num,
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c
index 6bf95f995364..e626fa553c5a 100644
--- a/tools/virtio/virtio_test.c
+++ b/tools/virtio/virtio_test.c
@@ -144,7 +144,8 @@ static void wait_for_interrupt(struct vdev_info *dev)
144 } 144 }
145} 145}
146 146
147static void run_test(struct vdev_info *dev, struct vq_info *vq, int bufs) 147static void run_test(struct vdev_info *dev, struct vq_info *vq,
148 bool delayed, int bufs)
148{ 149{
149 struct scatterlist sl; 150 struct scatterlist sl;
150 long started = 0, completed = 0; 151 long started = 0, completed = 0;
@@ -183,8 +184,12 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, int bufs)
183 assert(started <= bufs); 184 assert(started <= bufs);
184 if (completed == bufs) 185 if (completed == bufs)
185 break; 186 break;
186 if (virtqueue_enable_cb(vq->vq)) { 187 if (delayed) {
187 wait_for_interrupt(dev); 188 if (virtqueue_enable_cb_delayed(vq->vq))
189 wait_for_interrupt(dev);
190 } else {
191 if (virtqueue_enable_cb(vq->vq))
192 wait_for_interrupt(dev);
188 } 193 }
189 } 194 }
190 test = 0; 195 test = 0;
@@ -216,6 +221,14 @@ const struct option longopts[] = {
216 .val = 'i', 221 .val = 'i',
217 }, 222 },
218 { 223 {
224 .name = "delayed-interrupt",
225 .val = 'D',
226 },
227 {
228 .name = "no-delayed-interrupt",
229 .val = 'd',
230 },
231 {
219 } 232 }
220}; 233};
221 234
@@ -224,6 +237,7 @@ static void help()
224 fprintf(stderr, "Usage: virtio_test [--help]" 237 fprintf(stderr, "Usage: virtio_test [--help]"
225 " [--no-indirect]" 238 " [--no-indirect]"
226 " [--no-event-idx]" 239 " [--no-event-idx]"
240 " [--delayed-interrupt]"
227 "\n"); 241 "\n");
228} 242}
229 243
@@ -233,6 +247,7 @@ int main(int argc, char **argv)
233 unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | 247 unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
234 (1ULL << VIRTIO_RING_F_EVENT_IDX); 248 (1ULL << VIRTIO_RING_F_EVENT_IDX);
235 int o; 249 int o;
250 bool delayed = false;
236 251
237 for (;;) { 252 for (;;) {
238 o = getopt_long(argc, argv, optstring, longopts, NULL); 253 o = getopt_long(argc, argv, optstring, longopts, NULL);
@@ -251,6 +266,9 @@ int main(int argc, char **argv)
251 case 'i': 266 case 'i':
252 features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC); 267 features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC);
253 break; 268 break;
269 case 'D':
270 delayed = true;
271 break;
254 default: 272 default:
255 assert(0); 273 assert(0);
256 break; 274 break;
@@ -260,6 +278,6 @@ int main(int argc, char **argv)
260done: 278done:
261 vdev_info_init(&dev, features); 279 vdev_info_init(&dev, features);
262 vq_info_add(&dev, 256); 280 vq_info_add(&dev, 256);
263 run_test(&dev, &dev.vqs[0], 0x100000); 281 run_test(&dev, &dev.vqs[0], delayed, 0x100000);
264 return 0; 282 return 0;
265} 283}