diff options
-rw-r--r-- | drivers/net/macvtap.c | 57 | ||||
-rw-r--r-- | drivers/vhost/net.c | 7 | ||||
-rw-r--r-- | drivers/vhost/vhost.c | 1 | ||||
-rw-r--r-- | tools/virtio/linux/virtio.h | 1 | ||||
-rw-r--r-- | tools/virtio/virtio_test.c | 26 |
5 files changed, 69 insertions, 23 deletions
diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 0427c6561c84..163559c16988 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c | |||
@@ -505,10 +505,11 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, | |||
505 | if (copy > size) { | 505 | if (copy > size) { |
506 | ++from; | 506 | ++from; |
507 | --count; | 507 | --count; |
508 | } | 508 | offset = 0; |
509 | } else | ||
510 | offset += size; | ||
509 | copy -= size; | 511 | copy -= size; |
510 | offset1 += size; | 512 | offset1 += size; |
511 | offset = 0; | ||
512 | } | 513 | } |
513 | 514 | ||
514 | if (len == offset1) | 515 | if (len == offset1) |
@@ -518,24 +519,29 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, | |||
518 | struct page *page[MAX_SKB_FRAGS]; | 519 | struct page *page[MAX_SKB_FRAGS]; |
519 | int num_pages; | 520 | int num_pages; |
520 | unsigned long base; | 521 | unsigned long base; |
522 | unsigned long truesize; | ||
521 | 523 | ||
522 | len = from->iov_len - offset1; | 524 | len = from->iov_len - offset; |
523 | if (!len) { | 525 | if (!len) { |
524 | offset1 = 0; | 526 | offset = 0; |
525 | ++from; | 527 | ++from; |
526 | continue; | 528 | continue; |
527 | } | 529 | } |
528 | base = (unsigned long)from->iov_base + offset1; | 530 | base = (unsigned long)from->iov_base + offset; |
529 | size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT; | 531 | size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT; |
532 | if (i + size > MAX_SKB_FRAGS) | ||
533 | return -EMSGSIZE; | ||
530 | num_pages = get_user_pages_fast(base, size, 0, &page[i]); | 534 | num_pages = get_user_pages_fast(base, size, 0, &page[i]); |
531 | if ((num_pages != size) || | 535 | if (num_pages != size) { |
532 | (num_pages > MAX_SKB_FRAGS - skb_shinfo(skb)->nr_frags)) | 536 | for (i = 0; i < num_pages; i++) |
533 | /* put_page is in skb free */ | 537 | put_page(page[i]); |
534 | return -EFAULT; | 538 | return -EFAULT; |
539 | } | ||
540 | truesize = size * PAGE_SIZE; | ||
535 | skb->data_len += len; | 541 | skb->data_len += len; |
536 | skb->len += len; | 542 | skb->len += len; |
537 | skb->truesize += len; | 543 | skb->truesize += truesize; |
538 | atomic_add(len, &skb->sk->sk_wmem_alloc); | 544 | atomic_add(truesize, &skb->sk->sk_wmem_alloc); |
539 | while (len) { | 545 | while (len) { |
540 | int off = base & ~PAGE_MASK; | 546 | int off = base & ~PAGE_MASK; |
541 | int size = min_t(int, len, PAGE_SIZE - off); | 547 | int size = min_t(int, len, PAGE_SIZE - off); |
@@ -546,7 +552,7 @@ static int zerocopy_sg_from_iovec(struct sk_buff *skb, const struct iovec *from, | |||
546 | len -= size; | 552 | len -= size; |
547 | i++; | 553 | i++; |
548 | } | 554 | } |
549 | offset1 = 0; | 555 | offset = 0; |
550 | ++from; | 556 | ++from; |
551 | } | 557 | } |
552 | return 0; | 558 | return 0; |
@@ -646,7 +652,7 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, | |||
646 | int err; | 652 | int err; |
647 | struct virtio_net_hdr vnet_hdr = { 0 }; | 653 | struct virtio_net_hdr vnet_hdr = { 0 }; |
648 | int vnet_hdr_len = 0; | 654 | int vnet_hdr_len = 0; |
649 | int copylen; | 655 | int copylen = 0; |
650 | bool zerocopy = false; | 656 | bool zerocopy = false; |
651 | 657 | ||
652 | if (q->flags & IFF_VNET_HDR) { | 658 | if (q->flags & IFF_VNET_HDR) { |
@@ -675,15 +681,31 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, | |||
675 | if (unlikely(len < ETH_HLEN)) | 681 | if (unlikely(len < ETH_HLEN)) |
676 | goto err; | 682 | goto err; |
677 | 683 | ||
684 | err = -EMSGSIZE; | ||
685 | if (unlikely(count > UIO_MAXIOV)) | ||
686 | goto err; | ||
687 | |||
678 | if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) | 688 | if (m && m->msg_control && sock_flag(&q->sk, SOCK_ZEROCOPY)) |
679 | zerocopy = true; | 689 | zerocopy = true; |
680 | 690 | ||
681 | if (zerocopy) { | 691 | if (zerocopy) { |
692 | /* Userspace may produce vectors with count greater than | ||
693 | * MAX_SKB_FRAGS, so we need to linearize parts of the skb | ||
694 | * to let the rest of data to be fit in the frags. | ||
695 | */ | ||
696 | if (count > MAX_SKB_FRAGS) { | ||
697 | copylen = iov_length(iv, count - MAX_SKB_FRAGS); | ||
698 | if (copylen < vnet_hdr_len) | ||
699 | copylen = 0; | ||
700 | else | ||
701 | copylen -= vnet_hdr_len; | ||
702 | } | ||
682 | /* There are 256 bytes to be copied in skb, so there is enough | 703 | /* There are 256 bytes to be copied in skb, so there is enough |
683 | * room for skb expand head in case it is used. | 704 | * room for skb expand head in case it is used. |
684 | * The rest buffer is mapped from userspace. | 705 | * The rest buffer is mapped from userspace. |
685 | */ | 706 | */ |
686 | copylen = vnet_hdr.hdr_len; | 707 | if (copylen < vnet_hdr.hdr_len) |
708 | copylen = vnet_hdr.hdr_len; | ||
687 | if (!copylen) | 709 | if (!copylen) |
688 | copylen = GOODCOPY_LEN; | 710 | copylen = GOODCOPY_LEN; |
689 | } else | 711 | } else |
@@ -694,10 +716,9 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, | |||
694 | if (!skb) | 716 | if (!skb) |
695 | goto err; | 717 | goto err; |
696 | 718 | ||
697 | if (zerocopy) { | 719 | if (zerocopy) |
698 | err = zerocopy_sg_from_iovec(skb, iv, vnet_hdr_len, count); | 720 | err = zerocopy_sg_from_iovec(skb, iv, vnet_hdr_len, count); |
699 | skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; | 721 | else |
700 | } else | ||
701 | err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len, | 722 | err = skb_copy_datagram_from_iovec(skb, 0, iv, vnet_hdr_len, |
702 | len); | 723 | len); |
703 | if (err) | 724 | if (err) |
@@ -716,8 +737,10 @@ static ssize_t macvtap_get_user(struct macvtap_queue *q, struct msghdr *m, | |||
716 | rcu_read_lock_bh(); | 737 | rcu_read_lock_bh(); |
717 | vlan = rcu_dereference_bh(q->vlan); | 738 | vlan = rcu_dereference_bh(q->vlan); |
718 | /* copy skb_ubuf_info for callback when skb has no error */ | 739 | /* copy skb_ubuf_info for callback when skb has no error */ |
719 | if (zerocopy) | 740 | if (zerocopy) { |
720 | skb_shinfo(skb)->destructor_arg = m->msg_control; | 741 | skb_shinfo(skb)->destructor_arg = m->msg_control; |
742 | skb_shinfo(skb)->tx_flags |= SKBTX_DEV_ZEROCOPY; | ||
743 | } | ||
721 | if (vlan) | 744 | if (vlan) |
722 | macvlan_start_xmit(skb, vlan->dev); | 745 | macvlan_start_xmit(skb, vlan->dev); |
723 | else | 746 | else |
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 1f21d2a1e528..853db7a08a26 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c | |||
@@ -166,7 +166,7 @@ static void handle_tx(struct vhost_net *net) | |||
166 | if (wmem < sock->sk->sk_sndbuf / 2) | 166 | if (wmem < sock->sk->sk_sndbuf / 2) |
167 | tx_poll_stop(net); | 167 | tx_poll_stop(net); |
168 | hdr_size = vq->vhost_hlen; | 168 | hdr_size = vq->vhost_hlen; |
169 | zcopy = vhost_sock_zcopy(sock); | 169 | zcopy = vq->ubufs; |
170 | 170 | ||
171 | for (;;) { | 171 | for (;;) { |
172 | /* Release DMAs done buffers first */ | 172 | /* Release DMAs done buffers first */ |
@@ -257,7 +257,8 @@ static void handle_tx(struct vhost_net *net) | |||
257 | UIO_MAXIOV; | 257 | UIO_MAXIOV; |
258 | } | 258 | } |
259 | vhost_discard_vq_desc(vq, 1); | 259 | vhost_discard_vq_desc(vq, 1); |
260 | tx_poll_start(net, sock); | 260 | if (err == -EAGAIN || err == -ENOBUFS) |
261 | tx_poll_start(net, sock); | ||
261 | break; | 262 | break; |
262 | } | 263 | } |
263 | if (err != len) | 264 | if (err != len) |
@@ -265,6 +266,8 @@ static void handle_tx(struct vhost_net *net) | |||
265 | " len %d != %zd\n", err, len); | 266 | " len %d != %zd\n", err, len); |
266 | if (!zcopy) | 267 | if (!zcopy) |
267 | vhost_add_used_and_signal(&net->dev, vq, head, 0); | 268 | vhost_add_used_and_signal(&net->dev, vq, head, 0); |
269 | else | ||
270 | vhost_zerocopy_signal_used(vq); | ||
268 | total_len += len; | 271 | total_len += len; |
269 | if (unlikely(total_len >= VHOST_NET_WEIGHT)) { | 272 | if (unlikely(total_len >= VHOST_NET_WEIGHT)) { |
270 | vhost_poll_queue(&vq->poll); | 273 | vhost_poll_queue(&vq->poll); |
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 51e4c1eeec4f..94dbd25caa30 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c | |||
@@ -1603,6 +1603,7 @@ void vhost_zerocopy_callback(struct ubuf_info *ubuf) | |||
1603 | struct vhost_ubuf_ref *ubufs = ubuf->ctx; | 1603 | struct vhost_ubuf_ref *ubufs = ubuf->ctx; |
1604 | struct vhost_virtqueue *vq = ubufs->vq; | 1604 | struct vhost_virtqueue *vq = ubufs->vq; |
1605 | 1605 | ||
1606 | vhost_poll_queue(&vq->poll); | ||
1606 | /* set len = 1 to mark this desc buffers done DMA */ | 1607 | /* set len = 1 to mark this desc buffers done DMA */ |
1607 | vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN; | 1608 | vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN; |
1608 | kref_put(&ubufs->kref, vhost_zerocopy_done_signal); | 1609 | kref_put(&ubufs->kref, vhost_zerocopy_done_signal); |
diff --git a/tools/virtio/linux/virtio.h b/tools/virtio/linux/virtio.h index 7579f19e61e0..81847dd08bd0 100644 --- a/tools/virtio/linux/virtio.h +++ b/tools/virtio/linux/virtio.h | |||
@@ -203,6 +203,7 @@ void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len); | |||
203 | void virtqueue_disable_cb(struct virtqueue *vq); | 203 | void virtqueue_disable_cb(struct virtqueue *vq); |
204 | 204 | ||
205 | bool virtqueue_enable_cb(struct virtqueue *vq); | 205 | bool virtqueue_enable_cb(struct virtqueue *vq); |
206 | bool virtqueue_enable_cb_delayed(struct virtqueue *vq); | ||
206 | 207 | ||
207 | void *virtqueue_detach_unused_buf(struct virtqueue *vq); | 208 | void *virtqueue_detach_unused_buf(struct virtqueue *vq); |
208 | struct virtqueue *vring_new_virtqueue(unsigned int num, | 209 | struct virtqueue *vring_new_virtqueue(unsigned int num, |
diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c index 6bf95f995364..e626fa553c5a 100644 --- a/tools/virtio/virtio_test.c +++ b/tools/virtio/virtio_test.c | |||
@@ -144,7 +144,8 @@ static void wait_for_interrupt(struct vdev_info *dev) | |||
144 | } | 144 | } |
145 | } | 145 | } |
146 | 146 | ||
147 | static void run_test(struct vdev_info *dev, struct vq_info *vq, int bufs) | 147 | static void run_test(struct vdev_info *dev, struct vq_info *vq, |
148 | bool delayed, int bufs) | ||
148 | { | 149 | { |
149 | struct scatterlist sl; | 150 | struct scatterlist sl; |
150 | long started = 0, completed = 0; | 151 | long started = 0, completed = 0; |
@@ -183,8 +184,12 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq, int bufs) | |||
183 | assert(started <= bufs); | 184 | assert(started <= bufs); |
184 | if (completed == bufs) | 185 | if (completed == bufs) |
185 | break; | 186 | break; |
186 | if (virtqueue_enable_cb(vq->vq)) { | 187 | if (delayed) { |
187 | wait_for_interrupt(dev); | 188 | if (virtqueue_enable_cb_delayed(vq->vq)) |
189 | wait_for_interrupt(dev); | ||
190 | } else { | ||
191 | if (virtqueue_enable_cb(vq->vq)) | ||
192 | wait_for_interrupt(dev); | ||
188 | } | 193 | } |
189 | } | 194 | } |
190 | test = 0; | 195 | test = 0; |
@@ -216,6 +221,14 @@ const struct option longopts[] = { | |||
216 | .val = 'i', | 221 | .val = 'i', |
217 | }, | 222 | }, |
218 | { | 223 | { |
224 | .name = "delayed-interrupt", | ||
225 | .val = 'D', | ||
226 | }, | ||
227 | { | ||
228 | .name = "no-delayed-interrupt", | ||
229 | .val = 'd', | ||
230 | }, | ||
231 | { | ||
219 | } | 232 | } |
220 | }; | 233 | }; |
221 | 234 | ||
@@ -224,6 +237,7 @@ static void help() | |||
224 | fprintf(stderr, "Usage: virtio_test [--help]" | 237 | fprintf(stderr, "Usage: virtio_test [--help]" |
225 | " [--no-indirect]" | 238 | " [--no-indirect]" |
226 | " [--no-event-idx]" | 239 | " [--no-event-idx]" |
240 | " [--delayed-interrupt]" | ||
227 | "\n"); | 241 | "\n"); |
228 | } | 242 | } |
229 | 243 | ||
@@ -233,6 +247,7 @@ int main(int argc, char **argv) | |||
233 | unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | | 247 | unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | |
234 | (1ULL << VIRTIO_RING_F_EVENT_IDX); | 248 | (1ULL << VIRTIO_RING_F_EVENT_IDX); |
235 | int o; | 249 | int o; |
250 | bool delayed = false; | ||
236 | 251 | ||
237 | for (;;) { | 252 | for (;;) { |
238 | o = getopt_long(argc, argv, optstring, longopts, NULL); | 253 | o = getopt_long(argc, argv, optstring, longopts, NULL); |
@@ -251,6 +266,9 @@ int main(int argc, char **argv) | |||
251 | case 'i': | 266 | case 'i': |
252 | features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC); | 267 | features &= ~(1ULL << VIRTIO_RING_F_INDIRECT_DESC); |
253 | break; | 268 | break; |
269 | case 'D': | ||
270 | delayed = true; | ||
271 | break; | ||
254 | default: | 272 | default: |
255 | assert(0); | 273 | assert(0); |
256 | break; | 274 | break; |
@@ -260,6 +278,6 @@ int main(int argc, char **argv) | |||
260 | done: | 278 | done: |
261 | vdev_info_init(&dev, features); | 279 | vdev_info_init(&dev, features); |
262 | vq_info_add(&dev, 256); | 280 | vq_info_add(&dev, 256); |
263 | run_test(&dev, &dev.vqs[0], 0x100000); | 281 | run_test(&dev, &dev.vqs[0], delayed, 0x100000); |
264 | return 0; | 282 | return 0; |
265 | } | 283 | } |