Diffstat (limited to 'drivers/vhost')

 -rw-r--r--  drivers/vhost/net.c   |  77
 -rw-r--r--  drivers/vhost/vhost.c | 128
 -rw-r--r--  drivers/vhost/vhost.h |  31

3 files changed, 220 insertions(+), 16 deletions(-)
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index e224a92baa16..f0fd52cdfadc 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -12,6 +12,7 @@
 #include <linux/virtio_net.h>
 #include <linux/miscdevice.h>
 #include <linux/module.h>
+#include <linux/moduleparam.h>
 #include <linux/mutex.h>
 #include <linux/workqueue.h>
 #include <linux/rcupdate.h>
@@ -28,10 +29,18 @@
 
 #include "vhost.h"
 
+static int experimental_zcopytx;
+module_param(experimental_zcopytx, int, 0444);
+MODULE_PARM_DESC(experimental_zcopytx, "Enable Experimental Zero Copy TX");
+
 /* Max number of bytes transferred before requeueing the job.
  * Using this limit prevents one virtqueue from starving others. */
 #define VHOST_NET_WEIGHT 0x80000
 
+/* MAX number of TX used buffers for outstanding zerocopy */
+#define VHOST_MAX_PEND 128
+#define VHOST_GOODCOPY_LEN 256
+
 enum {
 	VHOST_NET_VQ_RX = 0,
 	VHOST_NET_VQ_TX = 1,
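
The two constants above split the TX path: a packet shorter than VHOST_GOODCOPY_LEN is still copied, since copying a small packet is cheaper than pinning its pages and tracking a DMA completion, and once more than VHOST_MAX_PEND buffers are waiting for DMA completion the handler stops pulling new descriptors. A minimal userspace sketch of those two checks, not kernel code: the constants are copied from the patch, the helper names are made up.

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    #define VHOST_MAX_PEND     128
    #define VHOST_GOODCOPY_LEN 256

    /* Copying a small packet is cheaper than pinning pages for DMA. */
    static bool should_zerocopy(size_t len)
    {
        return len >= VHOST_GOODCOPY_LEN;
    }

    /* Throttle: stop pulling descriptors while too many DMAs are pending. */
    static bool too_many_pending(int upend_idx, int done_idx)
    {
        return upend_idx - done_idx > VHOST_MAX_PEND;
    }

    int main(void)
    {
        printf("64B packet   -> zerocopy? %d\n", should_zerocopy(64));
        printf("1500B packet -> zerocopy? %d\n", should_zerocopy(1500));
        printf("200 pending  -> throttle? %d\n", too_many_pending(200, 0));
        return 0;
    }

Note the feature is opt-in: experimental_zcopytx=1 must be given at module load time, and the 0444 permission makes the parameter readable under /sys/module/vhost_net/parameters/ but not writable at runtime.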
@@ -54,6 +63,12 @@ struct vhost_net {
 	enum vhost_net_poll_state tx_poll_state;
 };
 
+static bool vhost_sock_zcopy(struct socket *sock)
+{
+	return unlikely(experimental_zcopytx) &&
+		sock_flag(sock->sk, SOCK_ZEROCOPY);
+}
+
 /* Pop first len bytes from iovec. Return number of segments used. */
 static int move_iovec_hdr(struct iovec *from, struct iovec *to,
 			  size_t len, int iov_count)
@@ -129,6 +144,8 @@ static void handle_tx(struct vhost_net *net)
 	int err, wmem;
 	size_t hdr_size;
 	struct socket *sock;
+	struct vhost_ubuf_ref *uninitialized_var(ubufs);
+	bool zcopy;
 
 	/* TODO: check that we are running from vhost_worker? */
 	sock = rcu_dereference_check(vq->private_data, 1);
@@ -149,8 +166,13 @@ static void handle_tx(struct vhost_net *net)
 	if (wmem < sock->sk->sk_sndbuf / 2)
 		tx_poll_stop(net);
 	hdr_size = vq->vhost_hlen;
+	zcopy = vhost_sock_zcopy(sock);
 
 	for (;;) {
+		/* Release DMAs done buffers first */
+		if (zcopy)
+			vhost_zerocopy_signal_used(vq);
+
 		head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
 					 ARRAY_SIZE(vq->iov),
 					 &out, &in,
@@ -166,6 +188,13 @@ static void handle_tx(struct vhost_net *net)
 				set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
 				break;
 			}
+			/* If more outstanding DMAs, queue the work */
+			if (unlikely(vq->upend_idx - vq->done_idx >
+				     VHOST_MAX_PEND)) {
+				tx_poll_start(net, sock);
+				set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
+				break;
+			}
 			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
 				vhost_disable_notify(&net->dev, vq);
 				continue;
@@ -188,9 +217,39 @@ static void handle_tx(struct vhost_net *net)
 			       iov_length(vq->hdr, s), hdr_size);
 			break;
 		}
+		/* use msg_control to pass vhost zerocopy ubuf info to skb */
+		if (zcopy) {
+			vq->heads[vq->upend_idx].id = head;
+			if (len < VHOST_GOODCOPY_LEN) {
+				/* copy don't need to wait for DMA done */
+				vq->heads[vq->upend_idx].len =
+					VHOST_DMA_DONE_LEN;
+				msg.msg_control = NULL;
+				msg.msg_controllen = 0;
+				ubufs = NULL;
+			} else {
+				struct ubuf_info *ubuf = &vq->ubuf_info[head];
+
+				vq->heads[vq->upend_idx].len = len;
+				ubuf->callback = vhost_zerocopy_callback;
+				ubuf->arg = vq->ubufs;
+				ubuf->desc = vq->upend_idx;
+				msg.msg_control = ubuf;
+				msg.msg_controllen = sizeof(ubuf);
+				ubufs = vq->ubufs;
+				kref_get(&ubufs->kref);
+			}
+			vq->upend_idx = (vq->upend_idx + 1) % UIO_MAXIOV;
+		}
 		/* TODO: Check specific error and bomb out unless ENOBUFS? */
 		err = sock->ops->sendmsg(NULL, sock, &msg, len);
 		if (unlikely(err < 0)) {
+			if (zcopy) {
+				if (ubufs)
+					vhost_ubuf_put(ubufs);
+				vq->upend_idx = ((unsigned)vq->upend_idx - 1) %
+					UIO_MAXIOV;
+			}
 			vhost_discard_vq_desc(vq, 1);
 			tx_poll_start(net, sock);
 			break;
@@ -198,7 +257,8 @@ static void handle_tx(struct vhost_net *net)
 		if (err != len)
 			pr_debug("Truncated TX packet: "
 				 " len %d != %zd\n", err, len);
-		vhost_add_used_and_signal(&net->dev, vq, head, 0);
+		if (!zcopy)
+			vhost_add_used_and_signal(&net->dev, vq, head, 0);
 		total_len += len;
 		if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
 			vhost_poll_queue(&vq->poll);
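
With zerocopy enabled, handle_tx() above no longer signals the guest right after sendmsg(): it records {head, len} at heads[upend_idx], advances upend_idx modulo UIO_MAXIOV, and rolls the slot back if sendmsg() fails. A self-contained userspace sketch of that ring bookkeeping; constants and field names mirror the patch, everything else is illustrative.

    #include <stdio.h>

    #define UIO_MAXIOV         1024
    #define VHOST_DMA_DONE_LEN 1

    struct used_head { unsigned id, len; };

    static struct used_head heads[UIO_MAXIOV];
    static int upend_idx; /* next free slot; wraps modulo UIO_MAXIOV */

    /* Record one in-flight buffer; len stays "pending" until the skb
     * destructor marks it VHOST_DMA_DONE_LEN. */
    static void zcopy_enqueue(unsigned head, unsigned len)
    {
        heads[upend_idx].id = head;
        heads[upend_idx].len = len;
        upend_idx = (upend_idx + 1) % UIO_MAXIOV;
    }

    /* sendmsg() failed: take the slot back. The patch's
     * ((unsigned)upend_idx - 1) % UIO_MAXIOV also survives idx == 0. */
    static void zcopy_rollback(void)
    {
        upend_idx = ((unsigned)upend_idx - 1) % UIO_MAXIOV;
    }

    int main(void)
    {
        zcopy_enqueue(5, 1500);
        zcopy_rollback();          /* back to slot 0 */
        zcopy_rollback();          /* wraps: (unsigned)-1 % 1024 == 1023 */
        printf("upend_idx = %d\n", upend_idx); /* prints 1023 */
        return 0;
    }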
@@ -603,6 +663,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
 {
 	struct socket *sock, *oldsock;
 	struct vhost_virtqueue *vq;
+	struct vhost_ubuf_ref *ubufs, *oldubufs = NULL;
 	int r;
 
 	mutex_lock(&n->dev.mutex);
@@ -632,6 +693,13 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
 	oldsock = rcu_dereference_protected(vq->private_data,
 					    lockdep_is_held(&vq->mutex));
 	if (sock != oldsock) {
+		ubufs = vhost_ubuf_alloc(vq, sock && vhost_sock_zcopy(sock));
+		if (IS_ERR(ubufs)) {
+			r = PTR_ERR(ubufs);
+			goto err_ubufs;
+		}
+		oldubufs = vq->ubufs;
+		vq->ubufs = ubufs;
 		vhost_net_disable_vq(n, vq);
 		rcu_assign_pointer(vq->private_data, sock);
 		vhost_net_enable_vq(n, vq);
@@ -639,6 +707,9 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
 
 	mutex_unlock(&vq->mutex);
 
+	if (oldubufs)
+		vhost_ubuf_put_and_wait(oldubufs);
+
 	if (oldsock) {
 		vhost_net_flush_vq(n, index);
 		fput(oldsock->file);
@@ -647,6 +718,8 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
 	mutex_unlock(&n->dev.mutex);
 	return 0;
 
+err_ubufs:
+	fput(sock->file);
 err_vq:
 	mutex_unlock(&vq->mutex);
 err:
@@ -776,6 +849,8 @@ static struct miscdevice vhost_net_misc = {
 
 static int vhost_net_init(void)
 {
+	if (experimental_zcopytx)
+		vhost_enable_zcopy(VHOST_NET_VQ_TX);
 	return misc_register(&vhost_net_misc);
 }
 module_init(vhost_net_init);
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index ea966b356352..5ef2f62becf4 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -37,6 +37,8 @@ enum {
 	VHOST_MEMORY_F_LOG = 0x1,
 };
 
+static unsigned vhost_zcopy_mask __read_mostly;
+
 #define vhost_used_event(vq) ((u16 __user *)&vq->avail->ring[vq->num])
 #define vhost_avail_event(vq) ((u16 __user *)&vq->used->ring[vq->num])
 
@@ -179,6 +181,9 @@ static void vhost_vq_reset(struct vhost_dev *dev,
 	vq->call_ctx = NULL;
 	vq->call = NULL;
 	vq->log_ctx = NULL;
+	vq->upend_idx = 0;
+	vq->done_idx = 0;
+	vq->ubufs = NULL;
 }
 
 static int vhost_worker(void *data)
@@ -225,10 +230,28 @@ static int vhost_worker(void *data)
 	return 0;
 }
 
+static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
+{
+	kfree(vq->indirect);
+	vq->indirect = NULL;
+	kfree(vq->log);
+	vq->log = NULL;
+	kfree(vq->heads);
+	vq->heads = NULL;
+	kfree(vq->ubuf_info);
+	vq->ubuf_info = NULL;
+}
+
+void vhost_enable_zcopy(int vq)
+{
+	vhost_zcopy_mask |= 0x1 << vq;
+}
+
 /* Helper to allocate iovec buffers for all vqs. */
 static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
 {
 	int i;
+	bool zcopy;
 
 	for (i = 0; i < dev->nvqs; ++i) {
 		dev->vqs[i].indirect = kmalloc(sizeof *dev->vqs[i].indirect *
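
vhost_enable_zcopy() above just sets one bit per virtqueue index, and vhost_dev_alloc_iovecs() (continued in the next hunk) tests that bit to decide whether a vq needs a ubuf_info array at all. A trivial userspace sketch of the mask, using VHOST_NET_VQ_TX = 1 as defined in net.c; the test helper is made up.

    #include <stdbool.h>
    #include <stdio.h>

    #define VHOST_NET_VQ_RX 0
    #define VHOST_NET_VQ_TX 1

    static unsigned vhost_zcopy_mask;

    static void vhost_enable_zcopy(int vq) { vhost_zcopy_mask |= 0x1 << vq; }
    static bool vq_is_zcopy(int vq) { return vhost_zcopy_mask & (0x1 << vq); }

    int main(void)
    {
        vhost_enable_zcopy(VHOST_NET_VQ_TX); /* only TX supports zerocopy */
        printf("rx zcopy: %d, tx zcopy: %d\n",
               vq_is_zcopy(VHOST_NET_VQ_RX), vq_is_zcopy(VHOST_NET_VQ_TX));
        return 0;
    }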
@@ -237,19 +260,21 @@ static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
 					  GFP_KERNEL);
 		dev->vqs[i].heads = kmalloc(sizeof *dev->vqs[i].heads *
 					    UIO_MAXIOV, GFP_KERNEL);
-
+		zcopy = vhost_zcopy_mask & (0x1 << i);
+		if (zcopy)
+			dev->vqs[i].ubuf_info =
+				kmalloc(sizeof *dev->vqs[i].ubuf_info *
+					UIO_MAXIOV, GFP_KERNEL);
 		if (!dev->vqs[i].indirect || !dev->vqs[i].log ||
-		    !dev->vqs[i].heads)
+		    !dev->vqs[i].heads ||
+		    (zcopy && !dev->vqs[i].ubuf_info))
 			goto err_nomem;
 	}
 	return 0;
 
 err_nomem:
-	for (; i >= 0; --i) {
-		kfree(dev->vqs[i].indirect);
-		kfree(dev->vqs[i].log);
-		kfree(dev->vqs[i].heads);
-	}
+	for (; i >= 0; --i)
+		vhost_vq_free_iovecs(&dev->vqs[i]);
 	return -ENOMEM;
 }
 
@@ -257,14 +282,8 @@ static void vhost_dev_free_iovecs(struct vhost_dev *dev)
 {
 	int i;
 
-	for (i = 0; i < dev->nvqs; ++i) {
-		kfree(dev->vqs[i].indirect);
-		dev->vqs[i].indirect = NULL;
-		kfree(dev->vqs[i].log);
-		dev->vqs[i].log = NULL;
-		kfree(dev->vqs[i].heads);
-		dev->vqs[i].heads = NULL;
-	}
+	for (i = 0; i < dev->nvqs; ++i)
+		vhost_vq_free_iovecs(&dev->vqs[i]);
 }
 
 long vhost_dev_init(struct vhost_dev *dev,
@@ -287,6 +306,7 @@ long vhost_dev_init(struct vhost_dev *dev,
 		dev->vqs[i].log = NULL;
 		dev->vqs[i].indirect = NULL;
 		dev->vqs[i].heads = NULL;
+		dev->vqs[i].ubuf_info = NULL;
 		dev->vqs[i].dev = dev;
 		mutex_init(&dev->vqs[i].mutex);
 		vhost_vq_reset(dev, dev->vqs + i);
@@ -390,6 +410,30 @@ long vhost_dev_reset_owner(struct vhost_dev *dev)
 	return 0;
 }
 
+/* In case of DMA done not in order in lower device driver for some reason.
+ * upend_idx is used to track end of used idx, done_idx is used to track head
+ * of used idx. Once lower device DMA done contiguously, we will signal KVM
+ * guest used idx.
+ */
+int vhost_zerocopy_signal_used(struct vhost_virtqueue *vq)
+{
+	int i;
+	int j = 0;
+
+	for (i = vq->done_idx; i != vq->upend_idx; i = (i + 1) % UIO_MAXIOV) {
+		if ((vq->heads[i].len == VHOST_DMA_DONE_LEN)) {
+			vq->heads[i].len = VHOST_DMA_CLEAR_LEN;
+			vhost_add_used_and_signal(vq->dev, vq,
+						  vq->heads[i].id, 0);
+			++j;
+		} else
+			break;
+	}
+	if (j)
+		vq->done_idx = i;
+	return j;
+}
+
 /* Caller should have device mutex */
 void vhost_dev_cleanup(struct vhost_dev *dev)
 {
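
vhost_zerocopy_signal_used() copes with out-of-order DMA completion: the callback may mark any slot between done_idx and upend_idx as done, but the guest is only signalled for the contiguous prefix starting at done_idx. A runnable userspace simulation of that scan; names mirror the patch, and vhost_add_used_and_signal() is reduced to clearing the slot.

    #include <stdio.h>

    #define UIO_MAXIOV           1024
    #define VHOST_DMA_DONE_LEN   1
    #define VHOST_DMA_CLEAR_LEN  0

    static unsigned len[UIO_MAXIOV];
    static int done_idx, upend_idx;

    static int signal_used(void)
    {
        int i, j = 0;

        for (i = done_idx; i != upend_idx; i = (i + 1) % UIO_MAXIOV) {
            if (len[i] != VHOST_DMA_DONE_LEN)
                break;                     /* first still-pending DMA */
            len[i] = VHOST_DMA_CLEAR_LEN;  /* "add_used_and_signal" here */
            ++j;
        }
        if (j)
            done_idx = i;
        return j;
    }

    int main(void)
    {
        upend_idx = 3;                        /* three DMAs in flight */
        len[0] = len[2] = VHOST_DMA_DONE_LEN; /* 0 and 2 done, 1 pending */
        printf("signalled %d, done_idx %d\n", signal_used(), done_idx); /* 1, 1 */
        len[1] = VHOST_DMA_DONE_LEN;          /* the hole is filled */
        printf("signalled %d, done_idx %d\n", signal_used(), done_idx); /* 2, 3 */
        return 0;
    }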
@@ -400,6 +444,13 @@ void vhost_dev_cleanup(struct vhost_dev *dev)
 			vhost_poll_stop(&dev->vqs[i].poll);
 			vhost_poll_flush(&dev->vqs[i].poll);
 		}
+		/* Wait for all lower device DMAs done. */
+		if (dev->vqs[i].ubufs)
+			vhost_ubuf_put_and_wait(dev->vqs[i].ubufs);
+
+		/* Signal guest as appropriate. */
+		vhost_zerocopy_signal_used(&dev->vqs[i]);
+
 		if (dev->vqs[i].error_ctx)
 			eventfd_ctx_put(dev->vqs[i].error_ctx);
 		if (dev->vqs[i].error)
@@ -1486,3 +1537,50 @@ void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
 			       &vq->used->flags, r);
 	}
 }
+
+static void vhost_zerocopy_done_signal(struct kref *kref)
+{
+	struct vhost_ubuf_ref *ubufs = container_of(kref, struct vhost_ubuf_ref,
+						    kref);
+	wake_up(&ubufs->wait);
+}
+
+struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *vq,
+					bool zcopy)
+{
+	struct vhost_ubuf_ref *ubufs;
+	/* No zero copy backend? Nothing to count. */
+	if (!zcopy)
+		return NULL;
+	ubufs = kmalloc(sizeof *ubufs, GFP_KERNEL);
+	if (!ubufs)
+		return ERR_PTR(-ENOMEM);
+	kref_init(&ubufs->kref);
+	kref_get(&ubufs->kref);
+	init_waitqueue_head(&ubufs->wait);
+	ubufs->vq = vq;
+	return ubufs;
+}
+
+void vhost_ubuf_put(struct vhost_ubuf_ref *ubufs)
+{
+	kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
+}
+
+void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *ubufs)
+{
+	kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
+	wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount));
+	kfree(ubufs);
+}
+
+void vhost_zerocopy_callback(void *arg)
+{
+	struct ubuf_info *ubuf = arg;
+	struct vhost_ubuf_ref *ubufs = ubuf->arg;
+	struct vhost_virtqueue *vq = ubufs->vq;
+
+	/* set len = 1 to mark this desc buffers done DMA */
+	vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN;
+	kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
+}
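
The vhost_ubuf_ref kref above counts in-flight zerocopy skbs: handle_tx() takes a reference per zerocopy packet, vhost_zerocopy_callback() drops it when the lower device finishes the DMA, and the teardown paths drop their reference and wait for the count to drain. A simplified userspace sketch of that lifecycle: plain C11 atomics instead of kref/waitqueue, a single base reference where the patch takes an extra kref_get, and a print standing in for wake_up().

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int refcount;

    static void ubuf_get(void) { atomic_fetch_add(&refcount, 1); }

    static void ubuf_put(void)
    {
        /* atomic_fetch_sub returns the old value: 1 means we were last. */
        if (atomic_fetch_sub(&refcount, 1) == 1)
            printf("last ref gone: wake up waiter, safe to free\n");
    }

    int main(void)
    {
        atomic_store(&refcount, 1); /* base ref held by the vq */
        ubuf_get();                 /* zerocopy skb #1 in flight */
        ubuf_get();                 /* zerocopy skb #2 in flight */
        ubuf_put();                 /* teardown drops the base ref ... */
        ubuf_put();                 /* ... DMA callback for skb #1 ... */
        ubuf_put();                 /* ... and #2: waiter proceeds */
        return 0;
    }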
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 8e03379dd30f..1544b782529b 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -13,6 +13,11 @@
 #include <linux/virtio_ring.h>
 #include <asm/atomic.h>
 
+/* This is for zerocopy, used buffer len is set to 1 when lower device DMA
+ * done */
+#define VHOST_DMA_DONE_LEN	1
+#define VHOST_DMA_CLEAR_LEN	0
+
 struct vhost_device;
 
 struct vhost_work;
@@ -50,6 +55,18 @@ struct vhost_log {
 	u64 len;
 };
 
+struct vhost_virtqueue;
+
+struct vhost_ubuf_ref {
+	struct kref kref;
+	wait_queue_head_t wait;
+	struct vhost_virtqueue *vq;
+};
+
+struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *, bool zcopy);
+void vhost_ubuf_put(struct vhost_ubuf_ref *);
+void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *);
+
 /* The virtqueue structure describes a queue attached to a device. */
 struct vhost_virtqueue {
 	struct vhost_dev *dev;
@@ -114,6 +131,16 @@ struct vhost_virtqueue {
 	/* Log write descriptors */
 	void __user *log_base;
 	struct vhost_log *log;
+	/* vhost zerocopy support fields below: */
+	/* last used idx for outstanding DMA zerocopy buffers */
+	int upend_idx;
+	/* first used idx for DMA done zerocopy buffers */
+	int done_idx;
+	/* an array of userspace buffers info */
+	struct ubuf_info *ubuf_info;
+	/* Reference counting for outstanding ubufs.
+	 * Protected by vq mutex. Writers must also take device mutex. */
+	struct vhost_ubuf_ref *ubufs;
 };
 
 struct vhost_dev {
@@ -160,6 +187,8 @@ bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *);
 
 int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
 		    unsigned int log_num, u64 len);
+void vhost_zerocopy_callback(void *arg);
+int vhost_zerocopy_signal_used(struct vhost_virtqueue *vq);
 
 #define vq_err(vq, fmt, ...) do {                                  \
 		pr_debug(pr_fmt(fmt), ##__VA_ARGS__);              \
@@ -186,4 +215,6 @@ static inline int vhost_has_feature(struct vhost_dev *dev, int bit)
 	return acked_features & (1 << bit);
 }
 
+void vhost_enable_zcopy(int vq);
+
 #endif