Diffstat (limited to 'drivers/vhost/net.c')
 drivers/vhost/net.c | 47 ++++++++++++++++++++++++++---------------------
 1 file changed, 26 insertions(+), 21 deletions(-)
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 9a68409580d5..a0fa5de210cf 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -70,7 +70,12 @@ enum {
 };
 
 struct vhost_net_ubuf_ref {
-	struct kref kref;
+	/* refcount follows semantics similar to kref:
+	 *  0: object is released
+	 *  1: no outstanding ubufs
+	 * >1: outstanding ubufs
+	 */
+	atomic_t refcount;
 	wait_queue_head_t wait;
 	struct vhost_virtqueue *vq;
 };
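The comment above encodes a bias-of-one scheme: the owner holds one reference, each in-flight ubuf holds another, and zero means the object is gone. A minimal userspace sketch of the same semantics (hypothetical names, C11 atomics standing in for the kernel's atomic_t; not the vhost code itself):

#include <stdatomic.h>
#include <stdbool.h>

struct ubuf_ref {
	atomic_int refcount;	/* 0: released, 1: idle, >1: outstanding ubufs */
};

static void ubuf_ref_init(struct ubuf_ref *r)
{
	atomic_store(&r->refcount, 1);		/* the owner's bias */
}

static void ubuf_get(struct ubuf_ref *r)
{
	atomic_fetch_add(&r->refcount, 1);	/* one per in-flight ubuf */
}

/* Return the post-decrement count, like vhost_net_ubuf_put() below. */
static int ubuf_put(struct ubuf_ref *r)
{
	return atomic_fetch_sub(&r->refcount, 1) - 1;
}

/* With the bias still held, 1 means "no outstanding ubufs". */
static bool ubufs_idle(struct ubuf_ref *r)
{
	return atomic_load(&r->refcount) == 1;
}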
@@ -116,14 +121,6 @@ static void vhost_net_enable_zcopy(int vq)
 	vhost_net_zcopy_mask |= 0x1 << vq;
 }
 
-static void vhost_net_zerocopy_done_signal(struct kref *kref)
-{
-	struct vhost_net_ubuf_ref *ubufs;
-
-	ubufs = container_of(kref, struct vhost_net_ubuf_ref, kref);
-	wake_up(&ubufs->wait);
-}
-
 static struct vhost_net_ubuf_ref *
 vhost_net_ubuf_alloc(struct vhost_virtqueue *vq, bool zcopy)
 {
@@ -134,21 +131,24 @@ vhost_net_ubuf_alloc(struct vhost_virtqueue *vq, bool zcopy)
 	ubufs = kmalloc(sizeof(*ubufs), GFP_KERNEL);
 	if (!ubufs)
 		return ERR_PTR(-ENOMEM);
-	kref_init(&ubufs->kref);
+	atomic_set(&ubufs->refcount, 1);
 	init_waitqueue_head(&ubufs->wait);
 	ubufs->vq = vq;
 	return ubufs;
 }
 
-static void vhost_net_ubuf_put(struct vhost_net_ubuf_ref *ubufs)
+static int vhost_net_ubuf_put(struct vhost_net_ubuf_ref *ubufs)
 {
-	kref_put(&ubufs->kref, vhost_net_zerocopy_done_signal);
+	int r = atomic_sub_return(1, &ubufs->refcount);
+	if (unlikely(!r))
+		wake_up(&ubufs->wait);
+	return r;
 }
 
 static void vhost_net_ubuf_put_and_wait(struct vhost_net_ubuf_ref *ubufs)
 {
-	kref_put(&ubufs->kref, vhost_net_zerocopy_done_signal);
-	wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount));
+	vhost_net_ubuf_put(ubufs);
+	wait_event(ubufs->wait, !atomic_read(&ubufs->refcount));
 }
 
 static void vhost_net_ubuf_put_wait_and_free(struct vhost_net_ubuf_ref *ubufs)
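vhost_net_ubuf_put_and_wait() now drops the caller's bias and then sleeps until the callbacks have released every remaining reference. A userspace analogue of that handshake, assuming pthreads in place of the kernel's wait_queue_head_t (hypothetical names, a sketch rather than the real code):

#include <pthread.h>
#include <stdatomic.h>

struct ubuf_waiter {
	atomic_int refcount;
	pthread_mutex_t lock;
	pthread_cond_t wait;	/* stands in for wait_queue_head_t */
};

/* Like vhost_net_ubuf_put(): report the post-decrement count and
 * wake the waiter once it reaches zero. */
static int waiter_put(struct ubuf_waiter *w)
{
	int r = atomic_fetch_sub(&w->refcount, 1) - 1;

	if (r == 0) {
		pthread_mutex_lock(&w->lock);
		pthread_cond_broadcast(&w->wait);
		pthread_mutex_unlock(&w->lock);
	}
	return r;
}

/* Like vhost_net_ubuf_put_and_wait(): drop our own bias, then block
 * until every outstanding ubuf reference is gone. */
static void waiter_put_and_wait(struct ubuf_waiter *w)
{
	waiter_put(w);
	pthread_mutex_lock(&w->lock);
	while (atomic_load(&w->refcount) != 0)
		pthread_cond_wait(&w->wait, &w->lock);
	pthread_mutex_unlock(&w->lock);
}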
@@ -306,23 +306,26 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
 {
 	struct vhost_net_ubuf_ref *ubufs = ubuf->ctx;
 	struct vhost_virtqueue *vq = ubufs->vq;
-	int cnt = atomic_read(&ubufs->kref.refcount);
+	int cnt;
+
+	rcu_read_lock_bh();
 
 	/* set len to mark this desc buffers done DMA */
 	vq->heads[ubuf->desc].len = success ?
 		VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN;
-	vhost_net_ubuf_put(ubufs);
+	cnt = vhost_net_ubuf_put(ubufs);
 
 	/*
 	 * Trigger polling thread if guest stopped submitting new buffers:
-	 * in this case, the refcount after decrement will eventually reach 1
-	 * so here it is 2.
+	 * in this case, the refcount after decrement will eventually reach 1.
 	 * We also trigger polling periodically after each 16 packets
 	 * (the value 16 here is more or less arbitrary, it's tuned to trigger
 	 * less than 10% of times).
 	 */
-	if (cnt <= 2 || !(cnt % 16))
+	if (cnt <= 1 || !(cnt % 16))
 		vhost_poll_queue(&vq->poll);
+
+	rcu_read_unlock_bh();
 }
 
 /* Expects to be always run from workqueue - which acts as
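Note why the trigger threshold moves from 2 to 1: the old code sampled the refcount before dropping its reference, while the new code uses the post-decrement value returned by vhost_net_ubuf_put(), so the same "last outstanding ubuf" moment now reads one lower. A toy check of that off-by-one (plain C11, not kernel code):

#include <assert.h>
#include <stdatomic.h>

int main(void)
{
	atomic_int refcount;

	atomic_store(&refcount, 2);	/* owner bias + one outstanding ubuf */

	int before = atomic_load(&refcount);			/* old scheme sees 2 */
	int after = atomic_fetch_sub(&refcount, 1) - 1;		/* new scheme sees 1 */

	assert(before == 2 && after == 1);	/* hence cnt <= 2 becomes cnt <= 1 */
	return 0;
}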
@@ -420,7 +423,7 @@ static void handle_tx(struct vhost_net *net)
 			msg.msg_control = ubuf;
 			msg.msg_controllen = sizeof(ubuf);
 			ubufs = nvq->ubufs;
-			kref_get(&ubufs->kref);
+			atomic_inc(&ubufs->refcount);
 			nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV;
 		} else {
 			msg.msg_control = NULL;
@@ -780,7 +783,7 @@ static void vhost_net_flush(struct vhost_net *n)
 		vhost_net_ubuf_put_and_wait(n->vqs[VHOST_NET_VQ_TX].ubufs);
 		mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
 		n->tx_flush = false;
-		kref_init(&n->vqs[VHOST_NET_VQ_TX].ubufs->kref);
+		atomic_set(&n->vqs[VHOST_NET_VQ_TX].ubufs->refcount, 1);
 		mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
 	}
 }
@@ -800,6 +803,8 @@ static int vhost_net_release(struct inode *inode, struct file *f)
 		fput(tx_sock->file);
 	if (rx_sock)
 		fput(rx_sock->file);
+	/* Make sure no callbacks are outstanding */
+	synchronize_rcu_bh();
 	/* We do an extra flush before freeing memory,
 	 * since jobs can re-queue themselves. */
 	vhost_net_flush(n);
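The rcu_read_lock_bh()/rcu_read_unlock_bh() pair added around the zerocopy callback pairs with this synchronize_rcu_bh(): once the release path returns from it, no callback can still be inside its read-side section, so the subsequent flush and free are safe. A userspace sketch of the same pattern, assuming liburcu's rcu_read_lock()/synchronize_rcu() in place of the kernel's _bh variants (hypothetical function names):

#include <urcu.h>	/* liburcu; each thread must call rcu_register_thread() */

/* Callback side: whatever it touches stays valid for the whole
 * read-side critical section. */
static void zerocopy_callback_sketch(void)
{
	rcu_read_lock();
	/* ... mark the descriptor done, drop the ubuf reference ... */
	rcu_read_unlock();
}

/* Teardown side: synchronize_rcu() cannot return while any callback
 * is still inside its read-side section. */
static void release_sketch(void)
{
	synchronize_rcu();	/* make sure no callbacks are outstanding */
	/* ... now flush and free ... */
}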