Diffstat (limited to 'drivers/vhost/net.c')
-rw-r--r--	drivers/vhost/net.c	164
1 file changed, 141 insertions, 23 deletions
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 176aa030dc5f..8672e0538d59 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -64,8 +64,24 @@ enum {
 	VHOST_NET_VQ_MAX = 2,
 };
 
+struct vhost_ubuf_ref {
+	struct kref kref;
+	wait_queue_head_t wait;
+	struct vhost_virtqueue *vq;
+};
+
 struct vhost_net_virtqueue {
 	struct vhost_virtqueue vq;
+	/* vhost zerocopy support fields below: */
+	/* last used idx for outstanding DMA zerocopy buffers */
+	int upend_idx;
+	/* first used idx for DMA done zerocopy buffers */
+	int done_idx;
+	/* an array of userspace buffers info */
+	struct ubuf_info *ubuf_info;
+	/* Reference counting for outstanding ubufs.
+	 * Protected by vq mutex. Writers must also take device mutex. */
+	struct vhost_ubuf_ref *ubufs;
 };
 
 struct vhost_net {
@@ -82,6 +98,88 @@ struct vhost_net {
 	bool tx_flush;
 };
 
+static unsigned vhost_zcopy_mask __read_mostly;
+
+void vhost_enable_zcopy(int vq)
+{
+	vhost_zcopy_mask |= 0x1 << vq;
+}
+
+static void vhost_zerocopy_done_signal(struct kref *kref)
+{
+	struct vhost_ubuf_ref *ubufs = container_of(kref, struct vhost_ubuf_ref,
+						    kref);
+	wake_up(&ubufs->wait);
+}
+
+struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *vq,
+					bool zcopy)
+{
+	struct vhost_ubuf_ref *ubufs;
+	/* No zero copy backend? Nothing to count. */
+	if (!zcopy)
+		return NULL;
+	ubufs = kmalloc(sizeof(*ubufs), GFP_KERNEL);
+	if (!ubufs)
+		return ERR_PTR(-ENOMEM);
+	kref_init(&ubufs->kref);
+	init_waitqueue_head(&ubufs->wait);
+	ubufs->vq = vq;
+	return ubufs;
+}
+
+void vhost_ubuf_put(struct vhost_ubuf_ref *ubufs)
+{
+	kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
+}
+
+void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *ubufs)
+{
+	kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
+	wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount));
+	kfree(ubufs);
+}
+
+int vhost_net_set_ubuf_info(struct vhost_net *n)
+{
+	bool zcopy;
+	int i;
+
+	for (i = 0; i < n->dev.nvqs; ++i) {
+		zcopy = vhost_zcopy_mask & (0x1 << i);
+		if (!zcopy)
+			continue;
+		n->vqs[i].ubuf_info = kmalloc(sizeof(*n->vqs[i].ubuf_info) *
+					      UIO_MAXIOV, GFP_KERNEL);
+		if (!n->vqs[i].ubuf_info)
+			goto err;
+	}
+	return 0;
+
+err:
+	while (i--) {
+		zcopy = vhost_zcopy_mask & (0x1 << i);
+		if (!zcopy)
+			continue;
+		kfree(n->vqs[i].ubuf_info);
+	}
+	return -ENOMEM;
+}
+
+void vhost_net_reset_ubuf_info(struct vhost_net *n)
+{
+	int i;
+
+	for (i = 0; i < VHOST_NET_VQ_MAX; i++) {
+		n->vqs[i].done_idx = 0;
+		n->vqs[i].upend_idx = 0;
+		n->vqs[i].ubufs = NULL;
+		kfree(n->vqs[i].ubuf_info);
+		n->vqs[i].ubuf_info = NULL;
+	}
+
+}
+
 static void vhost_net_tx_packet(struct vhost_net *net)
 {
 	++net->tx_packets;
@@ -157,10 +255,12 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to,
 static int vhost_zerocopy_signal_used(struct vhost_net *net,
 				      struct vhost_virtqueue *vq)
 {
+	struct vhost_net_virtqueue *nvq =
+		container_of(vq, struct vhost_net_virtqueue, vq);
 	int i;
 	int j = 0;
 
-	for (i = vq->done_idx; i != vq->upend_idx; i = (i + 1) % UIO_MAXIOV) {
+	for (i = nvq->done_idx; i != nvq->upend_idx; i = (i + 1) % UIO_MAXIOV) {
 		if (vq->heads[i].len == VHOST_DMA_FAILED_LEN)
 			vhost_net_tx_err(net);
 		if (VHOST_DMA_IS_DONE(vq->heads[i].len)) {
@@ -172,7 +272,7 @@ static int vhost_zerocopy_signal_used(struct vhost_net *net,
 			break;
 	}
 	if (j)
-		vq->done_idx = i;
+		nvq->done_idx = i;
 	return j;
 }
 
@@ -203,6 +303,7 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
 static void handle_tx(struct vhost_net *net)
 {
 	struct vhost_virtqueue *vq = &net->vqs[VHOST_NET_VQ_TX].vq;
+	struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
 	unsigned out, in, s;
 	int head;
 	struct msghdr msg = {
@@ -229,7 +330,7 @@ static void handle_tx(struct vhost_net *net)
 	vhost_disable_notify(&net->dev, vq);
 
 	hdr_size = vq->vhost_hlen;
-	zcopy = vq->ubufs;
+	zcopy = nvq->ubufs;
 
 	for (;;) {
 		/* Release DMAs done buffers first */
@@ -250,9 +351,10 @@ static void handle_tx(struct vhost_net *net)
 			/* If more outstanding DMAs, queue the work.
 			 * Handle upend_idx wrap around
 			 */
-			num_pends = likely(vq->upend_idx >= vq->done_idx) ?
-				(vq->upend_idx - vq->done_idx) :
-				(vq->upend_idx + UIO_MAXIOV - vq->done_idx);
+			num_pends = likely(nvq->upend_idx >= nvq->done_idx) ?
+				(nvq->upend_idx - nvq->done_idx) :
+				(nvq->upend_idx + UIO_MAXIOV -
+				 nvq->done_idx);
 			if (unlikely(num_pends > VHOST_MAX_PEND))
 				break;
 			if (unlikely(vhost_enable_notify(&net->dev, vq))) {
@@ -278,34 +380,34 @@ static void handle_tx(struct vhost_net *net)
 			break;
 		}
 		zcopy_used = zcopy && (len >= VHOST_GOODCOPY_LEN ||
-				       vq->upend_idx != vq->done_idx);
+				       nvq->upend_idx != nvq->done_idx);
 
 		/* use msg_control to pass vhost zerocopy ubuf info to skb */
 		if (zcopy_used) {
-			vq->heads[vq->upend_idx].id = head;
+			vq->heads[nvq->upend_idx].id = head;
 			if (!vhost_net_tx_select_zcopy(net) ||
 			    len < VHOST_GOODCOPY_LEN) {
 				/* copy don't need to wait for DMA done */
-				vq->heads[vq->upend_idx].len =
+				vq->heads[nvq->upend_idx].len =
 					VHOST_DMA_DONE_LEN;
 				msg.msg_control = NULL;
 				msg.msg_controllen = 0;
 				ubufs = NULL;
 			} else {
 				struct ubuf_info *ubuf;
-				ubuf = vq->ubuf_info + vq->upend_idx;
+				ubuf = nvq->ubuf_info + nvq->upend_idx;
 
-				vq->heads[vq->upend_idx].len =
+				vq->heads[nvq->upend_idx].len =
 					VHOST_DMA_IN_PROGRESS;
 				ubuf->callback = vhost_zerocopy_callback;
-				ubuf->ctx = vq->ubufs;
-				ubuf->desc = vq->upend_idx;
+				ubuf->ctx = nvq->ubufs;
+				ubuf->desc = nvq->upend_idx;
 				msg.msg_control = ubuf;
 				msg.msg_controllen = sizeof(ubuf);
-				ubufs = vq->ubufs;
+				ubufs = nvq->ubufs;
 				kref_get(&ubufs->kref);
 			}
-			vq->upend_idx = (vq->upend_idx + 1) % UIO_MAXIOV;
+			nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV;
 		}
 		/* TODO: Check specific error and bomb out unless ENOBUFS? */
 		err = sock->ops->sendmsg(NULL, sock, &msg, len);
@@ -313,8 +415,8 @@ static void handle_tx(struct vhost_net *net)
 		if (zcopy_used) {
 			if (ubufs)
 				vhost_ubuf_put(ubufs);
-			vq->upend_idx = ((unsigned)vq->upend_idx - 1) %
-				UIO_MAXIOV;
+			nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
+				% UIO_MAXIOV;
 		}
 		vhost_discard_vq_desc(vq, 1);
 		break;
@@ -564,7 +666,7 @@ static int vhost_net_open(struct inode *inode, struct file *f)
 	struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL);
 	struct vhost_dev *dev;
 	struct vhost_virtqueue **vqs;
-	int r;
+	int r, i;
 
 	if (!n)
 		return -ENOMEM;
@@ -579,6 +681,12 @@ static int vhost_net_open(struct inode *inode, struct file *f)
 	vqs[VHOST_NET_VQ_RX] = &n->vqs[VHOST_NET_VQ_RX].vq;
 	n->vqs[VHOST_NET_VQ_TX].vq.handle_kick = handle_tx_kick;
 	n->vqs[VHOST_NET_VQ_RX].vq.handle_kick = handle_rx_kick;
+	for (i = 0; i < VHOST_NET_VQ_MAX; i++) {
+		n->vqs[i].ubufs = NULL;
+		n->vqs[i].ubuf_info = NULL;
+		n->vqs[i].upend_idx = 0;
+		n->vqs[i].done_idx = 0;
+	}
 	r = vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX);
 	if (r < 0) {
 		kfree(n);
@@ -652,15 +760,15 @@ static void vhost_net_flush(struct vhost_net *n)
 {
 	vhost_net_flush_vq(n, VHOST_NET_VQ_TX);
 	vhost_net_flush_vq(n, VHOST_NET_VQ_RX);
-	if (n->vqs[VHOST_NET_VQ_TX].vq.ubufs) {
+	if (n->vqs[VHOST_NET_VQ_TX].ubufs) {
 		mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
 		n->tx_flush = true;
 		mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
 		/* Wait for all lower device DMAs done. */
-		vhost_ubuf_put_and_wait(n->vqs[VHOST_NET_VQ_TX].vq.ubufs);
+		vhost_ubuf_put_and_wait(n->vqs[VHOST_NET_VQ_TX].ubufs);
 		mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
 		n->tx_flush = false;
-		kref_init(&n->vqs[VHOST_NET_VQ_TX].vq.ubufs->kref);
+		kref_init(&n->vqs[VHOST_NET_VQ_TX].ubufs->kref);
 		mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
 	}
 }
@@ -675,6 +783,7 @@ static int vhost_net_release(struct inode *inode, struct file *f)
 	vhost_net_flush(n);
 	vhost_dev_stop(&n->dev);
 	vhost_dev_cleanup(&n->dev, false);
+	vhost_net_reset_ubuf_info(n);
 	if (tx_sock)
 		fput(tx_sock->file);
 	if (rx_sock)
@@ -756,6 +865,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
 {
 	struct socket *sock, *oldsock;
 	struct vhost_virtqueue *vq;
+	struct vhost_net_virtqueue *nvq;
 	struct vhost_ubuf_ref *ubufs, *oldubufs = NULL;
 	int r;
 
@@ -769,6 +879,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
 		goto err;
 	}
 	vq = &n->vqs[index].vq;
+	nvq = &n->vqs[index];
 	mutex_lock(&vq->mutex);
 
 	/* Verify that ring has been setup correctly. */
@@ -801,8 +912,8 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
 		if (r)
 			goto err_used;
 
-		oldubufs = vq->ubufs;
-		vq->ubufs = ubufs;
+		oldubufs = nvq->ubufs;
+		nvq->ubufs = ubufs;
 
 		n->tx_packets = 0;
 		n->tx_zcopy_err = 0;
@@ -853,6 +964,7 @@ static long vhost_net_reset_owner(struct vhost_net *n)
 	vhost_net_stop(n, &tx_sock, &rx_sock);
 	vhost_net_flush(n);
 	err = vhost_dev_reset_owner(&n->dev);
+	vhost_net_reset_ubuf_info(n);
 done:
 	mutex_unlock(&n->dev.mutex);
 	if (tx_sock)
@@ -928,11 +1040,17 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
 		return vhost_net_reset_owner(n);
 	default:
 		mutex_lock(&n->dev.mutex);
+		if (ioctl == VHOST_SET_OWNER) {
+			r = vhost_net_set_ubuf_info(n);
+			if (r)
+				goto out;
+		}
 		r = vhost_dev_ioctl(&n->dev, ioctl, argp);
 		if (r == -ENOIOCTLCMD)
 			r = vhost_vring_ioctl(&n->dev, ioctl, argp);
 		else
 			vhost_net_flush(n);
+out:
 		mutex_unlock(&n->dev.mutex);
 		return r;
 	}
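
The vhost_ubuf_ref introduced by this patch is essentially a kref plus a waitqueue: every in-flight zerocopy buffer holds a reference, vhost_ubuf_put() drops one, and vhost_ubuf_put_and_wait() drops the flusher's reference and sleeps until the count reaches zero before freeing the object. As a rough illustration only (not part of the patch), the sketch below shows the same "put and wait for all outstanding references" pattern in userspace C, using pthreads in place of kref/wait_event; every name in it is hypothetical.

#include <pthread.h>
#include <stdlib.h>

/* Toy analogue of struct vhost_ubuf_ref: a counted object whose last
 * put() wakes waiters, mirroring kref + wait_queue_head_t. */
struct ubuf_ref {
	int count;		/* protected by lock */
	pthread_mutex_t lock;
	pthread_cond_t zero;	/* signalled when count reaches 0 */
};

static struct ubuf_ref *ubuf_ref_alloc(void)
{
	struct ubuf_ref *r = malloc(sizeof(*r));

	if (!r)
		return NULL;
	r->count = 1;			/* like kref_init() */
	pthread_mutex_init(&r->lock, NULL);
	pthread_cond_init(&r->zero, NULL);
	return r;
}

static void ubuf_ref_get(struct ubuf_ref *r)	/* like kref_get() */
{
	pthread_mutex_lock(&r->lock);
	r->count++;
	pthread_mutex_unlock(&r->lock);
}

static void ubuf_ref_put(struct ubuf_ref *r)	/* like vhost_ubuf_put() */
{
	pthread_mutex_lock(&r->lock);
	if (--r->count == 0)
		pthread_cond_broadcast(&r->zero);	/* wake_up(&wait) */
	pthread_mutex_unlock(&r->lock);
}

/* Like vhost_ubuf_put_and_wait(): drop our reference, block until every
 * outstanding holder has dropped theirs, then free the object. */
static void ubuf_ref_put_and_wait(struct ubuf_ref *r)
{
	pthread_mutex_lock(&r->lock);
	if (--r->count == 0)
		pthread_cond_broadcast(&r->zero);
	while (r->count != 0)
		pthread_cond_wait(&r->zero, &r->lock);
	pthread_mutex_unlock(&r->lock);
	pthread_cond_destroy(&r->zero);
	pthread_mutex_destroy(&r->lock);
	free(r);
}

int main(void)
{
	struct ubuf_ref *r = ubuf_ref_alloc();

	if (!r)
		return 1;
	ubuf_ref_get(r);	/* a zerocopy buffer taking a reference */
	ubuf_ref_put(r);	/* its completion callback dropping it */
	ubuf_ref_put_and_wait(r);	/* flush path: wait, then free */
	return 0;
}

The kernel version gets the same effect with lighter machinery: kref does the atomic counting and wait_event()/wake_up() replace the mutex/condition-variable pair.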