author     Linus Torvalds <torvalds@linux-foundation.org>  2013-05-02 16:29:14 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2013-05-02 16:29:14 -0400
commit     e95893004104054d49406fd108fefa3ddc054366 (patch)
tree       9bf38e91f4767d9842ba32839eda28a1ac1a971a
parent     5a148af66932c31814e263366094b5812210b501 (diff)
parent     181c04a357bb791587c55a99362c2fdde2c64f18 (diff)
Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
Pull vhost cleanups and fixes from Michael Tsirkin:
 "Here are vhost cleanups and fixes by Asias He and myself.  They
  affect both vhost-net and vhost-scsi devices.  They also *depend* on
  both net-next and target-pending, where the net and target commits
  these changes depend on are already merged.  So merging through the
  common vhost tree."

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  vhost_scsi: module rename
  tcm_vhost: header split up
  vhost: src file renames
  vhost: fix error handling in RESET_OWNER ioctl
  tcm_vhost: remove virtio-net.h dependency
  vhost: move per-vq net specific fields out to net
  tcm_vhost: document inflight ref-counting use
  vhost: move vhost-net zerocopy fields to net.c
  tcm_vhost: Wait for pending requests in vhost_scsi_flush()
  vhost: Allow device specific fields per vq
-rw-r--r--  drivers/vhost/Kconfig                                          |  10
-rw-r--r--  drivers/vhost/Kconfig.tcm                                      |   6
-rw-r--r--  drivers/vhost/Makefile                                         |   3
-rw-r--r--  drivers/vhost/net.c                                            | 255
-rw-r--r--  drivers/vhost/scsi.c (renamed from drivers/vhost/tcm_vhost.c)  | 255
-rw-r--r--  drivers/vhost/tcm_vhost.h                                      | 128
-rw-r--r--  drivers/vhost/test.c                                           |   9
-rw-r--r--  drivers/vhost/vhost.c                                          | 153
-rw-r--r--  drivers/vhost/vhost.h                                          |  32
-rw-r--r--  include/uapi/linux/vhost.h                                     |  28
10 files changed, 533 insertions(+), 346 deletions(-)
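
The central change in this series is the last item in the shortlog above, "vhost: Allow device specific fields per vq": net- and scsi-specific state moves out of the generic struct vhost_virtqueue, each driver embeds the generic virtqueue in its own wrapper struct, and the wrapper is recovered with container_of() wherever only the generic pointer is available. A simplified sketch of the pattern (member names abbreviated from the real structures in the diff below):

	struct vhost_net_virtqueue {
		struct vhost_virtqueue vq;	/* generic vhost virtqueue, embedded by value */
		int upend_idx;			/* net-only zerocopy bookkeeping */
		int done_idx;
	};

	static void example_handler(struct vhost_virtqueue *vq)
	{
		/* recover the net-private wrapper from the generic vq pointer */
		struct vhost_net_virtqueue *nvq =
			container_of(vq, struct vhost_net_virtqueue, vq);

		nvq->done_idx = nvq->upend_idx;	/* net-only fields are reached via nvq */
	}

As a consequence, vhost_dev_init() now takes an array of struct vhost_virtqueue pointers rather than an array of virtqueues, since the wrapper structs no longer have a uniform size across devices.
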
diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig
index bf243177ffe1..26a64e5b8a58 100644
--- a/drivers/vhost/Kconfig
+++ b/drivers/vhost/Kconfig
@@ -9,6 +9,10 @@ config VHOST_NET
9 To compile this driver as a module, choose M here: the module will 9 To compile this driver as a module, choose M here: the module will
10 be called vhost_net. 10 be called vhost_net.
11 11
12if STAGING 12config VHOST_SCSI
13source "drivers/vhost/Kconfig.tcm" 13 tristate "VHOST_SCSI TCM fabric driver"
14endif 14 depends on TARGET_CORE && EVENTFD && m
15 default n
16 ---help---
17 Say M here to enable the vhost_scsi TCM fabric module
18 for use with virtio-scsi guests
diff --git a/drivers/vhost/Kconfig.tcm b/drivers/vhost/Kconfig.tcm
deleted file mode 100644
index 7e3aa28d999e..000000000000
--- a/drivers/vhost/Kconfig.tcm
+++ /dev/null
@@ -1,6 +0,0 @@
1config TCM_VHOST
2 tristate "TCM_VHOST fabric module"
3 depends on TARGET_CORE && EVENTFD && m
4 default n
5 ---help---
6 Say M here to enable the TCM_VHOST fabric module for use with virtio-scsi guests
diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile
index a27b053bc9ab..ef21d5fdfa7d 100644
--- a/drivers/vhost/Makefile
+++ b/drivers/vhost/Makefile
@@ -1,4 +1,5 @@
1obj-$(CONFIG_VHOST_NET) += vhost_net.o 1obj-$(CONFIG_VHOST_NET) += vhost_net.o
2vhost_net-y := vhost.o net.o 2vhost_net-y := vhost.o net.o
3 3
4obj-$(CONFIG_TCM_VHOST) += tcm_vhost.o 4obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o
5vhost_scsi-y := scsi.o
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 87c216c1e54e..a3645bd163d8 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -64,9 +64,35 @@ enum {
64 VHOST_NET_VQ_MAX = 2, 64 VHOST_NET_VQ_MAX = 2,
65}; 65};
66 66
67struct vhost_ubuf_ref {
68 struct kref kref;
69 wait_queue_head_t wait;
70 struct vhost_virtqueue *vq;
71};
72
73struct vhost_net_virtqueue {
74 struct vhost_virtqueue vq;
75 /* hdr is used to store the virtio header.
76 * Since each iovec has >= 1 byte length, we never need more than
77 * header length entries to store the header. */
78 struct iovec hdr[sizeof(struct virtio_net_hdr_mrg_rxbuf)];
79 size_t vhost_hlen;
80 size_t sock_hlen;
81 /* vhost zerocopy support fields below: */
82 /* last used idx for outstanding DMA zerocopy buffers */
83 int upend_idx;
84 /* first used idx for DMA done zerocopy buffers */
85 int done_idx;
86 /* an array of userspace buffers info */
87 struct ubuf_info *ubuf_info;
88 /* Reference counting for outstanding ubufs.
89 * Protected by vq mutex. Writers must also take device mutex. */
90 struct vhost_ubuf_ref *ubufs;
91};
92
67struct vhost_net { 93struct vhost_net {
68 struct vhost_dev dev; 94 struct vhost_dev dev;
69 struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; 95 struct vhost_net_virtqueue vqs[VHOST_NET_VQ_MAX];
70 struct vhost_poll poll[VHOST_NET_VQ_MAX]; 96 struct vhost_poll poll[VHOST_NET_VQ_MAX];
71 /* Number of TX recently submitted. 97 /* Number of TX recently submitted.
72 * Protected by tx vq lock. */ 98 * Protected by tx vq lock. */
@@ -78,6 +104,90 @@ struct vhost_net {
78 bool tx_flush; 104 bool tx_flush;
79}; 105};
80 106
107static unsigned vhost_zcopy_mask __read_mostly;
108
109void vhost_enable_zcopy(int vq)
110{
111 vhost_zcopy_mask |= 0x1 << vq;
112}
113
114static void vhost_zerocopy_done_signal(struct kref *kref)
115{
116 struct vhost_ubuf_ref *ubufs = container_of(kref, struct vhost_ubuf_ref,
117 kref);
118 wake_up(&ubufs->wait);
119}
120
121struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *vq,
122 bool zcopy)
123{
124 struct vhost_ubuf_ref *ubufs;
125 /* No zero copy backend? Nothing to count. */
126 if (!zcopy)
127 return NULL;
128 ubufs = kmalloc(sizeof(*ubufs), GFP_KERNEL);
129 if (!ubufs)
130 return ERR_PTR(-ENOMEM);
131 kref_init(&ubufs->kref);
132 init_waitqueue_head(&ubufs->wait);
133 ubufs->vq = vq;
134 return ubufs;
135}
136
137void vhost_ubuf_put(struct vhost_ubuf_ref *ubufs)
138{
139 kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
140}
141
142void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *ubufs)
143{
144 kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
145 wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount));
146 kfree(ubufs);
147}
148
149int vhost_net_set_ubuf_info(struct vhost_net *n)
150{
151 bool zcopy;
152 int i;
153
154 for (i = 0; i < n->dev.nvqs; ++i) {
155 zcopy = vhost_zcopy_mask & (0x1 << i);
156 if (!zcopy)
157 continue;
158 n->vqs[i].ubuf_info = kmalloc(sizeof(*n->vqs[i].ubuf_info) *
159 UIO_MAXIOV, GFP_KERNEL);
160 if (!n->vqs[i].ubuf_info)
161 goto err;
162 }
163 return 0;
164
165err:
166 while (i--) {
167 zcopy = vhost_zcopy_mask & (0x1 << i);
168 if (!zcopy)
169 continue;
170 kfree(n->vqs[i].ubuf_info);
171 }
172 return -ENOMEM;
173}
174
175void vhost_net_vq_reset(struct vhost_net *n)
176{
177 int i;
178
179 for (i = 0; i < VHOST_NET_VQ_MAX; i++) {
180 n->vqs[i].done_idx = 0;
181 n->vqs[i].upend_idx = 0;
182 n->vqs[i].ubufs = NULL;
183 kfree(n->vqs[i].ubuf_info);
184 n->vqs[i].ubuf_info = NULL;
185 n->vqs[i].vhost_hlen = 0;
186 n->vqs[i].sock_hlen = 0;
187 }
188
189}
190
81static void vhost_net_tx_packet(struct vhost_net *net) 191static void vhost_net_tx_packet(struct vhost_net *net)
82{ 192{
83 ++net->tx_packets; 193 ++net->tx_packets;
@@ -153,10 +263,12 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to,
153static int vhost_zerocopy_signal_used(struct vhost_net *net, 263static int vhost_zerocopy_signal_used(struct vhost_net *net,
154 struct vhost_virtqueue *vq) 264 struct vhost_virtqueue *vq)
155{ 265{
266 struct vhost_net_virtqueue *nvq =
267 container_of(vq, struct vhost_net_virtqueue, vq);
156 int i; 268 int i;
157 int j = 0; 269 int j = 0;
158 270
159 for (i = vq->done_idx; i != vq->upend_idx; i = (i + 1) % UIO_MAXIOV) { 271 for (i = nvq->done_idx; i != nvq->upend_idx; i = (i + 1) % UIO_MAXIOV) {
160 if (vq->heads[i].len == VHOST_DMA_FAILED_LEN) 272 if (vq->heads[i].len == VHOST_DMA_FAILED_LEN)
161 vhost_net_tx_err(net); 273 vhost_net_tx_err(net);
162 if (VHOST_DMA_IS_DONE(vq->heads[i].len)) { 274 if (VHOST_DMA_IS_DONE(vq->heads[i].len)) {
@@ -168,7 +280,7 @@ static int vhost_zerocopy_signal_used(struct vhost_net *net,
168 break; 280 break;
169 } 281 }
170 if (j) 282 if (j)
171 vq->done_idx = i; 283 nvq->done_idx = i;
172 return j; 284 return j;
173} 285}
174 286
@@ -198,7 +310,8 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
198 * read-size critical section for our kind of RCU. */ 310 * read-size critical section for our kind of RCU. */
199static void handle_tx(struct vhost_net *net) 311static void handle_tx(struct vhost_net *net)
200{ 312{
201 struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX]; 313 struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
314 struct vhost_virtqueue *vq = &nvq->vq;
202 unsigned out, in, s; 315 unsigned out, in, s;
203 int head; 316 int head;
204 struct msghdr msg = { 317 struct msghdr msg = {
@@ -224,8 +337,8 @@ static void handle_tx(struct vhost_net *net)
224 mutex_lock(&vq->mutex); 337 mutex_lock(&vq->mutex);
225 vhost_disable_notify(&net->dev, vq); 338 vhost_disable_notify(&net->dev, vq);
226 339
227 hdr_size = vq->vhost_hlen; 340 hdr_size = nvq->vhost_hlen;
228 zcopy = vq->ubufs; 341 zcopy = nvq->ubufs;
229 342
230 for (;;) { 343 for (;;) {
231 /* Release DMAs done buffers first */ 344 /* Release DMAs done buffers first */
@@ -246,9 +359,10 @@ static void handle_tx(struct vhost_net *net)
246 /* If more outstanding DMAs, queue the work. 359 /* If more outstanding DMAs, queue the work.
247 * Handle upend_idx wrap around 360 * Handle upend_idx wrap around
248 */ 361 */
249 num_pends = likely(vq->upend_idx >= vq->done_idx) ? 362 num_pends = likely(nvq->upend_idx >= nvq->done_idx) ?
250 (vq->upend_idx - vq->done_idx) : 363 (nvq->upend_idx - nvq->done_idx) :
251 (vq->upend_idx + UIO_MAXIOV - vq->done_idx); 364 (nvq->upend_idx + UIO_MAXIOV -
365 nvq->done_idx);
252 if (unlikely(num_pends > VHOST_MAX_PEND)) 366 if (unlikely(num_pends > VHOST_MAX_PEND))
253 break; 367 break;
254 if (unlikely(vhost_enable_notify(&net->dev, vq))) { 368 if (unlikely(vhost_enable_notify(&net->dev, vq))) {
@@ -263,45 +377,45 @@ static void handle_tx(struct vhost_net *net)
263 break; 377 break;
264 } 378 }
265 /* Skip header. TODO: support TSO. */ 379 /* Skip header. TODO: support TSO. */
266 s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, out); 380 s = move_iovec_hdr(vq->iov, nvq->hdr, hdr_size, out);
267 msg.msg_iovlen = out; 381 msg.msg_iovlen = out;
268 len = iov_length(vq->iov, out); 382 len = iov_length(vq->iov, out);
269 /* Sanity check */ 383 /* Sanity check */
270 if (!len) { 384 if (!len) {
271 vq_err(vq, "Unexpected header len for TX: " 385 vq_err(vq, "Unexpected header len for TX: "
272 "%zd expected %zd\n", 386 "%zd expected %zd\n",
273 iov_length(vq->hdr, s), hdr_size); 387 iov_length(nvq->hdr, s), hdr_size);
274 break; 388 break;
275 } 389 }
276 zcopy_used = zcopy && (len >= VHOST_GOODCOPY_LEN || 390 zcopy_used = zcopy && (len >= VHOST_GOODCOPY_LEN ||
277 vq->upend_idx != vq->done_idx); 391 nvq->upend_idx != nvq->done_idx);
278 392
279 /* use msg_control to pass vhost zerocopy ubuf info to skb */ 393 /* use msg_control to pass vhost zerocopy ubuf info to skb */
280 if (zcopy_used) { 394 if (zcopy_used) {
281 vq->heads[vq->upend_idx].id = head; 395 vq->heads[nvq->upend_idx].id = head;
282 if (!vhost_net_tx_select_zcopy(net) || 396 if (!vhost_net_tx_select_zcopy(net) ||
283 len < VHOST_GOODCOPY_LEN) { 397 len < VHOST_GOODCOPY_LEN) {
284 /* copy don't need to wait for DMA done */ 398 /* copy don't need to wait for DMA done */
285 vq->heads[vq->upend_idx].len = 399 vq->heads[nvq->upend_idx].len =
286 VHOST_DMA_DONE_LEN; 400 VHOST_DMA_DONE_LEN;
287 msg.msg_control = NULL; 401 msg.msg_control = NULL;
288 msg.msg_controllen = 0; 402 msg.msg_controllen = 0;
289 ubufs = NULL; 403 ubufs = NULL;
290 } else { 404 } else {
291 struct ubuf_info *ubuf; 405 struct ubuf_info *ubuf;
292 ubuf = vq->ubuf_info + vq->upend_idx; 406 ubuf = nvq->ubuf_info + nvq->upend_idx;
293 407
294 vq->heads[vq->upend_idx].len = 408 vq->heads[nvq->upend_idx].len =
295 VHOST_DMA_IN_PROGRESS; 409 VHOST_DMA_IN_PROGRESS;
296 ubuf->callback = vhost_zerocopy_callback; 410 ubuf->callback = vhost_zerocopy_callback;
297 ubuf->ctx = vq->ubufs; 411 ubuf->ctx = nvq->ubufs;
298 ubuf->desc = vq->upend_idx; 412 ubuf->desc = nvq->upend_idx;
299 msg.msg_control = ubuf; 413 msg.msg_control = ubuf;
300 msg.msg_controllen = sizeof(ubuf); 414 msg.msg_controllen = sizeof(ubuf);
301 ubufs = vq->ubufs; 415 ubufs = nvq->ubufs;
302 kref_get(&ubufs->kref); 416 kref_get(&ubufs->kref);
303 } 417 }
304 vq->upend_idx = (vq->upend_idx + 1) % UIO_MAXIOV; 418 nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV;
305 } 419 }
306 /* TODO: Check specific error and bomb out unless ENOBUFS? */ 420 /* TODO: Check specific error and bomb out unless ENOBUFS? */
307 err = sock->ops->sendmsg(NULL, sock, &msg, len); 421 err = sock->ops->sendmsg(NULL, sock, &msg, len);
@@ -309,8 +423,8 @@ static void handle_tx(struct vhost_net *net)
309 if (zcopy_used) { 423 if (zcopy_used) {
310 if (ubufs) 424 if (ubufs)
311 vhost_ubuf_put(ubufs); 425 vhost_ubuf_put(ubufs);
312 vq->upend_idx = ((unsigned)vq->upend_idx - 1) % 426 nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
313 UIO_MAXIOV; 427 % UIO_MAXIOV;
314 } 428 }
315 vhost_discard_vq_desc(vq, 1); 429 vhost_discard_vq_desc(vq, 1);
316 break; 430 break;
@@ -417,7 +531,8 @@ err:
417 * read-size critical section for our kind of RCU. */ 531 * read-size critical section for our kind of RCU. */
418static void handle_rx(struct vhost_net *net) 532static void handle_rx(struct vhost_net *net)
419{ 533{
420 struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX]; 534 struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX];
535 struct vhost_virtqueue *vq = &nvq->vq;
421 unsigned uninitialized_var(in), log; 536 unsigned uninitialized_var(in), log;
422 struct vhost_log *vq_log; 537 struct vhost_log *vq_log;
423 struct msghdr msg = { 538 struct msghdr msg = {
@@ -445,8 +560,8 @@ static void handle_rx(struct vhost_net *net)
445 560
446 mutex_lock(&vq->mutex); 561 mutex_lock(&vq->mutex);
447 vhost_disable_notify(&net->dev, vq); 562 vhost_disable_notify(&net->dev, vq);
448 vhost_hlen = vq->vhost_hlen; 563 vhost_hlen = nvq->vhost_hlen;
449 sock_hlen = vq->sock_hlen; 564 sock_hlen = nvq->sock_hlen;
450 565
451 vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ? 566 vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
452 vq->log : NULL; 567 vq->log : NULL;
@@ -476,11 +591,11 @@ static void handle_rx(struct vhost_net *net)
476 /* We don't need to be notified again. */ 591 /* We don't need to be notified again. */
477 if (unlikely((vhost_hlen))) 592 if (unlikely((vhost_hlen)))
478 /* Skip header. TODO: support TSO. */ 593 /* Skip header. TODO: support TSO. */
479 move_iovec_hdr(vq->iov, vq->hdr, vhost_hlen, in); 594 move_iovec_hdr(vq->iov, nvq->hdr, vhost_hlen, in);
480 else 595 else
481 /* Copy the header for use in VIRTIO_NET_F_MRG_RXBUF: 596 /* Copy the header for use in VIRTIO_NET_F_MRG_RXBUF:
482 * needed because recvmsg can modify msg_iov. */ 597 * needed because recvmsg can modify msg_iov. */
483 copy_iovec_hdr(vq->iov, vq->hdr, sock_hlen, in); 598 copy_iovec_hdr(vq->iov, nvq->hdr, sock_hlen, in);
484 msg.msg_iovlen = in; 599 msg.msg_iovlen = in;
485 err = sock->ops->recvmsg(NULL, sock, &msg, 600 err = sock->ops->recvmsg(NULL, sock, &msg,
486 sock_len, MSG_DONTWAIT | MSG_TRUNC); 601 sock_len, MSG_DONTWAIT | MSG_TRUNC);
@@ -494,7 +609,7 @@ static void handle_rx(struct vhost_net *net)
494 continue; 609 continue;
495 } 610 }
496 if (unlikely(vhost_hlen) && 611 if (unlikely(vhost_hlen) &&
497 memcpy_toiovecend(vq->hdr, (unsigned char *)&hdr, 0, 612 memcpy_toiovecend(nvq->hdr, (unsigned char *)&hdr, 0,
498 vhost_hlen)) { 613 vhost_hlen)) {
499 vq_err(vq, "Unable to write vnet_hdr at addr %p\n", 614 vq_err(vq, "Unable to write vnet_hdr at addr %p\n",
500 vq->iov->iov_base); 615 vq->iov->iov_base);
@@ -502,7 +617,7 @@ static void handle_rx(struct vhost_net *net)
502 } 617 }
503 /* TODO: Should check and handle checksum. */ 618 /* TODO: Should check and handle checksum. */
504 if (likely(mergeable) && 619 if (likely(mergeable) &&
505 memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount, 620 memcpy_toiovecend(nvq->hdr, (unsigned char *)&headcount,
506 offsetof(typeof(hdr), num_buffers), 621 offsetof(typeof(hdr), num_buffers),
507 sizeof hdr.num_buffers)) { 622 sizeof hdr.num_buffers)) {
508 vq_err(vq, "Failed num_buffers write"); 623 vq_err(vq, "Failed num_buffers write");
@@ -559,17 +674,34 @@ static int vhost_net_open(struct inode *inode, struct file *f)
559{ 674{
560 struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL); 675 struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL);
561 struct vhost_dev *dev; 676 struct vhost_dev *dev;
562 int r; 677 struct vhost_virtqueue **vqs;
678 int r, i;
563 679
564 if (!n) 680 if (!n)
565 return -ENOMEM; 681 return -ENOMEM;
682 vqs = kmalloc(VHOST_NET_VQ_MAX * sizeof(*vqs), GFP_KERNEL);
683 if (!vqs) {
684 kfree(n);
685 return -ENOMEM;
686 }
566 687
567 dev = &n->dev; 688 dev = &n->dev;
568 n->vqs[VHOST_NET_VQ_TX].handle_kick = handle_tx_kick; 689 vqs[VHOST_NET_VQ_TX] = &n->vqs[VHOST_NET_VQ_TX].vq;
569 n->vqs[VHOST_NET_VQ_RX].handle_kick = handle_rx_kick; 690 vqs[VHOST_NET_VQ_RX] = &n->vqs[VHOST_NET_VQ_RX].vq;
570 r = vhost_dev_init(dev, n->vqs, VHOST_NET_VQ_MAX); 691 n->vqs[VHOST_NET_VQ_TX].vq.handle_kick = handle_tx_kick;
692 n->vqs[VHOST_NET_VQ_RX].vq.handle_kick = handle_rx_kick;
693 for (i = 0; i < VHOST_NET_VQ_MAX; i++) {
694 n->vqs[i].ubufs = NULL;
695 n->vqs[i].ubuf_info = NULL;
696 n->vqs[i].upend_idx = 0;
697 n->vqs[i].done_idx = 0;
698 n->vqs[i].vhost_hlen = 0;
699 n->vqs[i].sock_hlen = 0;
700 }
701 r = vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX);
571 if (r < 0) { 702 if (r < 0) {
572 kfree(n); 703 kfree(n);
704 kfree(vqs);
573 return r; 705 return r;
574 } 706 }
575 707
@@ -584,7 +716,9 @@ static int vhost_net_open(struct inode *inode, struct file *f)
584static void vhost_net_disable_vq(struct vhost_net *n, 716static void vhost_net_disable_vq(struct vhost_net *n,
585 struct vhost_virtqueue *vq) 717 struct vhost_virtqueue *vq)
586{ 718{
587 struct vhost_poll *poll = n->poll + (vq - n->vqs); 719 struct vhost_net_virtqueue *nvq =
720 container_of(vq, struct vhost_net_virtqueue, vq);
721 struct vhost_poll *poll = n->poll + (nvq - n->vqs);
588 if (!vq->private_data) 722 if (!vq->private_data)
589 return; 723 return;
590 vhost_poll_stop(poll); 724 vhost_poll_stop(poll);
@@ -593,7 +727,9 @@ static void vhost_net_disable_vq(struct vhost_net *n,
593static int vhost_net_enable_vq(struct vhost_net *n, 727static int vhost_net_enable_vq(struct vhost_net *n,
594 struct vhost_virtqueue *vq) 728 struct vhost_virtqueue *vq)
595{ 729{
596 struct vhost_poll *poll = n->poll + (vq - n->vqs); 730 struct vhost_net_virtqueue *nvq =
731 container_of(vq, struct vhost_net_virtqueue, vq);
732 struct vhost_poll *poll = n->poll + (nvq - n->vqs);
597 struct socket *sock; 733 struct socket *sock;
598 734
599 sock = rcu_dereference_protected(vq->private_data, 735 sock = rcu_dereference_protected(vq->private_data,
@@ -621,30 +757,30 @@ static struct socket *vhost_net_stop_vq(struct vhost_net *n,
621static void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock, 757static void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock,
622 struct socket **rx_sock) 758 struct socket **rx_sock)
623{ 759{
624 *tx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_TX); 760 *tx_sock = vhost_net_stop_vq(n, &n->vqs[VHOST_NET_VQ_TX].vq);
625 *rx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_RX); 761 *rx_sock = vhost_net_stop_vq(n, &n->vqs[VHOST_NET_VQ_RX].vq);
626} 762}
627 763
628static void vhost_net_flush_vq(struct vhost_net *n, int index) 764static void vhost_net_flush_vq(struct vhost_net *n, int index)
629{ 765{
630 vhost_poll_flush(n->poll + index); 766 vhost_poll_flush(n->poll + index);
631 vhost_poll_flush(&n->dev.vqs[index].poll); 767 vhost_poll_flush(&n->vqs[index].vq.poll);
632} 768}
633 769
634static void vhost_net_flush(struct vhost_net *n) 770static void vhost_net_flush(struct vhost_net *n)
635{ 771{
636 vhost_net_flush_vq(n, VHOST_NET_VQ_TX); 772 vhost_net_flush_vq(n, VHOST_NET_VQ_TX);
637 vhost_net_flush_vq(n, VHOST_NET_VQ_RX); 773 vhost_net_flush_vq(n, VHOST_NET_VQ_RX);
638 if (n->dev.vqs[VHOST_NET_VQ_TX].ubufs) { 774 if (n->vqs[VHOST_NET_VQ_TX].ubufs) {
639 mutex_lock(&n->dev.vqs[VHOST_NET_VQ_TX].mutex); 775 mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
640 n->tx_flush = true; 776 n->tx_flush = true;
641 mutex_unlock(&n->dev.vqs[VHOST_NET_VQ_TX].mutex); 777 mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
642 /* Wait for all lower device DMAs done. */ 778 /* Wait for all lower device DMAs done. */
643 vhost_ubuf_put_and_wait(n->dev.vqs[VHOST_NET_VQ_TX].ubufs); 779 vhost_ubuf_put_and_wait(n->vqs[VHOST_NET_VQ_TX].ubufs);
644 mutex_lock(&n->dev.vqs[VHOST_NET_VQ_TX].mutex); 780 mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
645 n->tx_flush = false; 781 n->tx_flush = false;
646 kref_init(&n->dev.vqs[VHOST_NET_VQ_TX].ubufs->kref); 782 kref_init(&n->vqs[VHOST_NET_VQ_TX].ubufs->kref);
647 mutex_unlock(&n->dev.vqs[VHOST_NET_VQ_TX].mutex); 783 mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
648 } 784 }
649} 785}
650 786
@@ -658,6 +794,7 @@ static int vhost_net_release(struct inode *inode, struct file *f)
658 vhost_net_flush(n); 794 vhost_net_flush(n);
659 vhost_dev_stop(&n->dev); 795 vhost_dev_stop(&n->dev);
660 vhost_dev_cleanup(&n->dev, false); 796 vhost_dev_cleanup(&n->dev, false);
797 vhost_net_vq_reset(n);
661 if (tx_sock) 798 if (tx_sock)
662 fput(tx_sock->file); 799 fput(tx_sock->file);
663 if (rx_sock) 800 if (rx_sock)
@@ -665,6 +802,7 @@ static int vhost_net_release(struct inode *inode, struct file *f)
665 /* We do an extra flush before freeing memory, 802 /* We do an extra flush before freeing memory,
666 * since jobs can re-queue themselves. */ 803 * since jobs can re-queue themselves. */
667 vhost_net_flush(n); 804 vhost_net_flush(n);
805 kfree(n->dev.vqs);
668 kfree(n); 806 kfree(n);
669 return 0; 807 return 0;
670} 808}
@@ -738,6 +876,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
738{ 876{
739 struct socket *sock, *oldsock; 877 struct socket *sock, *oldsock;
740 struct vhost_virtqueue *vq; 878 struct vhost_virtqueue *vq;
879 struct vhost_net_virtqueue *nvq;
741 struct vhost_ubuf_ref *ubufs, *oldubufs = NULL; 880 struct vhost_ubuf_ref *ubufs, *oldubufs = NULL;
742 int r; 881 int r;
743 882
@@ -750,7 +889,8 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
750 r = -ENOBUFS; 889 r = -ENOBUFS;
751 goto err; 890 goto err;
752 } 891 }
753 vq = n->vqs + index; 892 vq = &n->vqs[index].vq;
893 nvq = &n->vqs[index];
754 mutex_lock(&vq->mutex); 894 mutex_lock(&vq->mutex);
755 895
756 /* Verify that ring has been setup correctly. */ 896 /* Verify that ring has been setup correctly. */
@@ -783,8 +923,8 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
783 if (r) 923 if (r)
784 goto err_used; 924 goto err_used;
785 925
786 oldubufs = vq->ubufs; 926 oldubufs = nvq->ubufs;
787 vq->ubufs = ubufs; 927 nvq->ubufs = ubufs;
788 928
789 n->tx_packets = 0; 929 n->tx_packets = 0;
790 n->tx_zcopy_err = 0; 930 n->tx_zcopy_err = 0;
@@ -827,14 +967,21 @@ static long vhost_net_reset_owner(struct vhost_net *n)
827 struct socket *tx_sock = NULL; 967 struct socket *tx_sock = NULL;
828 struct socket *rx_sock = NULL; 968 struct socket *rx_sock = NULL;
829 long err; 969 long err;
970 struct vhost_memory *memory;
830 971
831 mutex_lock(&n->dev.mutex); 972 mutex_lock(&n->dev.mutex);
832 err = vhost_dev_check_owner(&n->dev); 973 err = vhost_dev_check_owner(&n->dev);
833 if (err) 974 if (err)
834 goto done; 975 goto done;
976 memory = vhost_dev_reset_owner_prepare();
977 if (!memory) {
978 err = -ENOMEM;
979 goto done;
980 }
835 vhost_net_stop(n, &tx_sock, &rx_sock); 981 vhost_net_stop(n, &tx_sock, &rx_sock);
836 vhost_net_flush(n); 982 vhost_net_flush(n);
837 err = vhost_dev_reset_owner(&n->dev); 983 vhost_dev_reset_owner(&n->dev, memory);
984 vhost_net_vq_reset(n);
838done: 985done:
839 mutex_unlock(&n->dev.mutex); 986 mutex_unlock(&n->dev.mutex);
840 if (tx_sock) 987 if (tx_sock)
@@ -870,10 +1017,10 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features)
870 n->dev.acked_features = features; 1017 n->dev.acked_features = features;
871 smp_wmb(); 1018 smp_wmb();
872 for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { 1019 for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
873 mutex_lock(&n->vqs[i].mutex); 1020 mutex_lock(&n->vqs[i].vq.mutex);
874 n->vqs[i].vhost_hlen = vhost_hlen; 1021 n->vqs[i].vhost_hlen = vhost_hlen;
875 n->vqs[i].sock_hlen = sock_hlen; 1022 n->vqs[i].sock_hlen = sock_hlen;
876 mutex_unlock(&n->vqs[i].mutex); 1023 mutex_unlock(&n->vqs[i].vq.mutex);
877 } 1024 }
878 vhost_net_flush(n); 1025 vhost_net_flush(n);
879 mutex_unlock(&n->dev.mutex); 1026 mutex_unlock(&n->dev.mutex);
@@ -910,11 +1057,17 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
910 return vhost_net_reset_owner(n); 1057 return vhost_net_reset_owner(n);
911 default: 1058 default:
912 mutex_lock(&n->dev.mutex); 1059 mutex_lock(&n->dev.mutex);
1060 if (ioctl == VHOST_SET_OWNER) {
1061 r = vhost_net_set_ubuf_info(n);
1062 if (r)
1063 goto out;
1064 }
913 r = vhost_dev_ioctl(&n->dev, ioctl, argp); 1065 r = vhost_dev_ioctl(&n->dev, ioctl, argp);
914 if (r == -ENOIOCTLCMD) 1066 if (r == -ENOIOCTLCMD)
915 r = vhost_vring_ioctl(&n->dev, ioctl, argp); 1067 r = vhost_vring_ioctl(&n->dev, ioctl, argp);
916 else 1068 else
917 vhost_net_flush(n); 1069 vhost_net_flush(n);
1070out:
918 mutex_unlock(&n->dev.mutex); 1071 mutex_unlock(&n->dev.mutex);
919 return r; 1072 return r;
920 } 1073 }
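
One detail worth calling out in the handle_tx() hunk above: the number of outstanding zerocopy DMAs is computed as the ring distance from done_idx to upend_idx over UIO_MAXIOV slots, taking wrap-around into account, and new zerocopy submissions are throttled once that distance exceeds VHOST_MAX_PEND. The same arithmetic written as a stand-alone helper, purely for illustration (no such helper exists in the patch):

	/* Outstanding zerocopy DMAs: ring distance from done_idx to upend_idx,
	 * where the ring has 'size' slots (UIO_MAXIOV in vhost-net). */
	static inline int zcopy_pending(int upend_idx, int done_idx, int size)
	{
		return upend_idx >= done_idx ? upend_idx - done_idx
					     : upend_idx + size - done_idx;
	}
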
diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/scsi.c
index 1677238d281f..5179f7aa1b0b 100644
--- a/drivers/vhost/tcm_vhost.c
+++ b/drivers/vhost/scsi.c
@@ -45,14 +45,116 @@
45#include <target/target_core_configfs.h> 45#include <target/target_core_configfs.h>
46#include <target/configfs_macros.h> 46#include <target/configfs_macros.h>
47#include <linux/vhost.h> 47#include <linux/vhost.h>
48#include <linux/virtio_net.h> /* TODO vhost.h currently depends on this */
49#include <linux/virtio_scsi.h> 48#include <linux/virtio_scsi.h>
50#include <linux/llist.h> 49#include <linux/llist.h>
51#include <linux/bitmap.h> 50#include <linux/bitmap.h>
52 51
53#include "vhost.c" 52#include "vhost.c"
54#include "vhost.h" 53#include "vhost.h"
55#include "tcm_vhost.h" 54
55#define TCM_VHOST_VERSION "v0.1"
56#define TCM_VHOST_NAMELEN 256
57#define TCM_VHOST_MAX_CDB_SIZE 32
58
59struct vhost_scsi_inflight {
60 /* Wait for the flush operation to finish */
61 struct completion comp;
62 /* Refcount for the inflight reqs */
63 struct kref kref;
64};
65
66struct tcm_vhost_cmd {
67 /* Descriptor from vhost_get_vq_desc() for virt_queue segment */
68 int tvc_vq_desc;
69 /* virtio-scsi initiator task attribute */
70 int tvc_task_attr;
71 /* virtio-scsi initiator data direction */
72 enum dma_data_direction tvc_data_direction;
73 /* Expected data transfer length from virtio-scsi header */
74 u32 tvc_exp_data_len;
75 /* The Tag from include/linux/virtio_scsi.h:struct virtio_scsi_cmd_req */
76 u64 tvc_tag;
77 /* The number of scatterlists associated with this cmd */
78 u32 tvc_sgl_count;
79 /* Saved unpacked SCSI LUN for tcm_vhost_submission_work() */
80 u32 tvc_lun;
81 /* Pointer to the SGL formatted memory from virtio-scsi */
82 struct scatterlist *tvc_sgl;
83 /* Pointer to response */
84 struct virtio_scsi_cmd_resp __user *tvc_resp;
85 /* Pointer to vhost_scsi for our device */
86 struct vhost_scsi *tvc_vhost;
87 /* Pointer to vhost_virtqueue for the cmd */
88 struct vhost_virtqueue *tvc_vq;
89 /* Pointer to vhost nexus memory */
90 struct tcm_vhost_nexus *tvc_nexus;
91 /* The TCM I/O descriptor that is accessed via container_of() */
92 struct se_cmd tvc_se_cmd;
93 /* work item used for cmwq dispatch to tcm_vhost_submission_work() */
94 struct work_struct work;
95 /* Copy of the incoming SCSI command descriptor block (CDB) */
96 unsigned char tvc_cdb[TCM_VHOST_MAX_CDB_SIZE];
97 /* Sense buffer that will be mapped into outgoing status */
98 unsigned char tvc_sense_buf[TRANSPORT_SENSE_BUFFER];
99 /* Completed commands list, serviced from vhost worker thread */
100 struct llist_node tvc_completion_list;
101 /* Used to track inflight cmd */
102 struct vhost_scsi_inflight *inflight;
103};
104
105struct tcm_vhost_nexus {
106 /* Pointer to TCM session for I_T Nexus */
107 struct se_session *tvn_se_sess;
108};
109
110struct tcm_vhost_nacl {
111 /* Binary World Wide unique Port Name for Vhost Initiator port */
112 u64 iport_wwpn;
113 /* ASCII formatted WWPN for Sas Initiator port */
114 char iport_name[TCM_VHOST_NAMELEN];
115 /* Returned by tcm_vhost_make_nodeacl() */
116 struct se_node_acl se_node_acl;
117};
118
119struct vhost_scsi;
120struct tcm_vhost_tpg {
121 /* Vhost port target portal group tag for TCM */
122 u16 tport_tpgt;
123 /* Used to track number of TPG Port/Lun Links wrt to explict I_T Nexus shutdown */
124 int tv_tpg_port_count;
125 /* Used for vhost_scsi device reference to tpg_nexus, protected by tv_tpg_mutex */
126 int tv_tpg_vhost_count;
127 /* list for tcm_vhost_list */
128 struct list_head tv_tpg_list;
129 /* Used to protect access for tpg_nexus */
130 struct mutex tv_tpg_mutex;
131 /* Pointer to the TCM VHost I_T Nexus for this TPG endpoint */
132 struct tcm_vhost_nexus *tpg_nexus;
133 /* Pointer back to tcm_vhost_tport */
134 struct tcm_vhost_tport *tport;
135 /* Returned by tcm_vhost_make_tpg() */
136 struct se_portal_group se_tpg;
137 /* Pointer back to vhost_scsi, protected by tv_tpg_mutex */
138 struct vhost_scsi *vhost_scsi;
139};
140
141struct tcm_vhost_tport {
142 /* SCSI protocol the tport is providing */
143 u8 tport_proto_id;
144 /* Binary World Wide unique Port Name for Vhost Target port */
145 u64 tport_wwpn;
146 /* ASCII formatted WWPN for Vhost Target port */
147 char tport_name[TCM_VHOST_NAMELEN];
148 /* Returned by tcm_vhost_make_tport() */
149 struct se_wwn tport_wwn;
150};
151
152struct tcm_vhost_evt {
153 /* event to be sent to guest */
154 struct virtio_scsi_event event;
155 /* event list, serviced from vhost worker thread */
156 struct llist_node list;
157};
56 158
57enum { 159enum {
58 VHOST_SCSI_VQ_CTL = 0, 160 VHOST_SCSI_VQ_CTL = 0,
@@ -74,13 +176,28 @@ enum {
74#define VHOST_SCSI_MAX_VQ 128 176#define VHOST_SCSI_MAX_VQ 128
75#define VHOST_SCSI_MAX_EVENT 128 177#define VHOST_SCSI_MAX_EVENT 128
76 178
179struct vhost_scsi_virtqueue {
180 struct vhost_virtqueue vq;
181 /*
182 * Reference counting for inflight reqs, used for flush operation. At
183 * each time, one reference tracks new commands submitted, while we
184 * wait for another one to reach 0.
185 */
186 struct vhost_scsi_inflight inflights[2];
187 /*
188 * Indicate current inflight in use, protected by vq->mutex.
189 * Writers must also take dev mutex and flush under it.
190 */
191 int inflight_idx;
192};
193
77struct vhost_scsi { 194struct vhost_scsi {
78 /* Protected by vhost_scsi->dev.mutex */ 195 /* Protected by vhost_scsi->dev.mutex */
79 struct tcm_vhost_tpg **vs_tpg; 196 struct tcm_vhost_tpg **vs_tpg;
80 char vs_vhost_wwpn[TRANSPORT_IQN_LEN]; 197 char vs_vhost_wwpn[TRANSPORT_IQN_LEN];
81 198
82 struct vhost_dev dev; 199 struct vhost_dev dev;
83 struct vhost_virtqueue vqs[VHOST_SCSI_MAX_VQ]; 200 struct vhost_scsi_virtqueue vqs[VHOST_SCSI_MAX_VQ];
84 201
85 struct vhost_work vs_completion_work; /* cmd completion work item */ 202 struct vhost_work vs_completion_work; /* cmd completion work item */
86 struct llist_head vs_completion_list; /* cmd completion queue */ 203 struct llist_head vs_completion_list; /* cmd completion queue */
@@ -107,6 +224,59 @@ static int iov_num_pages(struct iovec *iov)
107 ((unsigned long)iov->iov_base & PAGE_MASK)) >> PAGE_SHIFT; 224 ((unsigned long)iov->iov_base & PAGE_MASK)) >> PAGE_SHIFT;
108} 225}
109 226
227void tcm_vhost_done_inflight(struct kref *kref)
228{
229 struct vhost_scsi_inflight *inflight;
230
231 inflight = container_of(kref, struct vhost_scsi_inflight, kref);
232 complete(&inflight->comp);
233}
234
235static void tcm_vhost_init_inflight(struct vhost_scsi *vs,
236 struct vhost_scsi_inflight *old_inflight[])
237{
238 struct vhost_scsi_inflight *new_inflight;
239 struct vhost_virtqueue *vq;
240 int idx, i;
241
242 for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
243 vq = &vs->vqs[i].vq;
244
245 mutex_lock(&vq->mutex);
246
247 /* store old infight */
248 idx = vs->vqs[i].inflight_idx;
249 if (old_inflight)
250 old_inflight[i] = &vs->vqs[i].inflights[idx];
251
252 /* setup new infight */
253 vs->vqs[i].inflight_idx = idx ^ 1;
254 new_inflight = &vs->vqs[i].inflights[idx ^ 1];
255 kref_init(&new_inflight->kref);
256 init_completion(&new_inflight->comp);
257
258 mutex_unlock(&vq->mutex);
259 }
260}
261
262static struct vhost_scsi_inflight *
263tcm_vhost_get_inflight(struct vhost_virtqueue *vq)
264{
265 struct vhost_scsi_inflight *inflight;
266 struct vhost_scsi_virtqueue *svq;
267
268 svq = container_of(vq, struct vhost_scsi_virtqueue, vq);
269 inflight = &svq->inflights[svq->inflight_idx];
270 kref_get(&inflight->kref);
271
272 return inflight;
273}
274
275static void tcm_vhost_put_inflight(struct vhost_scsi_inflight *inflight)
276{
277 kref_put(&inflight->kref, tcm_vhost_done_inflight);
278}
279
110static int tcm_vhost_check_true(struct se_portal_group *se_tpg) 280static int tcm_vhost_check_true(struct se_portal_group *se_tpg)
111{ 281{
112 return 1; 282 return 1;
@@ -366,7 +536,7 @@ static void tcm_vhost_free_evt(struct vhost_scsi *vs, struct tcm_vhost_evt *evt)
366static struct tcm_vhost_evt *tcm_vhost_allocate_evt(struct vhost_scsi *vs, 536static struct tcm_vhost_evt *tcm_vhost_allocate_evt(struct vhost_scsi *vs,
367 u32 event, u32 reason) 537 u32 event, u32 reason)
368{ 538{
369 struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT]; 539 struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
370 struct tcm_vhost_evt *evt; 540 struct tcm_vhost_evt *evt;
371 541
372 if (vs->vs_events_nr > VHOST_SCSI_MAX_EVENT) { 542 if (vs->vs_events_nr > VHOST_SCSI_MAX_EVENT) {
@@ -403,13 +573,15 @@ static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd)
403 kfree(tv_cmd->tvc_sgl); 573 kfree(tv_cmd->tvc_sgl);
404 } 574 }
405 575
576 tcm_vhost_put_inflight(tv_cmd->inflight);
577
406 kfree(tv_cmd); 578 kfree(tv_cmd);
407} 579}
408 580
409static void tcm_vhost_do_evt_work(struct vhost_scsi *vs, 581static void tcm_vhost_do_evt_work(struct vhost_scsi *vs,
410 struct tcm_vhost_evt *evt) 582 struct tcm_vhost_evt *evt)
411{ 583{
412 struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT]; 584 struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
413 struct virtio_scsi_event *event = &evt->event; 585 struct virtio_scsi_event *event = &evt->event;
414 struct virtio_scsi_event __user *eventp; 586 struct virtio_scsi_event __user *eventp;
415 unsigned out, in; 587 unsigned out, in;
@@ -460,7 +632,7 @@ static void tcm_vhost_evt_work(struct vhost_work *work)
460{ 632{
461 struct vhost_scsi *vs = container_of(work, struct vhost_scsi, 633 struct vhost_scsi *vs = container_of(work, struct vhost_scsi,
462 vs_event_work); 634 vs_event_work);
463 struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT]; 635 struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
464 struct tcm_vhost_evt *evt; 636 struct tcm_vhost_evt *evt;
465 struct llist_node *llnode; 637 struct llist_node *llnode;
466 638
@@ -511,8 +683,10 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
511 v_rsp.sense_len); 683 v_rsp.sense_len);
512 ret = copy_to_user(tv_cmd->tvc_resp, &v_rsp, sizeof(v_rsp)); 684 ret = copy_to_user(tv_cmd->tvc_resp, &v_rsp, sizeof(v_rsp));
513 if (likely(ret == 0)) { 685 if (likely(ret == 0)) {
686 struct vhost_scsi_virtqueue *q;
514 vhost_add_used(tv_cmd->tvc_vq, tv_cmd->tvc_vq_desc, 0); 687 vhost_add_used(tv_cmd->tvc_vq, tv_cmd->tvc_vq_desc, 0);
515 vq = tv_cmd->tvc_vq - vs->vqs; 688 q = container_of(tv_cmd->tvc_vq, struct vhost_scsi_virtqueue, vq);
689 vq = q - vs->vqs;
516 __set_bit(vq, signal); 690 __set_bit(vq, signal);
517 } else 691 } else
518 pr_err("Faulted on virtio_scsi_cmd_resp\n"); 692 pr_err("Faulted on virtio_scsi_cmd_resp\n");
@@ -523,10 +697,11 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
523 vq = -1; 697 vq = -1;
524 while ((vq = find_next_bit(signal, VHOST_SCSI_MAX_VQ, vq + 1)) 698 while ((vq = find_next_bit(signal, VHOST_SCSI_MAX_VQ, vq + 1))
525 < VHOST_SCSI_MAX_VQ) 699 < VHOST_SCSI_MAX_VQ)
526 vhost_signal(&vs->dev, &vs->vqs[vq]); 700 vhost_signal(&vs->dev, &vs->vqs[vq].vq);
527} 701}
528 702
529static struct tcm_vhost_cmd *vhost_scsi_allocate_cmd( 703static struct tcm_vhost_cmd *vhost_scsi_allocate_cmd(
704 struct vhost_virtqueue *vq,
530 struct tcm_vhost_tpg *tv_tpg, 705 struct tcm_vhost_tpg *tv_tpg,
531 struct virtio_scsi_cmd_req *v_req, 706 struct virtio_scsi_cmd_req *v_req,
532 u32 exp_data_len, 707 u32 exp_data_len,
@@ -551,6 +726,7 @@ static struct tcm_vhost_cmd *vhost_scsi_allocate_cmd(
551 tv_cmd->tvc_exp_data_len = exp_data_len; 726 tv_cmd->tvc_exp_data_len = exp_data_len;
552 tv_cmd->tvc_data_direction = data_direction; 727 tv_cmd->tvc_data_direction = data_direction;
553 tv_cmd->tvc_nexus = tv_nexus; 728 tv_cmd->tvc_nexus = tv_nexus;
729 tv_cmd->inflight = tcm_vhost_get_inflight(vq);
554 730
555 return tv_cmd; 731 return tv_cmd;
556} 732}
@@ -806,7 +982,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs,
806 for (i = 0; i < data_num; i++) 982 for (i = 0; i < data_num; i++)
807 exp_data_len += vq->iov[data_first + i].iov_len; 983 exp_data_len += vq->iov[data_first + i].iov_len;
808 984
809 tv_cmd = vhost_scsi_allocate_cmd(tv_tpg, &v_req, 985 tv_cmd = vhost_scsi_allocate_cmd(vq, tv_tpg, &v_req,
810 exp_data_len, data_direction); 986 exp_data_len, data_direction);
811 if (IS_ERR(tv_cmd)) { 987 if (IS_ERR(tv_cmd)) {
812 vq_err(vq, "vhost_scsi_allocate_cmd failed %ld\n", 988 vq_err(vq, "vhost_scsi_allocate_cmd failed %ld\n",
@@ -938,17 +1114,35 @@ static void vhost_scsi_handle_kick(struct vhost_work *work)
938 1114
939static void vhost_scsi_flush_vq(struct vhost_scsi *vs, int index) 1115static void vhost_scsi_flush_vq(struct vhost_scsi *vs, int index)
940{ 1116{
941 vhost_poll_flush(&vs->dev.vqs[index].poll); 1117 vhost_poll_flush(&vs->vqs[index].vq.poll);
942} 1118}
943 1119
1120/* Callers must hold dev mutex */
944static void vhost_scsi_flush(struct vhost_scsi *vs) 1121static void vhost_scsi_flush(struct vhost_scsi *vs)
945{ 1122{
1123 struct vhost_scsi_inflight *old_inflight[VHOST_SCSI_MAX_VQ];
946 int i; 1124 int i;
947 1125
1126 /* Init new inflight and remember the old inflight */
1127 tcm_vhost_init_inflight(vs, old_inflight);
1128
1129 /*
1130 * The inflight->kref was initialized to 1. We decrement it here to
1131 * indicate the start of the flush operation so that it will reach 0
1132 * when all the reqs are finished.
1133 */
1134 for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
1135 kref_put(&old_inflight[i]->kref, tcm_vhost_done_inflight);
1136
1137 /* Flush both the vhost poll and vhost work */
948 for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) 1138 for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
949 vhost_scsi_flush_vq(vs, i); 1139 vhost_scsi_flush_vq(vs, i);
950 vhost_work_flush(&vs->dev, &vs->vs_completion_work); 1140 vhost_work_flush(&vs->dev, &vs->vs_completion_work);
951 vhost_work_flush(&vs->dev, &vs->vs_event_work); 1141 vhost_work_flush(&vs->dev, &vs->vs_event_work);
1142
1143 /* Wait for all reqs issued before the flush to be finished */
1144 for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
1145 wait_for_completion(&old_inflight[i]->comp);
952} 1146}
953 1147
954/* 1148/*
@@ -975,7 +1169,7 @@ static int vhost_scsi_set_endpoint(
975 /* Verify that ring has been setup correctly. */ 1169 /* Verify that ring has been setup correctly. */
976 for (index = 0; index < vs->dev.nvqs; ++index) { 1170 for (index = 0; index < vs->dev.nvqs; ++index) {
977 /* Verify that ring has been setup correctly. */ 1171 /* Verify that ring has been setup correctly. */
978 if (!vhost_vq_access_ok(&vs->vqs[index])) { 1172 if (!vhost_vq_access_ok(&vs->vqs[index].vq)) {
979 ret = -EFAULT; 1173 ret = -EFAULT;
980 goto out; 1174 goto out;
981 } 1175 }
@@ -1022,7 +1216,7 @@ static int vhost_scsi_set_endpoint(
1022 memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn, 1216 memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn,
1023 sizeof(vs->vs_vhost_wwpn)); 1217 sizeof(vs->vs_vhost_wwpn));
1024 for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { 1218 for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
1025 vq = &vs->vqs[i]; 1219 vq = &vs->vqs[i].vq;
1026 /* Flushing the vhost_work acts as synchronize_rcu */ 1220 /* Flushing the vhost_work acts as synchronize_rcu */
1027 mutex_lock(&vq->mutex); 1221 mutex_lock(&vq->mutex);
1028 rcu_assign_pointer(vq->private_data, vs_tpg); 1222 rcu_assign_pointer(vq->private_data, vs_tpg);
@@ -1063,7 +1257,7 @@ static int vhost_scsi_clear_endpoint(
1063 mutex_lock(&vs->dev.mutex); 1257 mutex_lock(&vs->dev.mutex);
1064 /* Verify that ring has been setup correctly. */ 1258 /* Verify that ring has been setup correctly. */
1065 for (index = 0; index < vs->dev.nvqs; ++index) { 1259 for (index = 0; index < vs->dev.nvqs; ++index) {
1066 if (!vhost_vq_access_ok(&vs->vqs[index])) { 1260 if (!vhost_vq_access_ok(&vs->vqs[index].vq)) {
1067 ret = -EFAULT; 1261 ret = -EFAULT;
1068 goto err_dev; 1262 goto err_dev;
1069 } 1263 }
@@ -1103,7 +1297,7 @@ static int vhost_scsi_clear_endpoint(
1103 } 1297 }
1104 if (match) { 1298 if (match) {
1105 for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) { 1299 for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
1106 vq = &vs->vqs[i]; 1300 vq = &vs->vqs[i].vq;
1107 /* Flushing the vhost_work acts as synchronize_rcu */ 1301 /* Flushing the vhost_work acts as synchronize_rcu */
1108 mutex_lock(&vq->mutex); 1302 mutex_lock(&vq->mutex);
1109 rcu_assign_pointer(vq->private_data, NULL); 1303 rcu_assign_pointer(vq->private_data, NULL);
@@ -1151,24 +1345,39 @@ static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
1151static int vhost_scsi_open(struct inode *inode, struct file *f) 1345static int vhost_scsi_open(struct inode *inode, struct file *f)
1152{ 1346{
1153 struct vhost_scsi *s; 1347 struct vhost_scsi *s;
1348 struct vhost_virtqueue **vqs;
1154 int r, i; 1349 int r, i;
1155 1350
1156 s = kzalloc(sizeof(*s), GFP_KERNEL); 1351 s = kzalloc(sizeof(*s), GFP_KERNEL);
1157 if (!s) 1352 if (!s)
1158 return -ENOMEM; 1353 return -ENOMEM;
1159 1354
1355 vqs = kmalloc(VHOST_SCSI_MAX_VQ * sizeof(*vqs), GFP_KERNEL);
1356 if (!vqs) {
1357 kfree(s);
1358 return -ENOMEM;
1359 }
1360
1160 vhost_work_init(&s->vs_completion_work, vhost_scsi_complete_cmd_work); 1361 vhost_work_init(&s->vs_completion_work, vhost_scsi_complete_cmd_work);
1161 vhost_work_init(&s->vs_event_work, tcm_vhost_evt_work); 1362 vhost_work_init(&s->vs_event_work, tcm_vhost_evt_work);
1162 1363
1163 s->vs_events_nr = 0; 1364 s->vs_events_nr = 0;
1164 s->vs_events_missed = false; 1365 s->vs_events_missed = false;
1165 1366
1166 s->vqs[VHOST_SCSI_VQ_CTL].handle_kick = vhost_scsi_ctl_handle_kick; 1367 vqs[VHOST_SCSI_VQ_CTL] = &s->vqs[VHOST_SCSI_VQ_CTL].vq;
1167 s->vqs[VHOST_SCSI_VQ_EVT].handle_kick = vhost_scsi_evt_handle_kick; 1368 vqs[VHOST_SCSI_VQ_EVT] = &s->vqs[VHOST_SCSI_VQ_EVT].vq;
1168 for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) 1369 s->vqs[VHOST_SCSI_VQ_CTL].vq.handle_kick = vhost_scsi_ctl_handle_kick;
1169 s->vqs[i].handle_kick = vhost_scsi_handle_kick; 1370 s->vqs[VHOST_SCSI_VQ_EVT].vq.handle_kick = vhost_scsi_evt_handle_kick;
1170 r = vhost_dev_init(&s->dev, s->vqs, VHOST_SCSI_MAX_VQ); 1371 for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) {
1372 vqs[i] = &s->vqs[i].vq;
1373 s->vqs[i].vq.handle_kick = vhost_scsi_handle_kick;
1374 }
1375 r = vhost_dev_init(&s->dev, vqs, VHOST_SCSI_MAX_VQ);
1376
1377 tcm_vhost_init_inflight(s, NULL);
1378
1171 if (r < 0) { 1379 if (r < 0) {
1380 kfree(vqs);
1172 kfree(s); 1381 kfree(s);
1173 return r; 1382 return r;
1174 } 1383 }
@@ -1190,6 +1399,7 @@ static int vhost_scsi_release(struct inode *inode, struct file *f)
1190 vhost_dev_cleanup(&s->dev, false); 1399 vhost_dev_cleanup(&s->dev, false);
1191 /* Jobs can re-queue themselves in evt kick handler. Do extra flush. */ 1400 /* Jobs can re-queue themselves in evt kick handler. Do extra flush. */
1192 vhost_scsi_flush(s); 1401 vhost_scsi_flush(s);
1402 kfree(s->dev.vqs);
1193 kfree(s); 1403 kfree(s);
1194 return 0; 1404 return 0;
1195} 1405}
@@ -1205,7 +1415,7 @@ static long vhost_scsi_ioctl(struct file *f, unsigned int ioctl,
1205 u32 events_missed; 1415 u32 events_missed;
1206 u64 features; 1416 u64 features;
1207 int r, abi_version = VHOST_SCSI_ABI_VERSION; 1417 int r, abi_version = VHOST_SCSI_ABI_VERSION;
1208 struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT]; 1418 struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
1209 1419
1210 switch (ioctl) { 1420 switch (ioctl) {
1211 case VHOST_SCSI_SET_ENDPOINT: 1421 case VHOST_SCSI_SET_ENDPOINT:
@@ -1333,7 +1543,7 @@ static void tcm_vhost_do_plug(struct tcm_vhost_tpg *tpg,
1333 else 1543 else
1334 reason = VIRTIO_SCSI_EVT_RESET_REMOVED; 1544 reason = VIRTIO_SCSI_EVT_RESET_REMOVED;
1335 1545
1336 vq = &vs->vqs[VHOST_SCSI_VQ_EVT]; 1546 vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
1337 mutex_lock(&vq->mutex); 1547 mutex_lock(&vq->mutex);
1338 tcm_vhost_send_evt(vs, tpg, lun, 1548 tcm_vhost_send_evt(vs, tpg, lun,
1339 VIRTIO_SCSI_T_TRANSPORT_RESET, reason); 1549 VIRTIO_SCSI_T_TRANSPORT_RESET, reason);
@@ -1926,7 +2136,8 @@ static void tcm_vhost_exit(void)
1926 destroy_workqueue(tcm_vhost_workqueue); 2136 destroy_workqueue(tcm_vhost_workqueue);
1927}; 2137};
1928 2138
1929MODULE_DESCRIPTION("TCM_VHOST series fabric driver"); 2139MODULE_DESCRIPTION("VHOST_SCSI series fabric driver");
2140MODULE_ALIAS("tcm_vhost");
1930MODULE_LICENSE("GPL"); 2141MODULE_LICENSE("GPL");
1931module_init(tcm_vhost_init); 2142module_init(tcm_vhost_init);
1932module_exit(tcm_vhost_exit); 2143module_exit(tcm_vhost_exit);
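
The "Wait for pending requests in vhost_scsi_flush()" change above works by double-buffering a per-vq reference counter: each command pins the vhost_scsi_inflight generation that is current when it is queued, and a flush flips inflight_idx to the other slot, drops the initial reference on the old generation, and sleeps on its completion until every command issued before the flip has been released. A condensed view of the lifecycle, using the function names from the patch (not complete driver code):

	/* command setup: pin the generation current at submit time */
	tv_cmd->inflight = tcm_vhost_get_inflight(vq);		/* kref_get() */

	/* command completion: drop the pin; complete() fires at refcount zero */
	tcm_vhost_put_inflight(tv_cmd->inflight);

	/* flush, under dev mutex: start a new generation, then drain the old one */
	tcm_vhost_init_inflight(vs, old_inflight);		/* flips inflight_idx per vq */
	for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
		kref_put(&old_inflight[i]->kref, tcm_vhost_done_inflight);
	/* ... flush the vhost poll and work queues here ... */
	for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
		wait_for_completion(&old_inflight[i]->comp);
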
diff --git a/drivers/vhost/tcm_vhost.h b/drivers/vhost/tcm_vhost.h
deleted file mode 100644
index 514b9fda230e..000000000000
--- a/drivers/vhost/tcm_vhost.h
+++ /dev/null
@@ -1,128 +0,0 @@
1#define TCM_VHOST_VERSION "v0.1"
2#define TCM_VHOST_NAMELEN 256
3#define TCM_VHOST_MAX_CDB_SIZE 32
4
5struct tcm_vhost_cmd {
6 /* Descriptor from vhost_get_vq_desc() for virt_queue segment */
7 int tvc_vq_desc;
8 /* virtio-scsi initiator task attribute */
9 int tvc_task_attr;
10 /* virtio-scsi initiator data direction */
11 enum dma_data_direction tvc_data_direction;
12 /* Expected data transfer length from virtio-scsi header */
13 u32 tvc_exp_data_len;
14 /* The Tag from include/linux/virtio_scsi.h:struct virtio_scsi_cmd_req */
15 u64 tvc_tag;
16 /* The number of scatterlists associated with this cmd */
17 u32 tvc_sgl_count;
18 /* Saved unpacked SCSI LUN for tcm_vhost_submission_work() */
19 u32 tvc_lun;
20 /* Pointer to the SGL formatted memory from virtio-scsi */
21 struct scatterlist *tvc_sgl;
22 /* Pointer to response */
23 struct virtio_scsi_cmd_resp __user *tvc_resp;
24 /* Pointer to vhost_scsi for our device */
25 struct vhost_scsi *tvc_vhost;
26 /* Pointer to vhost_virtqueue for the cmd */
27 struct vhost_virtqueue *tvc_vq;
28 /* Pointer to vhost nexus memory */
29 struct tcm_vhost_nexus *tvc_nexus;
30 /* The TCM I/O descriptor that is accessed via container_of() */
31 struct se_cmd tvc_se_cmd;
32 /* work item used for cmwq dispatch to tcm_vhost_submission_work() */
33 struct work_struct work;
34 /* Copy of the incoming SCSI command descriptor block (CDB) */
35 unsigned char tvc_cdb[TCM_VHOST_MAX_CDB_SIZE];
36 /* Sense buffer that will be mapped into outgoing status */
37 unsigned char tvc_sense_buf[TRANSPORT_SENSE_BUFFER];
38 /* Completed commands list, serviced from vhost worker thread */
39 struct llist_node tvc_completion_list;
40};
41
42struct tcm_vhost_nexus {
43 /* Pointer to TCM session for I_T Nexus */
44 struct se_session *tvn_se_sess;
45};
46
47struct tcm_vhost_nacl {
48 /* Binary World Wide unique Port Name for Vhost Initiator port */
49 u64 iport_wwpn;
50 /* ASCII formatted WWPN for Sas Initiator port */
51 char iport_name[TCM_VHOST_NAMELEN];
52 /* Returned by tcm_vhost_make_nodeacl() */
53 struct se_node_acl se_node_acl;
54};
55
56struct vhost_scsi;
57struct tcm_vhost_tpg {
58 /* Vhost port target portal group tag for TCM */
59 u16 tport_tpgt;
60 /* Used to track number of TPG Port/Lun Links wrt to explict I_T Nexus shutdown */
61 int tv_tpg_port_count;
62 /* Used for vhost_scsi device reference to tpg_nexus, protected by tv_tpg_mutex */
63 int tv_tpg_vhost_count;
64 /* list for tcm_vhost_list */
65 struct list_head tv_tpg_list;
66 /* Used to protect access for tpg_nexus */
67 struct mutex tv_tpg_mutex;
68 /* Pointer to the TCM VHost I_T Nexus for this TPG endpoint */
69 struct tcm_vhost_nexus *tpg_nexus;
70 /* Pointer back to tcm_vhost_tport */
71 struct tcm_vhost_tport *tport;
72 /* Returned by tcm_vhost_make_tpg() */
73 struct se_portal_group se_tpg;
74 /* Pointer back to vhost_scsi, protected by tv_tpg_mutex */
75 struct vhost_scsi *vhost_scsi;
76};
77
78struct tcm_vhost_tport {
79 /* SCSI protocol the tport is providing */
80 u8 tport_proto_id;
81 /* Binary World Wide unique Port Name for Vhost Target port */
82 u64 tport_wwpn;
83 /* ASCII formatted WWPN for Vhost Target port */
84 char tport_name[TCM_VHOST_NAMELEN];
85 /* Returned by tcm_vhost_make_tport() */
86 struct se_wwn tport_wwn;
87};
88
89struct tcm_vhost_evt {
90 /* event to be sent to guest */
91 struct virtio_scsi_event event;
92 /* event list, serviced from vhost worker thread */
93 struct llist_node list;
94};
95
96/*
97 * As per request from MST, keep TCM_VHOST related ioctl defines out of
98 * linux/vhost.h (user-space) for now..
99 */
100
101#include <linux/vhost.h>
102
103/*
104 * Used by QEMU userspace to ensure a consistent vhost-scsi ABI.
105 *
106 * ABI Rev 0: July 2012 version starting point for v3.6-rc merge candidate +
107 * RFC-v2 vhost-scsi userspace. Add GET_ABI_VERSION ioctl usage
108 * ABI Rev 1: January 2013. Ignore vhost_tpgt filed in struct vhost_scsi_target.
109 * All the targets under vhost_wwpn can be seen and used by guset.
110 */
111
112#define VHOST_SCSI_ABI_VERSION 1
113
114struct vhost_scsi_target {
115 int abi_version;
116 char vhost_wwpn[TRANSPORT_IQN_LEN];
117 unsigned short vhost_tpgt;
118 unsigned short reserved;
119};
120
121/* VHOST_SCSI specific defines */
122#define VHOST_SCSI_SET_ENDPOINT _IOW(VHOST_VIRTIO, 0x40, struct vhost_scsi_target)
123#define VHOST_SCSI_CLEAR_ENDPOINT _IOW(VHOST_VIRTIO, 0x41, struct vhost_scsi_target)
124/* Changing this breaks userspace. */
125#define VHOST_SCSI_GET_ABI_VERSION _IOW(VHOST_VIRTIO, 0x42, int)
126/* Set and get the events missed flag */
127#define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32)
128#define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32)
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index 91d6f060aade..be65414d5bb1 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -219,13 +219,20 @@ static long vhost_test_reset_owner(struct vhost_test *n)
219{ 219{
220 void *priv = NULL; 220 void *priv = NULL;
221 long err; 221 long err;
222 struct vhost_memory *memory;
223
222 mutex_lock(&n->dev.mutex); 224 mutex_lock(&n->dev.mutex);
223 err = vhost_dev_check_owner(&n->dev); 225 err = vhost_dev_check_owner(&n->dev);
224 if (err) 226 if (err)
225 goto done; 227 goto done;
228 memory = vhost_dev_reset_owner_prepare();
229 if (!memory) {
230 err = -ENOMEM;
231 goto done;
232 }
226 vhost_test_stop(n, &priv); 233 vhost_test_stop(n, &priv);
227 vhost_test_flush(n); 234 vhost_test_flush(n);
228 err = vhost_dev_reset_owner(&n->dev); 235 vhost_dev_reset_owner(&n->dev, memory);
229done: 236done:
230 mutex_unlock(&n->dev.mutex); 237 mutex_unlock(&n->dev.mutex);
231 return err; 238 return err;
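
The RESET_OWNER fix, visible in both the net.c and test.c hunks above, splits the reset into a fallible prepare step and an infallible commit: the replacement (empty) memory table is allocated before anything is torn down, so an allocation failure now returns -ENOMEM with the device untouched, and vhost_dev_reset_owner() itself can no longer fail. The resulting calling convention, condensed from the test.c hunk above:

	memory = vhost_dev_reset_owner_prepare();	/* just a kmalloc(); may fail */
	if (!memory) {
		err = -ENOMEM;
		goto done;				/* device state untouched */
	}
	vhost_test_stop(n, &priv);
	vhost_test_flush(n);
	vhost_dev_reset_owner(&n->dev, memory);		/* void: cleanup + install empty map */
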
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 4eecdb867d53..749b5ab5bfbb 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -33,8 +33,6 @@ enum {
33 VHOST_MEMORY_F_LOG = 0x1, 33 VHOST_MEMORY_F_LOG = 0x1,
34}; 34};
35 35
36static unsigned vhost_zcopy_mask __read_mostly;
37
38#define vhost_used_event(vq) ((u16 __user *)&vq->avail->ring[vq->num]) 36#define vhost_used_event(vq) ((u16 __user *)&vq->avail->ring[vq->num])
39#define vhost_avail_event(vq) ((u16 __user *)&vq->used->ring[vq->num]) 37#define vhost_avail_event(vq) ((u16 __user *)&vq->used->ring[vq->num])
40 38
@@ -181,8 +179,6 @@ static void vhost_vq_reset(struct vhost_dev *dev,
181 vq->used_flags = 0; 179 vq->used_flags = 0;
182 vq->log_used = false; 180 vq->log_used = false;
183 vq->log_addr = -1ull; 181 vq->log_addr = -1ull;
184 vq->vhost_hlen = 0;
185 vq->sock_hlen = 0;
186 vq->private_data = NULL; 182 vq->private_data = NULL;
187 vq->log_base = NULL; 183 vq->log_base = NULL;
188 vq->error_ctx = NULL; 184 vq->error_ctx = NULL;
@@ -191,9 +187,6 @@ static void vhost_vq_reset(struct vhost_dev *dev,
191 vq->call_ctx = NULL; 187 vq->call_ctx = NULL;
192 vq->call = NULL; 188 vq->call = NULL;
193 vq->log_ctx = NULL; 189 vq->log_ctx = NULL;
194 vq->upend_idx = 0;
195 vq->done_idx = 0;
196 vq->ubufs = NULL;
197} 190}
198 191
199static int vhost_worker(void *data) 192static int vhost_worker(void *data)
@@ -253,43 +246,29 @@ static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
253 vq->log = NULL; 246 vq->log = NULL;
254 kfree(vq->heads); 247 kfree(vq->heads);
255 vq->heads = NULL; 248 vq->heads = NULL;
256 kfree(vq->ubuf_info);
257 vq->ubuf_info = NULL;
258}
259
260void vhost_enable_zcopy(int vq)
261{
262 vhost_zcopy_mask |= 0x1 << vq;
263} 249}
264 250
265/* Helper to allocate iovec buffers for all vqs. */ 251/* Helper to allocate iovec buffers for all vqs. */
266static long vhost_dev_alloc_iovecs(struct vhost_dev *dev) 252static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
267{ 253{
268 int i; 254 int i;
269 bool zcopy;
270 255
271 for (i = 0; i < dev->nvqs; ++i) { 256 for (i = 0; i < dev->nvqs; ++i) {
272 dev->vqs[i].indirect = kmalloc(sizeof *dev->vqs[i].indirect * 257 dev->vqs[i]->indirect = kmalloc(sizeof *dev->vqs[i]->indirect *
273 UIO_MAXIOV, GFP_KERNEL); 258 UIO_MAXIOV, GFP_KERNEL);
274 dev->vqs[i].log = kmalloc(sizeof *dev->vqs[i].log * UIO_MAXIOV, 259 dev->vqs[i]->log = kmalloc(sizeof *dev->vqs[i]->log * UIO_MAXIOV,
275 GFP_KERNEL); 260 GFP_KERNEL);
276 dev->vqs[i].heads = kmalloc(sizeof *dev->vqs[i].heads * 261 dev->vqs[i]->heads = kmalloc(sizeof *dev->vqs[i]->heads *
277 UIO_MAXIOV, GFP_KERNEL); 262 UIO_MAXIOV, GFP_KERNEL);
278 zcopy = vhost_zcopy_mask & (0x1 << i); 263 if (!dev->vqs[i]->indirect || !dev->vqs[i]->log ||
279 if (zcopy) 264 !dev->vqs[i]->heads)
280 dev->vqs[i].ubuf_info =
281 kmalloc(sizeof *dev->vqs[i].ubuf_info *
282 UIO_MAXIOV, GFP_KERNEL);
283 if (!dev->vqs[i].indirect || !dev->vqs[i].log ||
284 !dev->vqs[i].heads ||
285 (zcopy && !dev->vqs[i].ubuf_info))
286 goto err_nomem; 265 goto err_nomem;
287 } 266 }
288 return 0; 267 return 0;
289 268
290err_nomem: 269err_nomem:
291 for (; i >= 0; --i) 270 for (; i >= 0; --i)
292 vhost_vq_free_iovecs(&dev->vqs[i]); 271 vhost_vq_free_iovecs(dev->vqs[i]);
293 return -ENOMEM; 272 return -ENOMEM;
294} 273}
295 274
@@ -298,11 +277,11 @@ static void vhost_dev_free_iovecs(struct vhost_dev *dev)
298 int i; 277 int i;
299 278
300 for (i = 0; i < dev->nvqs; ++i) 279 for (i = 0; i < dev->nvqs; ++i)
301 vhost_vq_free_iovecs(&dev->vqs[i]); 280 vhost_vq_free_iovecs(dev->vqs[i]);
302} 281}
303 282
304long vhost_dev_init(struct vhost_dev *dev, 283long vhost_dev_init(struct vhost_dev *dev,
305 struct vhost_virtqueue *vqs, int nvqs) 284 struct vhost_virtqueue **vqs, int nvqs)
306{ 285{
307 int i; 286 int i;
308 287
@@ -318,16 +297,15 @@ long vhost_dev_init(struct vhost_dev *dev,
318 dev->worker = NULL; 297 dev->worker = NULL;
319 298
320 for (i = 0; i < dev->nvqs; ++i) { 299 for (i = 0; i < dev->nvqs; ++i) {
321 dev->vqs[i].log = NULL; 300 dev->vqs[i]->log = NULL;
322 dev->vqs[i].indirect = NULL; 301 dev->vqs[i]->indirect = NULL;
323 dev->vqs[i].heads = NULL; 302 dev->vqs[i]->heads = NULL;
324 dev->vqs[i].ubuf_info = NULL; 303 dev->vqs[i]->dev = dev;
325 dev->vqs[i].dev = dev; 304 mutex_init(&dev->vqs[i]->mutex);
326 mutex_init(&dev->vqs[i].mutex); 305 vhost_vq_reset(dev, dev->vqs[i]);
327 vhost_vq_reset(dev, dev->vqs + i); 306 if (dev->vqs[i]->handle_kick)
328 if (dev->vqs[i].handle_kick) 307 vhost_poll_init(&dev->vqs[i]->poll,
329 vhost_poll_init(&dev->vqs[i].poll, 308 dev->vqs[i]->handle_kick, POLLIN, dev);
330 dev->vqs[i].handle_kick, POLLIN, dev);
331 } 309 }
332 310
333 return 0; 311 return 0;
@@ -408,21 +386,19 @@ err_mm:
408 return err; 386 return err;
409} 387}
410 388
411/* Caller should have device mutex */ 389struct vhost_memory *vhost_dev_reset_owner_prepare(void)
412long vhost_dev_reset_owner(struct vhost_dev *dev)
413{ 390{
414 struct vhost_memory *memory; 391 return kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL);
415 392}
416 /* Restore memory to default empty mapping. */
417 memory = kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL);
418 if (!memory)
419 return -ENOMEM;
420 393
394/* Caller should have device mutex */
395void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_memory *memory)
396{
421 vhost_dev_cleanup(dev, true); 397 vhost_dev_cleanup(dev, true);
422 398
399 /* Restore memory to default empty mapping. */
423 memory->nregions = 0; 400 memory->nregions = 0;
424 RCU_INIT_POINTER(dev->memory, memory); 401 RCU_INIT_POINTER(dev->memory, memory);
425 return 0;
426} 402}
427 403
428void vhost_dev_stop(struct vhost_dev *dev) 404void vhost_dev_stop(struct vhost_dev *dev)
@@ -430,9 +406,9 @@ void vhost_dev_stop(struct vhost_dev *dev)
430 int i; 406 int i;
431 407
432 for (i = 0; i < dev->nvqs; ++i) { 408 for (i = 0; i < dev->nvqs; ++i) {
433 if (dev->vqs[i].kick && dev->vqs[i].handle_kick) { 409 if (dev->vqs[i]->kick && dev->vqs[i]->handle_kick) {
434 vhost_poll_stop(&dev->vqs[i].poll); 410 vhost_poll_stop(&dev->vqs[i]->poll);
435 vhost_poll_flush(&dev->vqs[i].poll); 411 vhost_poll_flush(&dev->vqs[i]->poll);
436 } 412 }
437 } 413 }
438} 414}
@@ -443,17 +419,17 @@ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked)
443 int i; 419 int i;
444 420
445 for (i = 0; i < dev->nvqs; ++i) { 421 for (i = 0; i < dev->nvqs; ++i) {
446 if (dev->vqs[i].error_ctx) 422 if (dev->vqs[i]->error_ctx)
447 eventfd_ctx_put(dev->vqs[i].error_ctx); 423 eventfd_ctx_put(dev->vqs[i]->error_ctx);
448 if (dev->vqs[i].error) 424 if (dev->vqs[i]->error)
449 fput(dev->vqs[i].error); 425 fput(dev->vqs[i]->error);
450 if (dev->vqs[i].kick) 426 if (dev->vqs[i]->kick)
451 fput(dev->vqs[i].kick); 427 fput(dev->vqs[i]->kick);
452 if (dev->vqs[i].call_ctx) 428 if (dev->vqs[i]->call_ctx)
453 eventfd_ctx_put(dev->vqs[i].call_ctx); 429 eventfd_ctx_put(dev->vqs[i]->call_ctx);
454 if (dev->vqs[i].call) 430 if (dev->vqs[i]->call)
455 fput(dev->vqs[i].call); 431 fput(dev->vqs[i]->call);
456 vhost_vq_reset(dev, dev->vqs + i); 432 vhost_vq_reset(dev, dev->vqs[i]);
457 } 433 }
458 vhost_dev_free_iovecs(dev); 434 vhost_dev_free_iovecs(dev);
459 if (dev->log_ctx) 435 if (dev->log_ctx)
@@ -524,14 +500,14 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem,
524 500
525 for (i = 0; i < d->nvqs; ++i) { 501 for (i = 0; i < d->nvqs; ++i) {
526 int ok; 502 int ok;
527 mutex_lock(&d->vqs[i].mutex); 503 mutex_lock(&d->vqs[i]->mutex);
528 /* If ring is inactive, will check when it's enabled. */ 504 /* If ring is inactive, will check when it's enabled. */
529 if (d->vqs[i].private_data) 505 if (d->vqs[i]->private_data)
530 ok = vq_memory_access_ok(d->vqs[i].log_base, mem, 506 ok = vq_memory_access_ok(d->vqs[i]->log_base, mem,
531 log_all); 507 log_all);
532 else 508 else
533 ok = 1; 509 ok = 1;
534 mutex_unlock(&d->vqs[i].mutex); 510 mutex_unlock(&d->vqs[i]->mutex);
535 if (!ok) 511 if (!ok)
536 return 0; 512 return 0;
537 } 513 }
@@ -641,7 +617,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
641 if (idx >= d->nvqs) 617 if (idx >= d->nvqs)
642 return -ENOBUFS; 618 return -ENOBUFS;
643 619
644 vq = d->vqs + idx; 620 vq = d->vqs[idx];
645 621
646 mutex_lock(&vq->mutex); 622 mutex_lock(&vq->mutex);
647 623
@@ -852,7 +828,7 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
852 for (i = 0; i < d->nvqs; ++i) { 828 for (i = 0; i < d->nvqs; ++i) {
853 struct vhost_virtqueue *vq; 829 struct vhost_virtqueue *vq;
854 void __user *base = (void __user *)(unsigned long)p; 830 void __user *base = (void __user *)(unsigned long)p;
855 vq = d->vqs + i; 831 vq = d->vqs[i];
856 mutex_lock(&vq->mutex); 832 mutex_lock(&vq->mutex);
857 /* If ring is inactive, will check when it's enabled. */ 833 /* If ring is inactive, will check when it's enabled. */
858 if (vq->private_data && !vq_log_access_ok(d, vq, base)) 834 if (vq->private_data && !vq_log_access_ok(d, vq, base))
@@ -879,9 +855,9 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
879 } else 855 } else
880 filep = eventfp; 856 filep = eventfp;
881 for (i = 0; i < d->nvqs; ++i) { 857 for (i = 0; i < d->nvqs; ++i) {
882 mutex_lock(&d->vqs[i].mutex); 858 mutex_lock(&d->vqs[i]->mutex);
883 d->vqs[i].log_ctx = d->log_ctx; 859 d->vqs[i]->log_ctx = d->log_ctx;
884 mutex_unlock(&d->vqs[i].mutex); 860 mutex_unlock(&d->vqs[i]->mutex);
885 } 861 }
886 if (ctx) 862 if (ctx)
887 eventfd_ctx_put(ctx); 863 eventfd_ctx_put(ctx);
@@ -1551,38 +1527,3 @@ void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
1551 &vq->used->flags, r); 1527 &vq->used->flags, r);
1552 } 1528 }
1553} 1529}
1554
1555static void vhost_zerocopy_done_signal(struct kref *kref)
1556{
1557 struct vhost_ubuf_ref *ubufs = container_of(kref, struct vhost_ubuf_ref,
1558 kref);
1559 wake_up(&ubufs->wait);
1560}
1561
1562struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *vq,
1563 bool zcopy)
1564{
1565 struct vhost_ubuf_ref *ubufs;
1566 /* No zero copy backend? Nothing to count. */
1567 if (!zcopy)
1568 return NULL;
1569 ubufs = kmalloc(sizeof *ubufs, GFP_KERNEL);
1570 if (!ubufs)
1571 return ERR_PTR(-ENOMEM);
1572 kref_init(&ubufs->kref);
1573 init_waitqueue_head(&ubufs->wait);
1574 ubufs->vq = vq;
1575 return ubufs;
1576}
1577
1578void vhost_ubuf_put(struct vhost_ubuf_ref *ubufs)
1579{
1580 kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
1581}
1582
1583void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *ubufs)
1584{
1585 kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
1586 wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount));
1587 kfree(ubufs);
1588}
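
Note on the hunks above: the old vhost_dev_reset_owner() has been split into vhost_dev_reset_owner_prepare() plus vhost_dev_reset_owner(), so a device's VHOST_RESET_OWNER handler can fail with -ENOMEM *before* it tears anything down. A minimal caller sketch, assuming a generic device; the example_reset_owner() name and the elided ring stop/flush are illustrative only, not code from this patch:

	static long example_reset_owner(struct vhost_dev *dev)
	{
		struct vhost_memory *memory;
		long err;

		mutex_lock(&dev->mutex);
		err = vhost_dev_check_owner(dev);
		if (err)
			goto done;
		/* Allocate the empty memory table up front so the ioctl can
		 * still bail out with -ENOMEM before the device is touched. */
		memory = vhost_dev_reset_owner_prepare();
		if (!memory) {
			err = -ENOMEM;
			goto done;
		}
		/* ... device-specific ring stop/flush would go here ... */
		vhost_dev_reset_owner(dev, memory);	/* cannot fail any more */
	done:
		mutex_unlock(&dev->mutex);
		return err;
	}
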
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 17261e277c02..b58f4ae82cb8 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -54,18 +54,6 @@ struct vhost_log {
54 54
55struct vhost_virtqueue; 55struct vhost_virtqueue;
56 56
57struct vhost_ubuf_ref {
58 struct kref kref;
59 wait_queue_head_t wait;
60 struct vhost_virtqueue *vq;
61};
62
63struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *, bool zcopy);
64void vhost_ubuf_put(struct vhost_ubuf_ref *);
65void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *);
66
67struct ubuf_info;
68
69/* The virtqueue structure describes a queue attached to a device. */ 57/* The virtqueue structure describes a queue attached to a device. */
70struct vhost_virtqueue { 58struct vhost_virtqueue {
71 struct vhost_dev *dev; 59 struct vhost_dev *dev;
@@ -114,10 +102,7 @@ struct vhost_virtqueue {
114 /* hdr is used to store the virtio header. 102 /* hdr is used to store the virtio header.
115 * Since each iovec has >= 1 byte length, we never need more than 103 * Since each iovec has >= 1 byte length, we never need more than
116 * header length entries to store the header. */ 104 * header length entries to store the header. */
117 struct iovec hdr[sizeof(struct virtio_net_hdr_mrg_rxbuf)];
118 struct iovec *indirect; 105 struct iovec *indirect;
119 size_t vhost_hlen;
120 size_t sock_hlen;
121 struct vring_used_elem *heads; 106 struct vring_used_elem *heads;
122 /* We use a kind of RCU to access private pointer. 107 /* We use a kind of RCU to access private pointer.
123 * All readers access it from worker, which makes it possible to 108 * All readers access it from worker, which makes it possible to
@@ -130,16 +115,6 @@ struct vhost_virtqueue {
130 /* Log write descriptors */ 115 /* Log write descriptors */
131 void __user *log_base; 116 void __user *log_base;
132 struct vhost_log *log; 117 struct vhost_log *log;
133 /* vhost zerocopy support fields below: */
134 /* last used idx for outstanding DMA zerocopy buffers */
135 int upend_idx;
136 /* first used idx for DMA done zerocopy buffers */
137 int done_idx;
138 /* an array of userspace buffers info */
139 struct ubuf_info *ubuf_info;
140 /* Reference counting for outstanding ubufs.
141 * Protected by vq mutex. Writers must also take device mutex. */
142 struct vhost_ubuf_ref *ubufs;
143}; 118};
144 119
145struct vhost_dev { 120struct vhost_dev {
@@ -150,7 +125,7 @@ struct vhost_dev {
150 struct mm_struct *mm; 125 struct mm_struct *mm;
151 struct mutex mutex; 126 struct mutex mutex;
152 unsigned acked_features; 127 unsigned acked_features;
153 struct vhost_virtqueue *vqs; 128 struct vhost_virtqueue **vqs;
154 int nvqs; 129 int nvqs;
155 struct file *log_file; 130 struct file *log_file;
156 struct eventfd_ctx *log_ctx; 131 struct eventfd_ctx *log_ctx;
@@ -159,9 +134,10 @@ struct vhost_dev {
159 struct task_struct *worker; 134 struct task_struct *worker;
160}; 135};
161 136
162long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue *vqs, int nvqs); 137long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs);
163long vhost_dev_check_owner(struct vhost_dev *); 138long vhost_dev_check_owner(struct vhost_dev *);
164long vhost_dev_reset_owner(struct vhost_dev *); 139struct vhost_memory *vhost_dev_reset_owner_prepare(void);
140void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_memory *);
165void vhost_dev_cleanup(struct vhost_dev *, bool locked); 141void vhost_dev_cleanup(struct vhost_dev *, bool locked);
166void vhost_dev_stop(struct vhost_dev *); 142void vhost_dev_stop(struct vhost_dev *);
167long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp); 143long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp);
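
With struct vhost_dev now holding struct vhost_virtqueue **vqs, a device can embed the core virtqueue inside its own per-queue structure and hand vhost_dev_init() an array of pointers to the embedded cores; this is what lets net.c and scsi.c keep device-specific per-vq fields out of common code. A rough sketch of the pattern, using hypothetical my_* names rather than anything from this series:

	struct my_virtqueue {
		struct vhost_virtqueue vq;	/* core fields, driven by vhost.c */
		int my_private_state;		/* device-specific per-vq field */
	};

	struct my_dev {
		struct vhost_dev dev;
		struct my_virtqueue vqs[2];
	};

	static long my_dev_init(struct my_dev *d)
	{
		struct vhost_virtqueue **vqs;
		int i;

		/* The pointer array is owned by the device and must stay
		 * alive (and be freed) alongside struct my_dev. */
		vqs = kmalloc(2 * sizeof(*vqs), GFP_KERNEL);
		if (!vqs)
			return -ENOMEM;
		for (i = 0; i < 2; i++)
			vqs[i] = &d->vqs[i].vq;
		return vhost_dev_init(&d->dev, vqs, 2);
	}
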
diff --git a/include/uapi/linux/vhost.h b/include/uapi/linux/vhost.h
index e847f1e30756..bb6a5b4cb3c5 100644
--- a/include/uapi/linux/vhost.h
+++ b/include/uapi/linux/vhost.h
@@ -127,4 +127,32 @@ struct vhost_memory {
127/* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */ 127/* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */
128#define VHOST_NET_F_VIRTIO_NET_HDR 27 128#define VHOST_NET_F_VIRTIO_NET_HDR 27
129 129
130/* VHOST_SCSI specific definitions */
131
132/*
133 * Used by QEMU userspace to ensure a consistent vhost-scsi ABI.
134 *
135 * ABI Rev 0: July 2012 version starting point for v3.6-rc merge candidate +
136 * RFC-v2 vhost-scsi userspace. Add GET_ABI_VERSION ioctl usage
 137 * ABI Rev 1: January 2013. Ignore vhost_tpgt field in struct vhost_scsi_target.
 138 * All the targets under vhost_wwpn can be seen and used by guest.
139 */
140
141#define VHOST_SCSI_ABI_VERSION 1
142
143struct vhost_scsi_target {
144 int abi_version;
145 char vhost_wwpn[224]; /* TRANSPORT_IQN_LEN */
146 unsigned short vhost_tpgt;
147 unsigned short reserved;
148};
149
150#define VHOST_SCSI_SET_ENDPOINT _IOW(VHOST_VIRTIO, 0x40, struct vhost_scsi_target)
151#define VHOST_SCSI_CLEAR_ENDPOINT _IOW(VHOST_VIRTIO, 0x41, struct vhost_scsi_target)
152/* Changing this breaks userspace. */
153#define VHOST_SCSI_GET_ABI_VERSION _IOW(VHOST_VIRTIO, 0x42, int)
154/* Set and get the events missed flag */
155#define VHOST_SCSI_SET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x43, __u32)
156#define VHOST_SCSI_GET_EVENTS_MISSED _IOW(VHOST_VIRTIO, 0x44, __u32)
157
130#endif 158#endif
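
The new VHOST_SCSI_* ioctls above are the userspace-facing side of the renamed vhost_scsi module. A hedged sketch of the intended call sequence from userspace, assuming the usual /dev/vhost-scsi character device and a placeholder WWPN; the VHOST_SET_OWNER setup and most error handling are abbreviated:

	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/vhost.h>

	int main(void)
	{
		struct vhost_scsi_target tgt;
		int abi = 0;
		int fd = open("/dev/vhost-scsi", O_RDWR);

		if (fd < 0) {
			perror("open /dev/vhost-scsi");
			return 1;
		}
		/* Refuse to run against a newer, unknown ABI revision. */
		if (ioctl(fd, VHOST_SCSI_GET_ABI_VERSION, &abi) < 0 ||
		    abi > VHOST_SCSI_ABI_VERSION) {
			fprintf(stderr, "unsupported vhost-scsi ABI %d\n", abi);
			return 1;
		}
		memset(&tgt, 0, sizeof(tgt));
		tgt.abi_version = abi;
		/* Placeholder WWPN; since ABI rev 1, vhost_tpgt is ignored and
		 * all TPGs under the WWPN are exposed to the guest. */
		strncpy(tgt.vhost_wwpn, "naa.600140554cf3a18e",
			sizeof(tgt.vhost_wwpn) - 1);
		if (ioctl(fd, VHOST_SCSI_SET_ENDPOINT, &tgt) < 0)
			perror("VHOST_SCSI_SET_ENDPOINT");
		return 0;
	}
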