Diffstat (limited to 'drivers/vhost')
-rw-r--r--   drivers/vhost/Kconfig                                          |  10
-rw-r--r--   drivers/vhost/Kconfig.tcm                                      |   6
-rw-r--r--   drivers/vhost/Makefile                                         |   3
-rw-r--r--   drivers/vhost/net.c                                            | 322
-rw-r--r--   drivers/vhost/scsi.c (renamed from drivers/vhost/tcm_vhost.c)  | 707
-rw-r--r--   drivers/vhost/tcm_vhost.h                                      | 115
-rw-r--r--   drivers/vhost/test.c                                           |   9
-rw-r--r--   drivers/vhost/vhost.c                                          | 156
-rw-r--r--   drivers/vhost/vhost.h                                          |  32
9 files changed, 890 insertions, 470 deletions
diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig
index bf243177ffe1..26a64e5b8a58 100644
--- a/drivers/vhost/Kconfig
+++ b/drivers/vhost/Kconfig
@@ -9,6 +9,10 @@ config VHOST_NET
9 To compile this driver as a module, choose M here: the module will 9 To compile this driver as a module, choose M here: the module will
10 be called vhost_net. 10 be called vhost_net.
11 11
12if STAGING 12config VHOST_SCSI
13source "drivers/vhost/Kconfig.tcm" 13 tristate "VHOST_SCSI TCM fabric driver"
14endif 14 depends on TARGET_CORE && EVENTFD && m
15 default n
16 ---help---
17 Say M here to enable the vhost_scsi TCM fabric module
18 for use with virtio-scsi guests
diff --git a/drivers/vhost/Kconfig.tcm b/drivers/vhost/Kconfig.tcm
deleted file mode 100644
index 7e3aa28d999e..000000000000
--- a/drivers/vhost/Kconfig.tcm
+++ /dev/null
@@ -1,6 +0,0 @@
1config TCM_VHOST
2 tristate "TCM_VHOST fabric module"
3 depends on TARGET_CORE && EVENTFD && m
4 default n
5 ---help---
6 Say M here to enable the TCM_VHOST fabric module for use with virtio-scsi guests
diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile
index a27b053bc9ab..ef21d5fdfa7d 100644
--- a/drivers/vhost/Makefile
+++ b/drivers/vhost/Makefile
@@ -1,4 +1,5 @@
1obj-$(CONFIG_VHOST_NET) += vhost_net.o 1obj-$(CONFIG_VHOST_NET) += vhost_net.o
2vhost_net-y := vhost.o net.o 2vhost_net-y := vhost.o net.o
3 3
4obj-$(CONFIG_TCM_VHOST) += tcm_vhost.o 4obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o
5vhost_scsi-y := scsi.o
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 959b1cd89e6a..a3645bd163d8 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -64,20 +64,36 @@ enum {
64 VHOST_NET_VQ_MAX = 2, 64 VHOST_NET_VQ_MAX = 2,
65}; 65};
66 66
67enum vhost_net_poll_state { 67struct vhost_ubuf_ref {
68 VHOST_NET_POLL_DISABLED = 0, 68 struct kref kref;
69 VHOST_NET_POLL_STARTED = 1, 69 wait_queue_head_t wait;
70 VHOST_NET_POLL_STOPPED = 2, 70 struct vhost_virtqueue *vq;
71};
72
73struct vhost_net_virtqueue {
74 struct vhost_virtqueue vq;
75 /* hdr is used to store the virtio header.
76 * Since each iovec has >= 1 byte length, we never need more than
77 * header length entries to store the header. */
78 struct iovec hdr[sizeof(struct virtio_net_hdr_mrg_rxbuf)];
79 size_t vhost_hlen;
80 size_t sock_hlen;
81 /* vhost zerocopy support fields below: */
82 /* last used idx for outstanding DMA zerocopy buffers */
83 int upend_idx;
84 /* first used idx for DMA done zerocopy buffers */
85 int done_idx;
86 /* an array of userspace buffers info */
87 struct ubuf_info *ubuf_info;
88 /* Reference counting for outstanding ubufs.
89 * Protected by vq mutex. Writers must also take device mutex. */
90 struct vhost_ubuf_ref *ubufs;
71}; 91};
72 92
73struct vhost_net { 93struct vhost_net {
74 struct vhost_dev dev; 94 struct vhost_dev dev;
75 struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; 95 struct vhost_net_virtqueue vqs[VHOST_NET_VQ_MAX];
76 struct vhost_poll poll[VHOST_NET_VQ_MAX]; 96 struct vhost_poll poll[VHOST_NET_VQ_MAX];
77 /* Tells us whether we are polling a socket for TX.
78 * We only do this when socket buffer fills up.
79 * Protected by tx vq lock. */
80 enum vhost_net_poll_state tx_poll_state;
81 /* Number of TX recently submitted. 97 /* Number of TX recently submitted.
82 * Protected by tx vq lock. */ 98 * Protected by tx vq lock. */
83 unsigned tx_packets; 99 unsigned tx_packets;
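
The new struct vhost_net_virtqueue embeds the generic struct vhost_virtqueue and keeps the per-queue zerocopy state (upend_idx, done_idx, ubuf_info, ubufs) alongside it; later hunks recover the wrapper from a pointer to the embedded vq with container_of(). A minimal userspace sketch of that pattern, with illustrative names rather than the kernel's:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct virtqueue     { int num; };                      /* stands in for vhost_virtqueue */
struct net_virtqueue { struct virtqueue vq; int upend_idx, done_idx; };

int main(void)
{
        struct net_virtqueue nvq = { .vq = { .num = 256 }, .upend_idx = 3 };
        struct virtqueue *vq = &nvq.vq;                 /* only the generic part is passed around */
        struct net_virtqueue *back = container_of(vq, struct net_virtqueue, vq);

        printf("num=%d upend_idx=%d\n", back->vq.num, back->upend_idx);
        return 0;
}
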
@@ -88,6 +104,90 @@ struct vhost_net {
88 bool tx_flush; 104 bool tx_flush;
89}; 105};
90 106
107static unsigned vhost_zcopy_mask __read_mostly;
108
109void vhost_enable_zcopy(int vq)
110{
111 vhost_zcopy_mask |= 0x1 << vq;
112}
113
114static void vhost_zerocopy_done_signal(struct kref *kref)
115{
116 struct vhost_ubuf_ref *ubufs = container_of(kref, struct vhost_ubuf_ref,
117 kref);
118 wake_up(&ubufs->wait);
119}
120
121struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *vq,
122 bool zcopy)
123{
124 struct vhost_ubuf_ref *ubufs;
125 /* No zero copy backend? Nothing to count. */
126 if (!zcopy)
127 return NULL;
128 ubufs = kmalloc(sizeof(*ubufs), GFP_KERNEL);
129 if (!ubufs)
130 return ERR_PTR(-ENOMEM);
131 kref_init(&ubufs->kref);
132 init_waitqueue_head(&ubufs->wait);
133 ubufs->vq = vq;
134 return ubufs;
135}
136
137void vhost_ubuf_put(struct vhost_ubuf_ref *ubufs)
138{
139 kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
140}
141
142void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *ubufs)
143{
144 kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
145 wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount));
146 kfree(ubufs);
147}
148
149int vhost_net_set_ubuf_info(struct vhost_net *n)
150{
151 bool zcopy;
152 int i;
153
154 for (i = 0; i < n->dev.nvqs; ++i) {
155 zcopy = vhost_zcopy_mask & (0x1 << i);
156 if (!zcopy)
157 continue;
158 n->vqs[i].ubuf_info = kmalloc(sizeof(*n->vqs[i].ubuf_info) *
159 UIO_MAXIOV, GFP_KERNEL);
160 if (!n->vqs[i].ubuf_info)
161 goto err;
162 }
163 return 0;
164
165err:
166 while (i--) {
167 zcopy = vhost_zcopy_mask & (0x1 << i);
168 if (!zcopy)
169 continue;
170 kfree(n->vqs[i].ubuf_info);
171 }
172 return -ENOMEM;
173}
174
175void vhost_net_vq_reset(struct vhost_net *n)
176{
177 int i;
178
179 for (i = 0; i < VHOST_NET_VQ_MAX; i++) {
180 n->vqs[i].done_idx = 0;
181 n->vqs[i].upend_idx = 0;
182 n->vqs[i].ubufs = NULL;
183 kfree(n->vqs[i].ubuf_info);
184 n->vqs[i].ubuf_info = NULL;
185 n->vqs[i].vhost_hlen = 0;
186 n->vqs[i].sock_hlen = 0;
187 }
188
189}
190
91static void vhost_net_tx_packet(struct vhost_net *net) 191static void vhost_net_tx_packet(struct vhost_net *net)
92{ 192{
93 ++net->tx_packets; 193 ++net->tx_packets;
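
vhost_ubuf_put_and_wait() pairs a reference count with a wait queue: the flush path drops its own reference and then sleeps until every outstanding zerocopy buffer has completed and released its reference. A hedged userspace sketch of the same put-and-wait idea using a mutex and condition variable (the kernel uses a kref plus wait_event):

#include <pthread.h>
#include <stdio.h>

struct ubuf_ref {
        int count;                      /* outstanding references, starts at 1 */
        pthread_mutex_t lock;
        pthread_cond_t  zero;
};

static void ubuf_get(struct ubuf_ref *u)
{
        pthread_mutex_lock(&u->lock);
        u->count++;
        pthread_mutex_unlock(&u->lock);
}

static void ubuf_put(struct ubuf_ref *u)
{
        pthread_mutex_lock(&u->lock);
        if (--u->count == 0)
                pthread_cond_broadcast(&u->zero);
        pthread_mutex_unlock(&u->lock);
}

static void ubuf_put_and_wait(struct ubuf_ref *u)
{
        pthread_mutex_lock(&u->lock);
        if (--u->count == 0)
                pthread_cond_broadcast(&u->zero);
        while (u->count != 0)           /* sleep until all DMAs dropped their refs */
                pthread_cond_wait(&u->zero, &u->lock);
        pthread_mutex_unlock(&u->lock);
}

int main(void)
{
        struct ubuf_ref u = { .count = 1,
                              .lock = PTHREAD_MUTEX_INITIALIZER,
                              .zero = PTHREAD_COND_INITIALIZER };

        ubuf_get(&u);                   /* a zerocopy send pins the counter */
        ubuf_put(&u);                   /* its completion callback releases it */
        ubuf_put_and_wait(&u);          /* flush: drop the initial ref and wait */
        printf("all outstanding buffers done\n");
        return 0;
}
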
@@ -155,28 +255,6 @@ static void copy_iovec_hdr(const struct iovec *from, struct iovec *to,
155 } 255 }
156} 256}
157 257
158/* Caller must have TX VQ lock */
159static void tx_poll_stop(struct vhost_net *net)
160{
161 if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED))
162 return;
163 vhost_poll_stop(net->poll + VHOST_NET_VQ_TX);
164 net->tx_poll_state = VHOST_NET_POLL_STOPPED;
165}
166
167/* Caller must have TX VQ lock */
168static int tx_poll_start(struct vhost_net *net, struct socket *sock)
169{
170 int ret;
171
172 if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED))
173 return 0;
174 ret = vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file);
175 if (!ret)
176 net->tx_poll_state = VHOST_NET_POLL_STARTED;
177 return ret;
178}
179
180/* In case of DMA done not in order in lower device driver for some reason. 258/* In case of DMA done not in order in lower device driver for some reason.
181 * upend_idx is used to track end of used idx, done_idx is used to track head 259 * upend_idx is used to track end of used idx, done_idx is used to track head
182 * of used idx. Once lower device DMA done contiguously, we will signal KVM 260 * of used idx. Once lower device DMA done contiguously, we will signal KVM
@@ -185,10 +263,12 @@ static int tx_poll_start(struct vhost_net *net, struct socket *sock)
185static int vhost_zerocopy_signal_used(struct vhost_net *net, 263static int vhost_zerocopy_signal_used(struct vhost_net *net,
186 struct vhost_virtqueue *vq) 264 struct vhost_virtqueue *vq)
187{ 265{
266 struct vhost_net_virtqueue *nvq =
267 container_of(vq, struct vhost_net_virtqueue, vq);
188 int i; 268 int i;
189 int j = 0; 269 int j = 0;
190 270
191 for (i = vq->done_idx; i != vq->upend_idx; i = (i + 1) % UIO_MAXIOV) { 271 for (i = nvq->done_idx; i != nvq->upend_idx; i = (i + 1) % UIO_MAXIOV) {
192 if (vq->heads[i].len == VHOST_DMA_FAILED_LEN) 272 if (vq->heads[i].len == VHOST_DMA_FAILED_LEN)
193 vhost_net_tx_err(net); 273 vhost_net_tx_err(net);
194 if (VHOST_DMA_IS_DONE(vq->heads[i].len)) { 274 if (VHOST_DMA_IS_DONE(vq->heads[i].len)) {
@@ -200,7 +280,7 @@ static int vhost_zerocopy_signal_used(struct vhost_net *net,
200 break; 280 break;
201 } 281 }
202 if (j) 282 if (j)
203 vq->done_idx = i; 283 nvq->done_idx = i;
204 return j; 284 return j;
205} 285}
206 286
@@ -230,7 +310,8 @@ static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
230 * read-size critical section for our kind of RCU. */ 310 * read-size critical section for our kind of RCU. */
231static void handle_tx(struct vhost_net *net) 311static void handle_tx(struct vhost_net *net)
232{ 312{
233 struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX]; 313 struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_TX];
314 struct vhost_virtqueue *vq = &nvq->vq;
234 unsigned out, in, s; 315 unsigned out, in, s;
235 int head; 316 int head;
236 struct msghdr msg = { 317 struct msghdr msg = {
@@ -242,7 +323,7 @@ static void handle_tx(struct vhost_net *net)
242 .msg_flags = MSG_DONTWAIT, 323 .msg_flags = MSG_DONTWAIT,
243 }; 324 };
244 size_t len, total_len = 0; 325 size_t len, total_len = 0;
245 int err, wmem; 326 int err;
246 size_t hdr_size; 327 size_t hdr_size;
247 struct socket *sock; 328 struct socket *sock;
248 struct vhost_ubuf_ref *uninitialized_var(ubufs); 329 struct vhost_ubuf_ref *uninitialized_var(ubufs);
@@ -253,21 +334,11 @@ static void handle_tx(struct vhost_net *net)
253 if (!sock) 334 if (!sock)
254 return; 335 return;
255 336
256 wmem = atomic_read(&sock->sk->sk_wmem_alloc);
257 if (wmem >= sock->sk->sk_sndbuf) {
258 mutex_lock(&vq->mutex);
259 tx_poll_start(net, sock);
260 mutex_unlock(&vq->mutex);
261 return;
262 }
263
264 mutex_lock(&vq->mutex); 337 mutex_lock(&vq->mutex);
265 vhost_disable_notify(&net->dev, vq); 338 vhost_disable_notify(&net->dev, vq);
266 339
267 if (wmem < sock->sk->sk_sndbuf / 2) 340 hdr_size = nvq->vhost_hlen;
268 tx_poll_stop(net); 341 zcopy = nvq->ubufs;
269 hdr_size = vq->vhost_hlen;
270 zcopy = vq->ubufs;
271 342
272 for (;;) { 343 for (;;) {
273 /* Release DMAs done buffers first */ 344 /* Release DMAs done buffers first */
@@ -285,23 +356,15 @@ static void handle_tx(struct vhost_net *net)
285 if (head == vq->num) { 356 if (head == vq->num) {
286 int num_pends; 357 int num_pends;
287 358
288 wmem = atomic_read(&sock->sk->sk_wmem_alloc);
289 if (wmem >= sock->sk->sk_sndbuf * 3 / 4) {
290 tx_poll_start(net, sock);
291 set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
292 break;
293 }
294 /* If more outstanding DMAs, queue the work. 359 /* If more outstanding DMAs, queue the work.
295 * Handle upend_idx wrap around 360 * Handle upend_idx wrap around
296 */ 361 */
297 num_pends = likely(vq->upend_idx >= vq->done_idx) ? 362 num_pends = likely(nvq->upend_idx >= nvq->done_idx) ?
298 (vq->upend_idx - vq->done_idx) : 363 (nvq->upend_idx - nvq->done_idx) :
299 (vq->upend_idx + UIO_MAXIOV - vq->done_idx); 364 (nvq->upend_idx + UIO_MAXIOV -
300 if (unlikely(num_pends > VHOST_MAX_PEND)) { 365 nvq->done_idx);
301 tx_poll_start(net, sock); 366 if (unlikely(num_pends > VHOST_MAX_PEND))
302 set_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
303 break; 367 break;
304 }
305 if (unlikely(vhost_enable_notify(&net->dev, vq))) { 368 if (unlikely(vhost_enable_notify(&net->dev, vq))) {
306 vhost_disable_notify(&net->dev, vq); 369 vhost_disable_notify(&net->dev, vq);
307 continue; 370 continue;
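
With the socket-write-space polling removed, the TX path now throttles only on outstanding zerocopy DMAs: upend_idx and done_idx both walk a ring of UIO_MAXIOV slots, so the count of pending buffers has to handle wraparound. A small illustrative sketch of that arithmetic:

#include <stdio.h>

#define RING_SIZE 1024   /* stands in for UIO_MAXIOV */

static int num_pending(int upend_idx, int done_idx)
{
        return upend_idx >= done_idx ?
                upend_idx - done_idx :
                upend_idx + RING_SIZE - done_idx;
}

int main(void)
{
        printf("%d\n", num_pending(10, 3));     /* 7 buffers outstanding */
        printf("%d\n", num_pending(2, 1020));   /* 6: producer wrapped past the end */
        return 0;
}
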
@@ -314,44 +377,45 @@ static void handle_tx(struct vhost_net *net)
314 break; 377 break;
315 } 378 }
316 /* Skip header. TODO: support TSO. */ 379 /* Skip header. TODO: support TSO. */
317 s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, out); 380 s = move_iovec_hdr(vq->iov, nvq->hdr, hdr_size, out);
318 msg.msg_iovlen = out; 381 msg.msg_iovlen = out;
319 len = iov_length(vq->iov, out); 382 len = iov_length(vq->iov, out);
320 /* Sanity check */ 383 /* Sanity check */
321 if (!len) { 384 if (!len) {
322 vq_err(vq, "Unexpected header len for TX: " 385 vq_err(vq, "Unexpected header len for TX: "
323 "%zd expected %zd\n", 386 "%zd expected %zd\n",
324 iov_length(vq->hdr, s), hdr_size); 387 iov_length(nvq->hdr, s), hdr_size);
325 break; 388 break;
326 } 389 }
327 zcopy_used = zcopy && (len >= VHOST_GOODCOPY_LEN || 390 zcopy_used = zcopy && (len >= VHOST_GOODCOPY_LEN ||
328 vq->upend_idx != vq->done_idx); 391 nvq->upend_idx != nvq->done_idx);
329 392
330 /* use msg_control to pass vhost zerocopy ubuf info to skb */ 393 /* use msg_control to pass vhost zerocopy ubuf info to skb */
331 if (zcopy_used) { 394 if (zcopy_used) {
332 vq->heads[vq->upend_idx].id = head; 395 vq->heads[nvq->upend_idx].id = head;
333 if (!vhost_net_tx_select_zcopy(net) || 396 if (!vhost_net_tx_select_zcopy(net) ||
334 len < VHOST_GOODCOPY_LEN) { 397 len < VHOST_GOODCOPY_LEN) {
335 /* copy don't need to wait for DMA done */ 398 /* copy don't need to wait for DMA done */
336 vq->heads[vq->upend_idx].len = 399 vq->heads[nvq->upend_idx].len =
337 VHOST_DMA_DONE_LEN; 400 VHOST_DMA_DONE_LEN;
338 msg.msg_control = NULL; 401 msg.msg_control = NULL;
339 msg.msg_controllen = 0; 402 msg.msg_controllen = 0;
340 ubufs = NULL; 403 ubufs = NULL;
341 } else { 404 } else {
342 struct ubuf_info *ubuf = &vq->ubuf_info[head]; 405 struct ubuf_info *ubuf;
406 ubuf = nvq->ubuf_info + nvq->upend_idx;
343 407
344 vq->heads[vq->upend_idx].len = 408 vq->heads[nvq->upend_idx].len =
345 VHOST_DMA_IN_PROGRESS; 409 VHOST_DMA_IN_PROGRESS;
346 ubuf->callback = vhost_zerocopy_callback; 410 ubuf->callback = vhost_zerocopy_callback;
347 ubuf->ctx = vq->ubufs; 411 ubuf->ctx = nvq->ubufs;
348 ubuf->desc = vq->upend_idx; 412 ubuf->desc = nvq->upend_idx;
349 msg.msg_control = ubuf; 413 msg.msg_control = ubuf;
350 msg.msg_controllen = sizeof(ubuf); 414 msg.msg_controllen = sizeof(ubuf);
351 ubufs = vq->ubufs; 415 ubufs = nvq->ubufs;
352 kref_get(&ubufs->kref); 416 kref_get(&ubufs->kref);
353 } 417 }
354 vq->upend_idx = (vq->upend_idx + 1) % UIO_MAXIOV; 418 nvq->upend_idx = (nvq->upend_idx + 1) % UIO_MAXIOV;
355 } 419 }
356 /* TODO: Check specific error and bomb out unless ENOBUFS? */ 420 /* TODO: Check specific error and bomb out unless ENOBUFS? */
357 err = sock->ops->sendmsg(NULL, sock, &msg, len); 421 err = sock->ops->sendmsg(NULL, sock, &msg, len);
@@ -359,12 +423,10 @@ static void handle_tx(struct vhost_net *net)
359 if (zcopy_used) { 423 if (zcopy_used) {
360 if (ubufs) 424 if (ubufs)
361 vhost_ubuf_put(ubufs); 425 vhost_ubuf_put(ubufs);
362 vq->upend_idx = ((unsigned)vq->upend_idx - 1) % 426 nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
363 UIO_MAXIOV; 427 % UIO_MAXIOV;
364 } 428 }
365 vhost_discard_vq_desc(vq, 1); 429 vhost_discard_vq_desc(vq, 1);
366 if (err == -EAGAIN || err == -ENOBUFS)
367 tx_poll_start(net, sock);
368 break; 430 break;
369 } 431 }
370 if (err != len) 432 if (err != len)
@@ -469,7 +531,8 @@ err:
469 * read-size critical section for our kind of RCU. */ 531 * read-size critical section for our kind of RCU. */
470static void handle_rx(struct vhost_net *net) 532static void handle_rx(struct vhost_net *net)
471{ 533{
472 struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX]; 534 struct vhost_net_virtqueue *nvq = &net->vqs[VHOST_NET_VQ_RX];
535 struct vhost_virtqueue *vq = &nvq->vq;
473 unsigned uninitialized_var(in), log; 536 unsigned uninitialized_var(in), log;
474 struct vhost_log *vq_log; 537 struct vhost_log *vq_log;
475 struct msghdr msg = { 538 struct msghdr msg = {
@@ -497,8 +560,8 @@ static void handle_rx(struct vhost_net *net)
497 560
498 mutex_lock(&vq->mutex); 561 mutex_lock(&vq->mutex);
499 vhost_disable_notify(&net->dev, vq); 562 vhost_disable_notify(&net->dev, vq);
500 vhost_hlen = vq->vhost_hlen; 563 vhost_hlen = nvq->vhost_hlen;
501 sock_hlen = vq->sock_hlen; 564 sock_hlen = nvq->sock_hlen;
502 565
503 vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ? 566 vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ?
504 vq->log : NULL; 567 vq->log : NULL;
@@ -528,11 +591,11 @@ static void handle_rx(struct vhost_net *net)
528 /* We don't need to be notified again. */ 591 /* We don't need to be notified again. */
529 if (unlikely((vhost_hlen))) 592 if (unlikely((vhost_hlen)))
530 /* Skip header. TODO: support TSO. */ 593 /* Skip header. TODO: support TSO. */
531 move_iovec_hdr(vq->iov, vq->hdr, vhost_hlen, in); 594 move_iovec_hdr(vq->iov, nvq->hdr, vhost_hlen, in);
532 else 595 else
533 /* Copy the header for use in VIRTIO_NET_F_MRG_RXBUF: 596 /* Copy the header for use in VIRTIO_NET_F_MRG_RXBUF:
534 * needed because recvmsg can modify msg_iov. */ 597 * needed because recvmsg can modify msg_iov. */
535 copy_iovec_hdr(vq->iov, vq->hdr, sock_hlen, in); 598 copy_iovec_hdr(vq->iov, nvq->hdr, sock_hlen, in);
536 msg.msg_iovlen = in; 599 msg.msg_iovlen = in;
537 err = sock->ops->recvmsg(NULL, sock, &msg, 600 err = sock->ops->recvmsg(NULL, sock, &msg,
538 sock_len, MSG_DONTWAIT | MSG_TRUNC); 601 sock_len, MSG_DONTWAIT | MSG_TRUNC);
@@ -546,7 +609,7 @@ static void handle_rx(struct vhost_net *net)
546 continue; 609 continue;
547 } 610 }
548 if (unlikely(vhost_hlen) && 611 if (unlikely(vhost_hlen) &&
549 memcpy_toiovecend(vq->hdr, (unsigned char *)&hdr, 0, 612 memcpy_toiovecend(nvq->hdr, (unsigned char *)&hdr, 0,
550 vhost_hlen)) { 613 vhost_hlen)) {
551 vq_err(vq, "Unable to write vnet_hdr at addr %p\n", 614 vq_err(vq, "Unable to write vnet_hdr at addr %p\n",
552 vq->iov->iov_base); 615 vq->iov->iov_base);
@@ -554,7 +617,7 @@ static void handle_rx(struct vhost_net *net)
554 } 617 }
555 /* TODO: Should check and handle checksum. */ 618 /* TODO: Should check and handle checksum. */
556 if (likely(mergeable) && 619 if (likely(mergeable) &&
557 memcpy_toiovecend(vq->hdr, (unsigned char *)&headcount, 620 memcpy_toiovecend(nvq->hdr, (unsigned char *)&headcount,
558 offsetof(typeof(hdr), num_buffers), 621 offsetof(typeof(hdr), num_buffers),
559 sizeof hdr.num_buffers)) { 622 sizeof hdr.num_buffers)) {
560 vq_err(vq, "Failed num_buffers write"); 623 vq_err(vq, "Failed num_buffers write");
@@ -611,23 +674,39 @@ static int vhost_net_open(struct inode *inode, struct file *f)
611{ 674{
612 struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL); 675 struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL);
613 struct vhost_dev *dev; 676 struct vhost_dev *dev;
614 int r; 677 struct vhost_virtqueue **vqs;
678 int r, i;
615 679
616 if (!n) 680 if (!n)
617 return -ENOMEM; 681 return -ENOMEM;
682 vqs = kmalloc(VHOST_NET_VQ_MAX * sizeof(*vqs), GFP_KERNEL);
683 if (!vqs) {
684 kfree(n);
685 return -ENOMEM;
686 }
618 687
619 dev = &n->dev; 688 dev = &n->dev;
620 n->vqs[VHOST_NET_VQ_TX].handle_kick = handle_tx_kick; 689 vqs[VHOST_NET_VQ_TX] = &n->vqs[VHOST_NET_VQ_TX].vq;
621 n->vqs[VHOST_NET_VQ_RX].handle_kick = handle_rx_kick; 690 vqs[VHOST_NET_VQ_RX] = &n->vqs[VHOST_NET_VQ_RX].vq;
622 r = vhost_dev_init(dev, n->vqs, VHOST_NET_VQ_MAX); 691 n->vqs[VHOST_NET_VQ_TX].vq.handle_kick = handle_tx_kick;
692 n->vqs[VHOST_NET_VQ_RX].vq.handle_kick = handle_rx_kick;
693 for (i = 0; i < VHOST_NET_VQ_MAX; i++) {
694 n->vqs[i].ubufs = NULL;
695 n->vqs[i].ubuf_info = NULL;
696 n->vqs[i].upend_idx = 0;
697 n->vqs[i].done_idx = 0;
698 n->vqs[i].vhost_hlen = 0;
699 n->vqs[i].sock_hlen = 0;
700 }
701 r = vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX);
623 if (r < 0) { 702 if (r < 0) {
624 kfree(n); 703 kfree(n);
704 kfree(vqs);
625 return r; 705 return r;
626 } 706 }
627 707
628 vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev); 708 vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT, dev);
629 vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev); 709 vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN, dev);
630 n->tx_poll_state = VHOST_NET_POLL_DISABLED;
631 710
632 f->private_data = n; 711 f->private_data = n;
633 712
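
vhost_net_open() now does a two-step setup: the device-specific wrappers embed the generic virtqueues, and a separately allocated array of pointers to those embedded members is what vhost_dev_init() consumes, with both allocations unwound on failure. A rough userspace sketch of the shape of that pattern (all names here are illustrative):

#include <stdlib.h>

struct generic_vq { void (*handle_kick)(void); };
struct net_vq     { struct generic_vq vq; int upend_idx, done_idx; };

#define NVQS 2

struct net_dev {
        struct net_vq vqs[NVQS];
        struct generic_vq **vq_ptrs;    /* what the common layer consumes */
};

static struct net_dev *net_dev_open(void)
{
        struct net_dev *n = calloc(1, sizeof(*n));

        if (!n)
                return NULL;
        n->vq_ptrs = calloc(NVQS, sizeof(*n->vq_ptrs));
        if (!n->vq_ptrs) {
                free(n);                /* unwind on failure */
                return NULL;
        }
        for (int i = 0; i < NVQS; i++)
                n->vq_ptrs[i] = &n->vqs[i].vq;   /* point at the embedded member */
        return n;
}

int main(void)
{
        struct net_dev *n = net_dev_open();

        if (!n)
                return 1;
        free(n->vq_ptrs);
        free(n);
        return 0;
}
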
@@ -637,32 +716,28 @@ static int vhost_net_open(struct inode *inode, struct file *f)
637static void vhost_net_disable_vq(struct vhost_net *n, 716static void vhost_net_disable_vq(struct vhost_net *n,
638 struct vhost_virtqueue *vq) 717 struct vhost_virtqueue *vq)
639{ 718{
719 struct vhost_net_virtqueue *nvq =
720 container_of(vq, struct vhost_net_virtqueue, vq);
721 struct vhost_poll *poll = n->poll + (nvq - n->vqs);
640 if (!vq->private_data) 722 if (!vq->private_data)
641 return; 723 return;
642 if (vq == n->vqs + VHOST_NET_VQ_TX) { 724 vhost_poll_stop(poll);
643 tx_poll_stop(n);
644 n->tx_poll_state = VHOST_NET_POLL_DISABLED;
645 } else
646 vhost_poll_stop(n->poll + VHOST_NET_VQ_RX);
647} 725}
648 726
649static int vhost_net_enable_vq(struct vhost_net *n, 727static int vhost_net_enable_vq(struct vhost_net *n,
650 struct vhost_virtqueue *vq) 728 struct vhost_virtqueue *vq)
651{ 729{
730 struct vhost_net_virtqueue *nvq =
731 container_of(vq, struct vhost_net_virtqueue, vq);
732 struct vhost_poll *poll = n->poll + (nvq - n->vqs);
652 struct socket *sock; 733 struct socket *sock;
653 int ret;
654 734
655 sock = rcu_dereference_protected(vq->private_data, 735 sock = rcu_dereference_protected(vq->private_data,
656 lockdep_is_held(&vq->mutex)); 736 lockdep_is_held(&vq->mutex));
657 if (!sock) 737 if (!sock)
658 return 0; 738 return 0;
659 if (vq == n->vqs + VHOST_NET_VQ_TX) {
660 n->tx_poll_state = VHOST_NET_POLL_STOPPED;
661 ret = tx_poll_start(n, sock);
662 } else
663 ret = vhost_poll_start(n->poll + VHOST_NET_VQ_RX, sock->file);
664 739
665 return ret; 740 return vhost_poll_start(poll, sock->file);
666} 741}
667 742
668static struct socket *vhost_net_stop_vq(struct vhost_net *n, 743static struct socket *vhost_net_stop_vq(struct vhost_net *n,
@@ -682,30 +757,30 @@ static struct socket *vhost_net_stop_vq(struct vhost_net *n,
682static void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock, 757static void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock,
683 struct socket **rx_sock) 758 struct socket **rx_sock)
684{ 759{
685 *tx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_TX); 760 *tx_sock = vhost_net_stop_vq(n, &n->vqs[VHOST_NET_VQ_TX].vq);
686 *rx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_RX); 761 *rx_sock = vhost_net_stop_vq(n, &n->vqs[VHOST_NET_VQ_RX].vq);
687} 762}
688 763
689static void vhost_net_flush_vq(struct vhost_net *n, int index) 764static void vhost_net_flush_vq(struct vhost_net *n, int index)
690{ 765{
691 vhost_poll_flush(n->poll + index); 766 vhost_poll_flush(n->poll + index);
692 vhost_poll_flush(&n->dev.vqs[index].poll); 767 vhost_poll_flush(&n->vqs[index].vq.poll);
693} 768}
694 769
695static void vhost_net_flush(struct vhost_net *n) 770static void vhost_net_flush(struct vhost_net *n)
696{ 771{
697 vhost_net_flush_vq(n, VHOST_NET_VQ_TX); 772 vhost_net_flush_vq(n, VHOST_NET_VQ_TX);
698 vhost_net_flush_vq(n, VHOST_NET_VQ_RX); 773 vhost_net_flush_vq(n, VHOST_NET_VQ_RX);
699 if (n->dev.vqs[VHOST_NET_VQ_TX].ubufs) { 774 if (n->vqs[VHOST_NET_VQ_TX].ubufs) {
700 mutex_lock(&n->dev.vqs[VHOST_NET_VQ_TX].mutex); 775 mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
701 n->tx_flush = true; 776 n->tx_flush = true;
702 mutex_unlock(&n->dev.vqs[VHOST_NET_VQ_TX].mutex); 777 mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
703 /* Wait for all lower device DMAs done. */ 778 /* Wait for all lower device DMAs done. */
704 vhost_ubuf_put_and_wait(n->dev.vqs[VHOST_NET_VQ_TX].ubufs); 779 vhost_ubuf_put_and_wait(n->vqs[VHOST_NET_VQ_TX].ubufs);
705 mutex_lock(&n->dev.vqs[VHOST_NET_VQ_TX].mutex); 780 mutex_lock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
706 n->tx_flush = false; 781 n->tx_flush = false;
707 kref_init(&n->dev.vqs[VHOST_NET_VQ_TX].ubufs->kref); 782 kref_init(&n->vqs[VHOST_NET_VQ_TX].ubufs->kref);
708 mutex_unlock(&n->dev.vqs[VHOST_NET_VQ_TX].mutex); 783 mutex_unlock(&n->vqs[VHOST_NET_VQ_TX].vq.mutex);
709 } 784 }
710} 785}
711 786
@@ -719,6 +794,7 @@ static int vhost_net_release(struct inode *inode, struct file *f)
719 vhost_net_flush(n); 794 vhost_net_flush(n);
720 vhost_dev_stop(&n->dev); 795 vhost_dev_stop(&n->dev);
721 vhost_dev_cleanup(&n->dev, false); 796 vhost_dev_cleanup(&n->dev, false);
797 vhost_net_vq_reset(n);
722 if (tx_sock) 798 if (tx_sock)
723 fput(tx_sock->file); 799 fput(tx_sock->file);
724 if (rx_sock) 800 if (rx_sock)
@@ -726,6 +802,7 @@ static int vhost_net_release(struct inode *inode, struct file *f)
726 /* We do an extra flush before freeing memory, 802 /* We do an extra flush before freeing memory,
727 * since jobs can re-queue themselves. */ 803 * since jobs can re-queue themselves. */
728 vhost_net_flush(n); 804 vhost_net_flush(n);
805 kfree(n->dev.vqs);
729 kfree(n); 806 kfree(n);
730 return 0; 807 return 0;
731} 808}
@@ -799,6 +876,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
799{ 876{
800 struct socket *sock, *oldsock; 877 struct socket *sock, *oldsock;
801 struct vhost_virtqueue *vq; 878 struct vhost_virtqueue *vq;
879 struct vhost_net_virtqueue *nvq;
802 struct vhost_ubuf_ref *ubufs, *oldubufs = NULL; 880 struct vhost_ubuf_ref *ubufs, *oldubufs = NULL;
803 int r; 881 int r;
804 882
@@ -811,7 +889,8 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
811 r = -ENOBUFS; 889 r = -ENOBUFS;
812 goto err; 890 goto err;
813 } 891 }
814 vq = n->vqs + index; 892 vq = &n->vqs[index].vq;
893 nvq = &n->vqs[index];
815 mutex_lock(&vq->mutex); 894 mutex_lock(&vq->mutex);
816 895
817 /* Verify that ring has been setup correctly. */ 896 /* Verify that ring has been setup correctly. */
@@ -844,8 +923,8 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
844 if (r) 923 if (r)
845 goto err_used; 924 goto err_used;
846 925
847 oldubufs = vq->ubufs; 926 oldubufs = nvq->ubufs;
848 vq->ubufs = ubufs; 927 nvq->ubufs = ubufs;
849 928
850 n->tx_packets = 0; 929 n->tx_packets = 0;
851 n->tx_zcopy_err = 0; 930 n->tx_zcopy_err = 0;
@@ -888,14 +967,21 @@ static long vhost_net_reset_owner(struct vhost_net *n)
888 struct socket *tx_sock = NULL; 967 struct socket *tx_sock = NULL;
889 struct socket *rx_sock = NULL; 968 struct socket *rx_sock = NULL;
890 long err; 969 long err;
970 struct vhost_memory *memory;
891 971
892 mutex_lock(&n->dev.mutex); 972 mutex_lock(&n->dev.mutex);
893 err = vhost_dev_check_owner(&n->dev); 973 err = vhost_dev_check_owner(&n->dev);
894 if (err) 974 if (err)
895 goto done; 975 goto done;
976 memory = vhost_dev_reset_owner_prepare();
977 if (!memory) {
978 err = -ENOMEM;
979 goto done;
980 }
896 vhost_net_stop(n, &tx_sock, &rx_sock); 981 vhost_net_stop(n, &tx_sock, &rx_sock);
897 vhost_net_flush(n); 982 vhost_net_flush(n);
898 err = vhost_dev_reset_owner(&n->dev); 983 vhost_dev_reset_owner(&n->dev, memory);
984 vhost_net_vq_reset(n);
899done: 985done:
900 mutex_unlock(&n->dev.mutex); 986 mutex_unlock(&n->dev.mutex);
901 if (tx_sock) 987 if (tx_sock)
@@ -931,10 +1017,10 @@ static int vhost_net_set_features(struct vhost_net *n, u64 features)
931 n->dev.acked_features = features; 1017 n->dev.acked_features = features;
932 smp_wmb(); 1018 smp_wmb();
933 for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { 1019 for (i = 0; i < VHOST_NET_VQ_MAX; ++i) {
934 mutex_lock(&n->vqs[i].mutex); 1020 mutex_lock(&n->vqs[i].vq.mutex);
935 n->vqs[i].vhost_hlen = vhost_hlen; 1021 n->vqs[i].vhost_hlen = vhost_hlen;
936 n->vqs[i].sock_hlen = sock_hlen; 1022 n->vqs[i].sock_hlen = sock_hlen;
937 mutex_unlock(&n->vqs[i].mutex); 1023 mutex_unlock(&n->vqs[i].vq.mutex);
938 } 1024 }
939 vhost_net_flush(n); 1025 vhost_net_flush(n);
940 mutex_unlock(&n->dev.mutex); 1026 mutex_unlock(&n->dev.mutex);
@@ -971,11 +1057,17 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
971 return vhost_net_reset_owner(n); 1057 return vhost_net_reset_owner(n);
972 default: 1058 default:
973 mutex_lock(&n->dev.mutex); 1059 mutex_lock(&n->dev.mutex);
1060 if (ioctl == VHOST_SET_OWNER) {
1061 r = vhost_net_set_ubuf_info(n);
1062 if (r)
1063 goto out;
1064 }
974 r = vhost_dev_ioctl(&n->dev, ioctl, argp); 1065 r = vhost_dev_ioctl(&n->dev, ioctl, argp);
975 if (r == -ENOIOCTLCMD) 1066 if (r == -ENOIOCTLCMD)
976 r = vhost_vring_ioctl(&n->dev, ioctl, argp); 1067 r = vhost_vring_ioctl(&n->dev, ioctl, argp);
977 else 1068 else
978 vhost_net_flush(n); 1069 vhost_net_flush(n);
1070out:
979 mutex_unlock(&n->dev.mutex); 1071 mutex_unlock(&n->dev.mutex);
980 return r; 1072 return r;
981 } 1073 }
diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/scsi.c
index 9951297b2427..5179f7aa1b0b 100644
--- a/drivers/vhost/tcm_vhost.c
+++ b/drivers/vhost/scsi.c
@@ -45,14 +45,116 @@
45#include <target/target_core_configfs.h> 45#include <target/target_core_configfs.h>
46#include <target/configfs_macros.h> 46#include <target/configfs_macros.h>
47#include <linux/vhost.h> 47#include <linux/vhost.h>
48#include <linux/virtio_net.h> /* TODO vhost.h currently depends on this */
49#include <linux/virtio_scsi.h> 48#include <linux/virtio_scsi.h>
50#include <linux/llist.h> 49#include <linux/llist.h>
51#include <linux/bitmap.h> 50#include <linux/bitmap.h>
52 51
53#include "vhost.c" 52#include "vhost.c"
54#include "vhost.h" 53#include "vhost.h"
55#include "tcm_vhost.h" 54
55#define TCM_VHOST_VERSION "v0.1"
56#define TCM_VHOST_NAMELEN 256
57#define TCM_VHOST_MAX_CDB_SIZE 32
58
59struct vhost_scsi_inflight {
60 /* Wait for the flush operation to finish */
61 struct completion comp;
62 /* Refcount for the inflight reqs */
63 struct kref kref;
64};
65
66struct tcm_vhost_cmd {
67 /* Descriptor from vhost_get_vq_desc() for virt_queue segment */
68 int tvc_vq_desc;
69 /* virtio-scsi initiator task attribute */
70 int tvc_task_attr;
71 /* virtio-scsi initiator data direction */
72 enum dma_data_direction tvc_data_direction;
73 /* Expected data transfer length from virtio-scsi header */
74 u32 tvc_exp_data_len;
75 /* The Tag from include/linux/virtio_scsi.h:struct virtio_scsi_cmd_req */
76 u64 tvc_tag;
77 /* The number of scatterlists associated with this cmd */
78 u32 tvc_sgl_count;
79 /* Saved unpacked SCSI LUN for tcm_vhost_submission_work() */
80 u32 tvc_lun;
81 /* Pointer to the SGL formatted memory from virtio-scsi */
82 struct scatterlist *tvc_sgl;
83 /* Pointer to response */
84 struct virtio_scsi_cmd_resp __user *tvc_resp;
85 /* Pointer to vhost_scsi for our device */
86 struct vhost_scsi *tvc_vhost;
87 /* Pointer to vhost_virtqueue for the cmd */
88 struct vhost_virtqueue *tvc_vq;
89 /* Pointer to vhost nexus memory */
90 struct tcm_vhost_nexus *tvc_nexus;
91 /* The TCM I/O descriptor that is accessed via container_of() */
92 struct se_cmd tvc_se_cmd;
93 /* work item used for cmwq dispatch to tcm_vhost_submission_work() */
94 struct work_struct work;
95 /* Copy of the incoming SCSI command descriptor block (CDB) */
96 unsigned char tvc_cdb[TCM_VHOST_MAX_CDB_SIZE];
97 /* Sense buffer that will be mapped into outgoing status */
98 unsigned char tvc_sense_buf[TRANSPORT_SENSE_BUFFER];
99 /* Completed commands list, serviced from vhost worker thread */
100 struct llist_node tvc_completion_list;
101 /* Used to track inflight cmd */
102 struct vhost_scsi_inflight *inflight;
103};
104
105struct tcm_vhost_nexus {
106 /* Pointer to TCM session for I_T Nexus */
107 struct se_session *tvn_se_sess;
108};
109
110struct tcm_vhost_nacl {
111 /* Binary World Wide unique Port Name for Vhost Initiator port */
112 u64 iport_wwpn;
113 /* ASCII formatted WWPN for Sas Initiator port */
114 char iport_name[TCM_VHOST_NAMELEN];
115 /* Returned by tcm_vhost_make_nodeacl() */
116 struct se_node_acl se_node_acl;
117};
118
119struct vhost_scsi;
120struct tcm_vhost_tpg {
121 /* Vhost port target portal group tag for TCM */
122 u16 tport_tpgt;
123 /* Used to track number of TPG Port/Lun Links wrt to explict I_T Nexus shutdown */
124 int tv_tpg_port_count;
125 /* Used for vhost_scsi device reference to tpg_nexus, protected by tv_tpg_mutex */
126 int tv_tpg_vhost_count;
127 /* list for tcm_vhost_list */
128 struct list_head tv_tpg_list;
129 /* Used to protect access for tpg_nexus */
130 struct mutex tv_tpg_mutex;
131 /* Pointer to the TCM VHost I_T Nexus for this TPG endpoint */
132 struct tcm_vhost_nexus *tpg_nexus;
133 /* Pointer back to tcm_vhost_tport */
134 struct tcm_vhost_tport *tport;
135 /* Returned by tcm_vhost_make_tpg() */
136 struct se_portal_group se_tpg;
137 /* Pointer back to vhost_scsi, protected by tv_tpg_mutex */
138 struct vhost_scsi *vhost_scsi;
139};
140
141struct tcm_vhost_tport {
142 /* SCSI protocol the tport is providing */
143 u8 tport_proto_id;
144 /* Binary World Wide unique Port Name for Vhost Target port */
145 u64 tport_wwpn;
146 /* ASCII formatted WWPN for Vhost Target port */
147 char tport_name[TCM_VHOST_NAMELEN];
148 /* Returned by tcm_vhost_make_tport() */
149 struct se_wwn tport_wwn;
150};
151
152struct tcm_vhost_evt {
153 /* event to be sent to guest */
154 struct virtio_scsi_event event;
155 /* event list, serviced from vhost worker thread */
156 struct llist_node list;
157};
56 158
57enum { 159enum {
58 VHOST_SCSI_VQ_CTL = 0, 160 VHOST_SCSI_VQ_CTL = 0,
@@ -60,20 +162,51 @@ enum {
60 VHOST_SCSI_VQ_IO = 2, 162 VHOST_SCSI_VQ_IO = 2,
61}; 163};
62 164
165/*
166 * VIRTIO_RING_F_EVENT_IDX seems broken. Not sure the bug is in
167 * kernel but disabling it helps.
168 * TODO: debug and remove the workaround.
169 */
170enum {
171 VHOST_SCSI_FEATURES = (VHOST_FEATURES & (~VIRTIO_RING_F_EVENT_IDX)) |
172 (1ULL << VIRTIO_SCSI_F_HOTPLUG)
173};
174
63#define VHOST_SCSI_MAX_TARGET 256 175#define VHOST_SCSI_MAX_TARGET 256
64#define VHOST_SCSI_MAX_VQ 128 176#define VHOST_SCSI_MAX_VQ 128
177#define VHOST_SCSI_MAX_EVENT 128
178
179struct vhost_scsi_virtqueue {
180 struct vhost_virtqueue vq;
181 /*
182 * Reference counting for inflight reqs, used for flush operation. At
183 * each time, one reference tracks new commands submitted, while we
184 * wait for another one to reach 0.
185 */
186 struct vhost_scsi_inflight inflights[2];
187 /*
188 * Indicate current inflight in use, protected by vq->mutex.
189 * Writers must also take dev mutex and flush under it.
190 */
191 int inflight_idx;
192};
65 193
66struct vhost_scsi { 194struct vhost_scsi {
67 /* Protected by vhost_scsi->dev.mutex */ 195 /* Protected by vhost_scsi->dev.mutex */
68 struct tcm_vhost_tpg *vs_tpg[VHOST_SCSI_MAX_TARGET]; 196 struct tcm_vhost_tpg **vs_tpg;
69 char vs_vhost_wwpn[TRANSPORT_IQN_LEN]; 197 char vs_vhost_wwpn[TRANSPORT_IQN_LEN];
70 bool vs_endpoint;
71 198
72 struct vhost_dev dev; 199 struct vhost_dev dev;
73 struct vhost_virtqueue vqs[VHOST_SCSI_MAX_VQ]; 200 struct vhost_scsi_virtqueue vqs[VHOST_SCSI_MAX_VQ];
74 201
75 struct vhost_work vs_completion_work; /* cmd completion work item */ 202 struct vhost_work vs_completion_work; /* cmd completion work item */
76 struct llist_head vs_completion_list; /* cmd completion queue */ 203 struct llist_head vs_completion_list; /* cmd completion queue */
204
205 struct vhost_work vs_event_work; /* evt injection work item */
206 struct llist_head vs_event_list; /* evt injection queue */
207
208 bool vs_events_missed; /* any missed events, protected by vq->mutex */
209 int vs_events_nr; /* num of pending events, protected by vq->mutex */
77}; 210};
78 211
79/* Local pointer to allocated TCM configfs fabric module */ 212/* Local pointer to allocated TCM configfs fabric module */
@@ -91,6 +224,59 @@ static int iov_num_pages(struct iovec *iov)
91 ((unsigned long)iov->iov_base & PAGE_MASK)) >> PAGE_SHIFT; 224 ((unsigned long)iov->iov_base & PAGE_MASK)) >> PAGE_SHIFT;
92} 225}
93 226
227void tcm_vhost_done_inflight(struct kref *kref)
228{
229 struct vhost_scsi_inflight *inflight;
230
231 inflight = container_of(kref, struct vhost_scsi_inflight, kref);
232 complete(&inflight->comp);
233}
234
235static void tcm_vhost_init_inflight(struct vhost_scsi *vs,
236 struct vhost_scsi_inflight *old_inflight[])
237{
238 struct vhost_scsi_inflight *new_inflight;
239 struct vhost_virtqueue *vq;
240 int idx, i;
241
242 for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
243 vq = &vs->vqs[i].vq;
244
245 mutex_lock(&vq->mutex);
246
247 /* store old infight */
248 idx = vs->vqs[i].inflight_idx;
249 if (old_inflight)
250 old_inflight[i] = &vs->vqs[i].inflights[idx];
251
252 /* setup new infight */
253 vs->vqs[i].inflight_idx = idx ^ 1;
254 new_inflight = &vs->vqs[i].inflights[idx ^ 1];
255 kref_init(&new_inflight->kref);
256 init_completion(&new_inflight->comp);
257
258 mutex_unlock(&vq->mutex);
259 }
260}
261
262static struct vhost_scsi_inflight *
263tcm_vhost_get_inflight(struct vhost_virtqueue *vq)
264{
265 struct vhost_scsi_inflight *inflight;
266 struct vhost_scsi_virtqueue *svq;
267
268 svq = container_of(vq, struct vhost_scsi_virtqueue, vq);
269 inflight = &svq->inflights[svq->inflight_idx];
270 kref_get(&inflight->kref);
271
272 return inflight;
273}
274
275static void tcm_vhost_put_inflight(struct vhost_scsi_inflight *inflight)
276{
277 kref_put(&inflight->kref, tcm_vhost_done_inflight);
278}
279
94static int tcm_vhost_check_true(struct se_portal_group *se_tpg) 280static int tcm_vhost_check_true(struct se_portal_group *se_tpg)
95{ 281{
96 return 1; 282 return 1;
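
The new vhost_scsi_inflight machinery double-buffers a per-queue reference counter: every command pins the currently active counter, and a flush flips the active slot so new commands land on the fresh counter while the flush waits for the old one to drain to zero. A hedged userspace sketch of the flip (the kernel pairs a kref with a completion for the actual waiting):

#include <stdio.h>

struct inflight { int refs; };                  /* kernel: kref + completion */

struct scsi_vq {
        struct inflight inflights[2];
        int active;                             /* slot new commands pin */
};

static struct inflight *get_inflight(struct scsi_vq *q)
{
        struct inflight *inf = &q->inflights[q->active];

        inf->refs++;                            /* one ref per in-flight command */
        return inf;
}

static struct inflight *flush_begin(struct scsi_vq *q)
{
        struct inflight *old = &q->inflights[q->active];

        q->active ^= 1;                         /* flip: new commands use the other slot */
        q->inflights[q->active].refs = 1;       /* fresh counter starts at one */
        old->refs--;                            /* drop the initial reference... */
        return old;                             /* ...then wait until old->refs hits 0 */
}

int main(void)
{
        struct scsi_vq q = { .inflights = { { .refs = 1 }, { .refs = 0 } }, .active = 0 };
        struct inflight *cmd_ref = get_inflight(&q);
        struct inflight *old = flush_begin(&q);

        cmd_ref->refs--;                        /* command completes after the flip */
        printf("old refs now %d (flush may proceed)\n", old->refs);
        return 0;
}
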
@@ -341,6 +527,37 @@ static int tcm_vhost_queue_tm_rsp(struct se_cmd *se_cmd)
341 return 0; 527 return 0;
342} 528}
343 529
530static void tcm_vhost_free_evt(struct vhost_scsi *vs, struct tcm_vhost_evt *evt)
531{
532 vs->vs_events_nr--;
533 kfree(evt);
534}
535
536static struct tcm_vhost_evt *tcm_vhost_allocate_evt(struct vhost_scsi *vs,
537 u32 event, u32 reason)
538{
539 struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
540 struct tcm_vhost_evt *evt;
541
542 if (vs->vs_events_nr > VHOST_SCSI_MAX_EVENT) {
543 vs->vs_events_missed = true;
544 return NULL;
545 }
546
547 evt = kzalloc(sizeof(*evt), GFP_KERNEL);
548 if (!evt) {
549 vq_err(vq, "Failed to allocate tcm_vhost_evt\n");
550 vs->vs_events_missed = true;
551 return NULL;
552 }
553
554 evt->event.event = event;
555 evt->event.reason = reason;
556 vs->vs_events_nr++;
557
558 return evt;
559}
560
344static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd) 561static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd)
345{ 562{
346 struct se_cmd *se_cmd = &tv_cmd->tvc_se_cmd; 563 struct se_cmd *se_cmd = &tv_cmd->tvc_se_cmd;
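
tcm_vhost_allocate_evt() caps the number of queued hotplug events at VHOST_SCSI_MAX_EVENT; past the cap (or on allocation failure) it only records that events were missed, so the guest can later be told via VIRTIO_SCSI_T_EVENTS_MISSED. A minimal sketch of that policy, with simplified names:

#include <stdbool.h>
#include <stdlib.h>

#define MAX_EVENT 128

struct evt { int event, reason; };

static struct evt *allocate_evt(int *nr, bool *missed, int event, int reason)
{
        struct evt *e;

        if (*nr > MAX_EVENT) {          /* queue full: drop, but remember it */
                *missed = true;
                return NULL;
        }
        e = calloc(1, sizeof(*e));
        if (!e) {
                *missed = true;
                return NULL;
        }
        e->event = event;
        e->reason = reason;
        (*nr)++;
        return e;
}

int main(void)
{
        int nr = 0;
        bool missed = false;
        struct evt *e = allocate_evt(&nr, &missed, 1 /* event */, 0 /* reason */);

        free(e);                        /* in the driver it would be queued and freed later */
        return 0;
}
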
@@ -356,9 +573,80 @@ static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd)
356 kfree(tv_cmd->tvc_sgl); 573 kfree(tv_cmd->tvc_sgl);
357 } 574 }
358 575
576 tcm_vhost_put_inflight(tv_cmd->inflight);
577
359 kfree(tv_cmd); 578 kfree(tv_cmd);
360} 579}
361 580
581static void tcm_vhost_do_evt_work(struct vhost_scsi *vs,
582 struct tcm_vhost_evt *evt)
583{
584 struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
585 struct virtio_scsi_event *event = &evt->event;
586 struct virtio_scsi_event __user *eventp;
587 unsigned out, in;
588 int head, ret;
589
590 if (!vq->private_data) {
591 vs->vs_events_missed = true;
592 return;
593 }
594
595again:
596 vhost_disable_notify(&vs->dev, vq);
597 head = vhost_get_vq_desc(&vs->dev, vq, vq->iov,
598 ARRAY_SIZE(vq->iov), &out, &in,
599 NULL, NULL);
600 if (head < 0) {
601 vs->vs_events_missed = true;
602 return;
603 }
604 if (head == vq->num) {
605 if (vhost_enable_notify(&vs->dev, vq))
606 goto again;
607 vs->vs_events_missed = true;
608 return;
609 }
610
611 if ((vq->iov[out].iov_len != sizeof(struct virtio_scsi_event))) {
612 vq_err(vq, "Expecting virtio_scsi_event, got %zu bytes\n",
613 vq->iov[out].iov_len);
614 vs->vs_events_missed = true;
615 return;
616 }
617
618 if (vs->vs_events_missed) {
619 event->event |= VIRTIO_SCSI_T_EVENTS_MISSED;
620 vs->vs_events_missed = false;
621 }
622
623 eventp = vq->iov[out].iov_base;
624 ret = __copy_to_user(eventp, event, sizeof(*event));
625 if (!ret)
626 vhost_add_used_and_signal(&vs->dev, vq, head, 0);
627 else
628 vq_err(vq, "Faulted on tcm_vhost_send_event\n");
629}
630
631static void tcm_vhost_evt_work(struct vhost_work *work)
632{
633 struct vhost_scsi *vs = container_of(work, struct vhost_scsi,
634 vs_event_work);
635 struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
636 struct tcm_vhost_evt *evt;
637 struct llist_node *llnode;
638
639 mutex_lock(&vq->mutex);
640 llnode = llist_del_all(&vs->vs_event_list);
641 while (llnode) {
642 evt = llist_entry(llnode, struct tcm_vhost_evt, list);
643 llnode = llist_next(llnode);
644 tcm_vhost_do_evt_work(vs, evt);
645 tcm_vhost_free_evt(vs, evt);
646 }
647 mutex_unlock(&vq->mutex);
648}
649
362/* Fill in status and signal that we are done processing this command 650/* Fill in status and signal that we are done processing this command
363 * 651 *
364 * This is scheduled in the vhost work queue so we are called with the owner 652 * This is scheduled in the vhost work queue so we are called with the owner
@@ -395,8 +683,10 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
395 v_rsp.sense_len); 683 v_rsp.sense_len);
396 ret = copy_to_user(tv_cmd->tvc_resp, &v_rsp, sizeof(v_rsp)); 684 ret = copy_to_user(tv_cmd->tvc_resp, &v_rsp, sizeof(v_rsp));
397 if (likely(ret == 0)) { 685 if (likely(ret == 0)) {
686 struct vhost_scsi_virtqueue *q;
398 vhost_add_used(tv_cmd->tvc_vq, tv_cmd->tvc_vq_desc, 0); 687 vhost_add_used(tv_cmd->tvc_vq, tv_cmd->tvc_vq_desc, 0);
399 vq = tv_cmd->tvc_vq - vs->vqs; 688 q = container_of(tv_cmd->tvc_vq, struct vhost_scsi_virtqueue, vq);
689 vq = q - vs->vqs;
400 __set_bit(vq, signal); 690 __set_bit(vq, signal);
401 } else 691 } else
402 pr_err("Faulted on virtio_scsi_cmd_resp\n"); 692 pr_err("Faulted on virtio_scsi_cmd_resp\n");
@@ -407,10 +697,11 @@ static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
407 vq = -1; 697 vq = -1;
408 while ((vq = find_next_bit(signal, VHOST_SCSI_MAX_VQ, vq + 1)) 698 while ((vq = find_next_bit(signal, VHOST_SCSI_MAX_VQ, vq + 1))
409 < VHOST_SCSI_MAX_VQ) 699 < VHOST_SCSI_MAX_VQ)
410 vhost_signal(&vs->dev, &vs->vqs[vq]); 700 vhost_signal(&vs->dev, &vs->vqs[vq].vq);
411} 701}
412 702
413static struct tcm_vhost_cmd *vhost_scsi_allocate_cmd( 703static struct tcm_vhost_cmd *vhost_scsi_allocate_cmd(
704 struct vhost_virtqueue *vq,
414 struct tcm_vhost_tpg *tv_tpg, 705 struct tcm_vhost_tpg *tv_tpg,
415 struct virtio_scsi_cmd_req *v_req, 706 struct virtio_scsi_cmd_req *v_req,
416 u32 exp_data_len, 707 u32 exp_data_len,
@@ -435,6 +726,7 @@ static struct tcm_vhost_cmd *vhost_scsi_allocate_cmd(
435 tv_cmd->tvc_exp_data_len = exp_data_len; 726 tv_cmd->tvc_exp_data_len = exp_data_len;
436 tv_cmd->tvc_data_direction = data_direction; 727 tv_cmd->tvc_data_direction = data_direction;
437 tv_cmd->tvc_nexus = tv_nexus; 728 tv_cmd->tvc_nexus = tv_nexus;
729 tv_cmd->inflight = tcm_vhost_get_inflight(vq);
438 730
439 return tv_cmd; 731 return tv_cmd;
440} 732}
@@ -570,9 +862,27 @@ static void tcm_vhost_submission_work(struct work_struct *work)
570 } 862 }
571} 863}
572 864
865static void vhost_scsi_send_bad_target(struct vhost_scsi *vs,
866 struct vhost_virtqueue *vq, int head, unsigned out)
867{
868 struct virtio_scsi_cmd_resp __user *resp;
869 struct virtio_scsi_cmd_resp rsp;
870 int ret;
871
872 memset(&rsp, 0, sizeof(rsp));
873 rsp.response = VIRTIO_SCSI_S_BAD_TARGET;
874 resp = vq->iov[out].iov_base;
875 ret = __copy_to_user(resp, &rsp, sizeof(rsp));
876 if (!ret)
877 vhost_add_used_and_signal(&vs->dev, vq, head, 0);
878 else
879 pr_err("Faulted on virtio_scsi_cmd_resp\n");
880}
881
573static void vhost_scsi_handle_vq(struct vhost_scsi *vs, 882static void vhost_scsi_handle_vq(struct vhost_scsi *vs,
574 struct vhost_virtqueue *vq) 883 struct vhost_virtqueue *vq)
575{ 884{
885 struct tcm_vhost_tpg **vs_tpg;
576 struct virtio_scsi_cmd_req v_req; 886 struct virtio_scsi_cmd_req v_req;
577 struct tcm_vhost_tpg *tv_tpg; 887 struct tcm_vhost_tpg *tv_tpg;
578 struct tcm_vhost_cmd *tv_cmd; 888 struct tcm_vhost_cmd *tv_cmd;
@@ -581,8 +891,16 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs,
581 int head, ret; 891 int head, ret;
582 u8 target; 892 u8 target;
583 893
584 /* Must use ioctl VHOST_SCSI_SET_ENDPOINT */ 894 /*
585 if (unlikely(!vs->vs_endpoint)) 895 * We can handle the vq only after the endpoint is setup by calling the
896 * VHOST_SCSI_SET_ENDPOINT ioctl.
897 *
898 * TODO: Check that we are running from vhost_worker which acts
899 * as read-side critical section for vhost kind of RCU.
900 * See the comments in struct vhost_virtqueue in drivers/vhost/vhost.h
901 */
902 vs_tpg = rcu_dereference_check(vq->private_data, 1);
903 if (!vs_tpg)
586 return; 904 return;
587 905
588 mutex_lock(&vq->mutex); 906 mutex_lock(&vq->mutex);
@@ -652,23 +970,11 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs,
652 970
653 /* Extract the tpgt */ 971 /* Extract the tpgt */
654 target = v_req.lun[1]; 972 target = v_req.lun[1];
655 tv_tpg = vs->vs_tpg[target]; 973 tv_tpg = ACCESS_ONCE(vs_tpg[target]);
656 974
657 /* Target does not exist, fail the request */ 975 /* Target does not exist, fail the request */
658 if (unlikely(!tv_tpg)) { 976 if (unlikely(!tv_tpg)) {
659 struct virtio_scsi_cmd_resp __user *resp; 977 vhost_scsi_send_bad_target(vs, vq, head, out);
660 struct virtio_scsi_cmd_resp rsp;
661
662 memset(&rsp, 0, sizeof(rsp));
663 rsp.response = VIRTIO_SCSI_S_BAD_TARGET;
664 resp = vq->iov[out].iov_base;
665 ret = __copy_to_user(resp, &rsp, sizeof(rsp));
666 if (!ret)
667 vhost_add_used_and_signal(&vs->dev,
668 vq, head, 0);
669 else
670 pr_err("Faulted on virtio_scsi_cmd_resp\n");
671
672 continue; 978 continue;
673 } 979 }
674 980
@@ -676,27 +982,18 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs,
676 for (i = 0; i < data_num; i++) 982 for (i = 0; i < data_num; i++)
677 exp_data_len += vq->iov[data_first + i].iov_len; 983 exp_data_len += vq->iov[data_first + i].iov_len;
678 984
679 tv_cmd = vhost_scsi_allocate_cmd(tv_tpg, &v_req, 985 tv_cmd = vhost_scsi_allocate_cmd(vq, tv_tpg, &v_req,
680 exp_data_len, data_direction); 986 exp_data_len, data_direction);
681 if (IS_ERR(tv_cmd)) { 987 if (IS_ERR(tv_cmd)) {
682 vq_err(vq, "vhost_scsi_allocate_cmd failed %ld\n", 988 vq_err(vq, "vhost_scsi_allocate_cmd failed %ld\n",
683 PTR_ERR(tv_cmd)); 989 PTR_ERR(tv_cmd));
684 break; 990 goto err_cmd;
685 } 991 }
686 pr_debug("Allocated tv_cmd: %p exp_data_len: %d, data_direction" 992 pr_debug("Allocated tv_cmd: %p exp_data_len: %d, data_direction"
687 ": %d\n", tv_cmd, exp_data_len, data_direction); 993 ": %d\n", tv_cmd, exp_data_len, data_direction);
688 994
689 tv_cmd->tvc_vhost = vs; 995 tv_cmd->tvc_vhost = vs;
690 tv_cmd->tvc_vq = vq; 996 tv_cmd->tvc_vq = vq;
691
692 if (unlikely(vq->iov[out].iov_len !=
693 sizeof(struct virtio_scsi_cmd_resp))) {
694 vq_err(vq, "Expecting virtio_scsi_cmd_resp, got %zu"
695 " bytes, out: %d, in: %d\n",
696 vq->iov[out].iov_len, out, in);
697 break;
698 }
699
700 tv_cmd->tvc_resp = vq->iov[out].iov_base; 997 tv_cmd->tvc_resp = vq->iov[out].iov_base;
701 998
702 /* 999 /*
@@ -716,7 +1013,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs,
716 " exceeds SCSI_MAX_VARLEN_CDB_SIZE: %d\n", 1013 " exceeds SCSI_MAX_VARLEN_CDB_SIZE: %d\n",
717 scsi_command_size(tv_cmd->tvc_cdb), 1014 scsi_command_size(tv_cmd->tvc_cdb),
718 TCM_VHOST_MAX_CDB_SIZE); 1015 TCM_VHOST_MAX_CDB_SIZE);
719 break; /* TODO */ 1016 goto err_free;
720 } 1017 }
721 tv_cmd->tvc_lun = ((v_req.lun[2] << 8) | v_req.lun[3]) & 0x3FFF; 1018 tv_cmd->tvc_lun = ((v_req.lun[2] << 8) | v_req.lun[3]) & 0x3FFF;
722 1019
@@ -729,7 +1026,7 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs,
729 data_direction == DMA_TO_DEVICE); 1026 data_direction == DMA_TO_DEVICE);
730 if (unlikely(ret)) { 1027 if (unlikely(ret)) {
731 vq_err(vq, "Failed to map iov to sgl\n"); 1028 vq_err(vq, "Failed to map iov to sgl\n");
732 break; /* TODO */ 1029 goto err_free;
733 } 1030 }
734 } 1031 }
735 1032
@@ -750,6 +1047,13 @@ static void vhost_scsi_handle_vq(struct vhost_scsi *vs,
750 } 1047 }
751 1048
752 mutex_unlock(&vq->mutex); 1049 mutex_unlock(&vq->mutex);
1050 return;
1051
1052err_free:
1053 vhost_scsi_free_cmd(tv_cmd);
1054err_cmd:
1055 vhost_scsi_send_bad_target(vs, vq, head, out);
1056 mutex_unlock(&vq->mutex);
753} 1057}
754 1058
755static void vhost_scsi_ctl_handle_kick(struct vhost_work *work) 1059static void vhost_scsi_ctl_handle_kick(struct vhost_work *work)
@@ -757,9 +1061,46 @@ static void vhost_scsi_ctl_handle_kick(struct vhost_work *work)
757 pr_debug("%s: The handling func for control queue.\n", __func__); 1061 pr_debug("%s: The handling func for control queue.\n", __func__);
758} 1062}
759 1063
1064static void tcm_vhost_send_evt(struct vhost_scsi *vs, struct tcm_vhost_tpg *tpg,
1065 struct se_lun *lun, u32 event, u32 reason)
1066{
1067 struct tcm_vhost_evt *evt;
1068
1069 evt = tcm_vhost_allocate_evt(vs, event, reason);
1070 if (!evt)
1071 return;
1072
1073 if (tpg && lun) {
1074 /* TODO: share lun setup code with virtio-scsi.ko */
1075 /*
1076 * Note: evt->event is zeroed when we allocate it and
1077 * lun[4-7] need to be zero according to virtio-scsi spec.
1078 */
1079 evt->event.lun[0] = 0x01;
1080 evt->event.lun[1] = tpg->tport_tpgt & 0xFF;
1081 if (lun->unpacked_lun >= 256)
1082 evt->event.lun[2] = lun->unpacked_lun >> 8 | 0x40 ;
1083 evt->event.lun[3] = lun->unpacked_lun & 0xFF;
1084 }
1085
1086 llist_add(&evt->list, &vs->vs_event_list);
1087 vhost_work_queue(&vs->dev, &vs->vs_event_work);
1088}
1089
760static void vhost_scsi_evt_handle_kick(struct vhost_work *work) 1090static void vhost_scsi_evt_handle_kick(struct vhost_work *work)
761{ 1091{
762 pr_debug("%s: The handling func for event queue.\n", __func__); 1092 struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
1093 poll.work);
1094 struct vhost_scsi *vs = container_of(vq->dev, struct vhost_scsi, dev);
1095
1096 mutex_lock(&vq->mutex);
1097 if (!vq->private_data)
1098 goto out;
1099
1100 if (vs->vs_events_missed)
1101 tcm_vhost_send_evt(vs, NULL, NULL, VIRTIO_SCSI_T_NO_EVENT, 0);
1102out:
1103 mutex_unlock(&vq->mutex);
763} 1104}
764 1105
765static void vhost_scsi_handle_kick(struct vhost_work *work) 1106static void vhost_scsi_handle_kick(struct vhost_work *work)
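
tcm_vhost_send_evt() packs the target and LUN into the 8-byte virtio-scsi event LUN field: byte 0 is 0x01, byte 1 carries the TPG tag, and bytes 2-3 hold the LUN with flag 0x40 set in byte 2 only when the LUN does not fit in one byte (bytes 4-7 stay zero). A hedged standalone sketch of that encoding:

#include <stdint.h>
#include <stdio.h>

static void encode_lun(uint8_t lun[8], uint8_t tpgt, uint16_t unpacked_lun)
{
        lun[0] = 0x01;
        lun[1] = tpgt;
        lun[2] = unpacked_lun >= 256 ? (unpacked_lun >> 8) | 0x40 : 0;
        lun[3] = unpacked_lun & 0xFF;
        /* bytes 4-7 stay zero per the virtio-scsi spec */
}

int main(void)
{
        uint8_t lun[8] = { 0 };

        encode_lun(lun, 1, 300);
        printf("%02x %02x %02x %02x\n", lun[0], lun[1], lun[2], lun[3]);
        return 0;
}
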
@@ -771,9 +1112,45 @@ static void vhost_scsi_handle_kick(struct vhost_work *work)
771 vhost_scsi_handle_vq(vs, vq); 1112 vhost_scsi_handle_vq(vs, vq);
772} 1113}
773 1114
1115static void vhost_scsi_flush_vq(struct vhost_scsi *vs, int index)
1116{
1117 vhost_poll_flush(&vs->vqs[index].vq.poll);
1118}
1119
1120/* Callers must hold dev mutex */
1121static void vhost_scsi_flush(struct vhost_scsi *vs)
1122{
1123 struct vhost_scsi_inflight *old_inflight[VHOST_SCSI_MAX_VQ];
1124 int i;
1125
1126 /* Init new inflight and remember the old inflight */
1127 tcm_vhost_init_inflight(vs, old_inflight);
1128
1129 /*
1130 * The inflight->kref was initialized to 1. We decrement it here to
1131 * indicate the start of the flush operation so that it will reach 0
1132 * when all the reqs are finished.
1133 */
1134 for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
1135 kref_put(&old_inflight[i]->kref, tcm_vhost_done_inflight);
1136
1137 /* Flush both the vhost poll and vhost work */
1138 for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
1139 vhost_scsi_flush_vq(vs, i);
1140 vhost_work_flush(&vs->dev, &vs->vs_completion_work);
1141 vhost_work_flush(&vs->dev, &vs->vs_event_work);
1142
1143 /* Wait for all reqs issued before the flush to be finished */
1144 for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
1145 wait_for_completion(&old_inflight[i]->comp);
1146}
1147
774/* 1148/*
775 * Called from vhost_scsi_ioctl() context to walk the list of available 1149 * Called from vhost_scsi_ioctl() context to walk the list of available
776 * tcm_vhost_tpg with an active struct tcm_vhost_nexus 1150 * tcm_vhost_tpg with an active struct tcm_vhost_nexus
1151 *
1152 * The lock nesting rule is:
1153 * tcm_vhost_mutex -> vs->dev.mutex -> tpg->tv_tpg_mutex -> vq->mutex
777 */ 1154 */
778static int vhost_scsi_set_endpoint( 1155static int vhost_scsi_set_endpoint(
779 struct vhost_scsi *vs, 1156 struct vhost_scsi *vs,
@@ -781,20 +1158,32 @@ static int vhost_scsi_set_endpoint(
781{ 1158{
782 struct tcm_vhost_tport *tv_tport; 1159 struct tcm_vhost_tport *tv_tport;
783 struct tcm_vhost_tpg *tv_tpg; 1160 struct tcm_vhost_tpg *tv_tpg;
1161 struct tcm_vhost_tpg **vs_tpg;
1162 struct vhost_virtqueue *vq;
1163 int index, ret, i, len;
784 bool match = false; 1164 bool match = false;
785 int index, ret;
786 1165
1166 mutex_lock(&tcm_vhost_mutex);
787 mutex_lock(&vs->dev.mutex); 1167 mutex_lock(&vs->dev.mutex);
1168
788 /* Verify that ring has been setup correctly. */ 1169 /* Verify that ring has been setup correctly. */
789 for (index = 0; index < vs->dev.nvqs; ++index) { 1170 for (index = 0; index < vs->dev.nvqs; ++index) {
790 /* Verify that ring has been setup correctly. */ 1171 /* Verify that ring has been setup correctly. */
791 if (!vhost_vq_access_ok(&vs->vqs[index])) { 1172 if (!vhost_vq_access_ok(&vs->vqs[index].vq)) {
792 mutex_unlock(&vs->dev.mutex); 1173 ret = -EFAULT;
793 return -EFAULT; 1174 goto out;
794 } 1175 }
795 } 1176 }
796 1177
797 mutex_lock(&tcm_vhost_mutex); 1178 len = sizeof(vs_tpg[0]) * VHOST_SCSI_MAX_TARGET;
1179 vs_tpg = kzalloc(len, GFP_KERNEL);
1180 if (!vs_tpg) {
1181 ret = -ENOMEM;
1182 goto out;
1183 }
1184 if (vs->vs_tpg)
1185 memcpy(vs_tpg, vs->vs_tpg, len);
1186
798 list_for_each_entry(tv_tpg, &tcm_vhost_list, tv_tpg_list) { 1187 list_for_each_entry(tv_tpg, &tcm_vhost_list, tv_tpg_list) {
799 mutex_lock(&tv_tpg->tv_tpg_mutex); 1188 mutex_lock(&tv_tpg->tv_tpg_mutex);
800 if (!tv_tpg->tpg_nexus) { 1189 if (!tv_tpg->tpg_nexus) {
@@ -808,31 +1197,48 @@ static int vhost_scsi_set_endpoint(
808 tv_tport = tv_tpg->tport; 1197 tv_tport = tv_tpg->tport;
809 1198
810 if (!strcmp(tv_tport->tport_name, t->vhost_wwpn)) { 1199 if (!strcmp(tv_tport->tport_name, t->vhost_wwpn)) {
811 if (vs->vs_tpg[tv_tpg->tport_tpgt]) { 1200 if (vs->vs_tpg && vs->vs_tpg[tv_tpg->tport_tpgt]) {
1201 kfree(vs_tpg);
812 mutex_unlock(&tv_tpg->tv_tpg_mutex); 1202 mutex_unlock(&tv_tpg->tv_tpg_mutex);
813 mutex_unlock(&tcm_vhost_mutex); 1203 ret = -EEXIST;
814 mutex_unlock(&vs->dev.mutex); 1204 goto out;
815 return -EEXIST;
816 } 1205 }
817 tv_tpg->tv_tpg_vhost_count++; 1206 tv_tpg->tv_tpg_vhost_count++;
818 vs->vs_tpg[tv_tpg->tport_tpgt] = tv_tpg; 1207 tv_tpg->vhost_scsi = vs;
1208 vs_tpg[tv_tpg->tport_tpgt] = tv_tpg;
819 smp_mb__after_atomic_inc(); 1209 smp_mb__after_atomic_inc();
820 match = true; 1210 match = true;
821 } 1211 }
822 mutex_unlock(&tv_tpg->tv_tpg_mutex); 1212 mutex_unlock(&tv_tpg->tv_tpg_mutex);
823 } 1213 }
824 mutex_unlock(&tcm_vhost_mutex);
825 1214
826 if (match) { 1215 if (match) {
827 memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn, 1216 memcpy(vs->vs_vhost_wwpn, t->vhost_wwpn,
828 sizeof(vs->vs_vhost_wwpn)); 1217 sizeof(vs->vs_vhost_wwpn));
829 vs->vs_endpoint = true; 1218 for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
1219 vq = &vs->vqs[i].vq;
1220 /* Flushing the vhost_work acts as synchronize_rcu */
1221 mutex_lock(&vq->mutex);
1222 rcu_assign_pointer(vq->private_data, vs_tpg);
1223 vhost_init_used(vq);
1224 mutex_unlock(&vq->mutex);
1225 }
830 ret = 0; 1226 ret = 0;
831 } else { 1227 } else {
832 ret = -EEXIST; 1228 ret = -EEXIST;
833 } 1229 }
834 1230
1231 /*
1232 * Act as synchronize_rcu to make sure access to
1233 * old vs->vs_tpg is finished.
1234 */
1235 vhost_scsi_flush(vs);
1236 kfree(vs->vs_tpg);
1237 vs->vs_tpg = vs_tpg;
1238
1239out:
835 mutex_unlock(&vs->dev.mutex); 1240 mutex_unlock(&vs->dev.mutex);
1241 mutex_unlock(&tcm_vhost_mutex);
836 return ret; 1242 return ret;
837} 1243}
838 1244
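
vhost_scsi_set_endpoint() now republishes the per-device target map rather than flipping a vs_endpoint flag: it builds a new array, copies the old entries, adds the matching targets, publishes the new pointer to the vqs, and frees the old copy only after the vhost flush has acted as the grace period. A loose userspace sketch of that copy-then-swap idea (the RCU/flush machinery is omitted and only hinted at in the comment):

#include <stdlib.h>
#include <string.h>

#define MAX_TARGET 256

struct tpg { int id; };

static int publish_targets(struct tpg ***live, struct tpg *new_tpg, int tpgt)
{
        struct tpg **old = *live;
        struct tpg **fresh = calloc(MAX_TARGET, sizeof(*fresh));

        if (!fresh)
                return -1;
        if (old)
                memcpy(fresh, old, MAX_TARGET * sizeof(*fresh));
        fresh[tpgt] = new_tpg;
        *live = fresh;          /* readers now see the new map */
        /* ...wait for readers of 'old' to drain before freeing it... */
        free(old);
        return 0;
}

int main(void)
{
        struct tpg **map = NULL;
        struct tpg t = { .id = 5 };
        int ret = publish_targets(&map, &t, 5);

        free(map);
        return ret;
}
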
@@ -842,28 +1248,37 @@ static int vhost_scsi_clear_endpoint(
842{ 1248{
843 struct tcm_vhost_tport *tv_tport; 1249 struct tcm_vhost_tport *tv_tport;
844 struct tcm_vhost_tpg *tv_tpg; 1250 struct tcm_vhost_tpg *tv_tpg;
1251 struct vhost_virtqueue *vq;
1252 bool match = false;
845 int index, ret, i; 1253 int index, ret, i;
846 u8 target; 1254 u8 target;
847 1255
1256 mutex_lock(&tcm_vhost_mutex);
848 mutex_lock(&vs->dev.mutex); 1257 mutex_lock(&vs->dev.mutex);
849 /* Verify that ring has been setup correctly. */ 1258 /* Verify that ring has been setup correctly. */
850 for (index = 0; index < vs->dev.nvqs; ++index) { 1259 for (index = 0; index < vs->dev.nvqs; ++index) {
851 if (!vhost_vq_access_ok(&vs->vqs[index])) { 1260 if (!vhost_vq_access_ok(&vs->vqs[index].vq)) {
852 ret = -EFAULT; 1261 ret = -EFAULT;
853 goto err; 1262 goto err_dev;
854 } 1263 }
855 } 1264 }
1265
1266 if (!vs->vs_tpg) {
1267 ret = 0;
1268 goto err_dev;
1269 }
1270
856 for (i = 0; i < VHOST_SCSI_MAX_TARGET; i++) { 1271 for (i = 0; i < VHOST_SCSI_MAX_TARGET; i++) {
857 target = i; 1272 target = i;
858
859 tv_tpg = vs->vs_tpg[target]; 1273 tv_tpg = vs->vs_tpg[target];
860 if (!tv_tpg) 1274 if (!tv_tpg)
861 continue; 1275 continue;
862 1276
1277 mutex_lock(&tv_tpg->tv_tpg_mutex);
863 tv_tport = tv_tpg->tport; 1278 tv_tport = tv_tpg->tport;
864 if (!tv_tport) { 1279 if (!tv_tport) {
865 ret = -ENODEV; 1280 ret = -ENODEV;
866 goto err; 1281 goto err_tpg;
867 } 1282 }
868 1283
869 if (strcmp(tv_tport->tport_name, t->vhost_wwpn)) { 1284 if (strcmp(tv_tport->tport_name, t->vhost_wwpn)) {
@@ -872,37 +1287,97 @@ static int vhost_scsi_clear_endpoint(
872 tv_tport->tport_name, tv_tpg->tport_tpgt, 1287 tv_tport->tport_name, tv_tpg->tport_tpgt,
873 t->vhost_wwpn, t->vhost_tpgt); 1288 t->vhost_wwpn, t->vhost_tpgt);
874 ret = -EINVAL; 1289 ret = -EINVAL;
875 goto err; 1290 goto err_tpg;
876 } 1291 }
877 tv_tpg->tv_tpg_vhost_count--; 1292 tv_tpg->tv_tpg_vhost_count--;
1293 tv_tpg->vhost_scsi = NULL;
878 vs->vs_tpg[target] = NULL; 1294 vs->vs_tpg[target] = NULL;
879 vs->vs_endpoint = false; 1295 match = true;
1296 mutex_unlock(&tv_tpg->tv_tpg_mutex);
880 } 1297 }
1298 if (match) {
1299 for (i = 0; i < VHOST_SCSI_MAX_VQ; i++) {
1300 vq = &vs->vqs[i].vq;
1301 /* Flushing the vhost_work acts as synchronize_rcu */
1302 mutex_lock(&vq->mutex);
1303 rcu_assign_pointer(vq->private_data, NULL);
1304 mutex_unlock(&vq->mutex);
1305 }
1306 }
1307 /*
1308 * Act as synchronize_rcu to make sure access to
1309 * old vs->vs_tpg is finished.
1310 */
1311 vhost_scsi_flush(vs);
1312 kfree(vs->vs_tpg);
1313 vs->vs_tpg = NULL;
1314 WARN_ON(vs->vs_events_nr);
881 mutex_unlock(&vs->dev.mutex); 1315 mutex_unlock(&vs->dev.mutex);
1316 mutex_unlock(&tcm_vhost_mutex);
882 return 0; 1317 return 0;
883 1318
884err: 1319err_tpg:
1320 mutex_unlock(&tv_tpg->tv_tpg_mutex);
1321err_dev:
885 mutex_unlock(&vs->dev.mutex); 1322 mutex_unlock(&vs->dev.mutex);
1323 mutex_unlock(&tcm_vhost_mutex);
886 return ret; 1324 return ret;
887} 1325}
888 1326
1327static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
1328{
1329 if (features & ~VHOST_SCSI_FEATURES)
1330 return -EOPNOTSUPP;
1331
1332 mutex_lock(&vs->dev.mutex);
1333 if ((features & (1 << VHOST_F_LOG_ALL)) &&
1334 !vhost_log_access_ok(&vs->dev)) {
1335 mutex_unlock(&vs->dev.mutex);
1336 return -EFAULT;
1337 }
1338 vs->dev.acked_features = features;
1339 smp_wmb();
1340 vhost_scsi_flush(vs);
1341 mutex_unlock(&vs->dev.mutex);
1342 return 0;
1343}
1344
889static int vhost_scsi_open(struct inode *inode, struct file *f) 1345static int vhost_scsi_open(struct inode *inode, struct file *f)
890{ 1346{
891 struct vhost_scsi *s; 1347 struct vhost_scsi *s;
1348 struct vhost_virtqueue **vqs;
892 int r, i; 1349 int r, i;
893 1350
894 s = kzalloc(sizeof(*s), GFP_KERNEL); 1351 s = kzalloc(sizeof(*s), GFP_KERNEL);
895 if (!s) 1352 if (!s)
896 return -ENOMEM; 1353 return -ENOMEM;
897 1354
1355 vqs = kmalloc(VHOST_SCSI_MAX_VQ * sizeof(*vqs), GFP_KERNEL);
1356 if (!vqs) {
1357 kfree(s);
1358 return -ENOMEM;
1359 }
1360
898 vhost_work_init(&s->vs_completion_work, vhost_scsi_complete_cmd_work); 1361 vhost_work_init(&s->vs_completion_work, vhost_scsi_complete_cmd_work);
1362 vhost_work_init(&s->vs_event_work, tcm_vhost_evt_work);
1363
1364 s->vs_events_nr = 0;
1365 s->vs_events_missed = false;
1366
1367 vqs[VHOST_SCSI_VQ_CTL] = &s->vqs[VHOST_SCSI_VQ_CTL].vq;
1368 vqs[VHOST_SCSI_VQ_EVT] = &s->vqs[VHOST_SCSI_VQ_EVT].vq;
1369 s->vqs[VHOST_SCSI_VQ_CTL].vq.handle_kick = vhost_scsi_ctl_handle_kick;
1370 s->vqs[VHOST_SCSI_VQ_EVT].vq.handle_kick = vhost_scsi_evt_handle_kick;
1371 for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++) {
1372 vqs[i] = &s->vqs[i].vq;
1373 s->vqs[i].vq.handle_kick = vhost_scsi_handle_kick;
1374 }
1375 r = vhost_dev_init(&s->dev, vqs, VHOST_SCSI_MAX_VQ);
1376
1377 tcm_vhost_init_inflight(s, NULL);
899 1378
900 s->vqs[VHOST_SCSI_VQ_CTL].handle_kick = vhost_scsi_ctl_handle_kick;
901 s->vqs[VHOST_SCSI_VQ_EVT].handle_kick = vhost_scsi_evt_handle_kick;
902 for (i = VHOST_SCSI_VQ_IO; i < VHOST_SCSI_MAX_VQ; i++)
903 s->vqs[i].handle_kick = vhost_scsi_handle_kick;
904 r = vhost_dev_init(&s->dev, s->vqs, VHOST_SCSI_MAX_VQ);
905 if (r < 0) { 1379 if (r < 0) {
1380 kfree(vqs);
906 kfree(s); 1381 kfree(s);
907 return r; 1382 return r;
908 } 1383 }
@@ -922,41 +1397,13 @@ static int vhost_scsi_release(struct inode *inode, struct file *f)
922 vhost_scsi_clear_endpoint(s, &t); 1397 vhost_scsi_clear_endpoint(s, &t);
923 vhost_dev_stop(&s->dev); 1398 vhost_dev_stop(&s->dev);
924 vhost_dev_cleanup(&s->dev, false); 1399 vhost_dev_cleanup(&s->dev, false);
1400 /* Jobs can re-queue themselves in evt kick handler. Do extra flush. */
1401 vhost_scsi_flush(s);
1402 kfree(s->dev.vqs);
925 kfree(s); 1403 kfree(s);
926 return 0; 1404 return 0;
927} 1405}
928 1406
929static void vhost_scsi_flush_vq(struct vhost_scsi *vs, int index)
930{
931 vhost_poll_flush(&vs->dev.vqs[index].poll);
932}
933
934static void vhost_scsi_flush(struct vhost_scsi *vs)
935{
936 int i;
937
938 for (i = 0; i < VHOST_SCSI_MAX_VQ; i++)
939 vhost_scsi_flush_vq(vs, i);
940}
941
942static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
943{
944 if (features & ~VHOST_FEATURES)
945 return -EOPNOTSUPP;
946
947 mutex_lock(&vs->dev.mutex);
948 if ((features & (1 << VHOST_F_LOG_ALL)) &&
949 !vhost_log_access_ok(&vs->dev)) {
950 mutex_unlock(&vs->dev.mutex);
951 return -EFAULT;
952 }
953 vs->dev.acked_features = features;
954 smp_wmb();
955 vhost_scsi_flush(vs);
956 mutex_unlock(&vs->dev.mutex);
957 return 0;
958}
959
960static long vhost_scsi_ioctl(struct file *f, unsigned int ioctl, 1407static long vhost_scsi_ioctl(struct file *f, unsigned int ioctl,
961 unsigned long arg) 1408 unsigned long arg)
962{ 1409{
@@ -964,8 +1411,11 @@ static long vhost_scsi_ioctl(struct file *f, unsigned int ioctl,
964 struct vhost_scsi_target backend; 1411 struct vhost_scsi_target backend;
965 void __user *argp = (void __user *)arg; 1412 void __user *argp = (void __user *)arg;
966 u64 __user *featurep = argp; 1413 u64 __user *featurep = argp;
1414 u32 __user *eventsp = argp;
1415 u32 events_missed;
967 u64 features; 1416 u64 features;
968 int r, abi_version = VHOST_SCSI_ABI_VERSION; 1417 int r, abi_version = VHOST_SCSI_ABI_VERSION;
1418 struct vhost_virtqueue *vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
969 1419
970 switch (ioctl) { 1420 switch (ioctl) {
971 case VHOST_SCSI_SET_ENDPOINT: 1421 case VHOST_SCSI_SET_ENDPOINT:
@@ -986,8 +1436,22 @@ static long vhost_scsi_ioctl(struct file *f, unsigned int ioctl,
986 if (copy_to_user(argp, &abi_version, sizeof abi_version)) 1436 if (copy_to_user(argp, &abi_version, sizeof abi_version))
987 return -EFAULT; 1437 return -EFAULT;
988 return 0; 1438 return 0;
1439 case VHOST_SCSI_SET_EVENTS_MISSED:
1440 if (get_user(events_missed, eventsp))
1441 return -EFAULT;
1442 mutex_lock(&vq->mutex);
1443 vs->vs_events_missed = events_missed;
1444 mutex_unlock(&vq->mutex);
1445 return 0;
1446 case VHOST_SCSI_GET_EVENTS_MISSED:
1447 mutex_lock(&vq->mutex);
1448 events_missed = vs->vs_events_missed;
1449 mutex_unlock(&vq->mutex);
1450 if (put_user(events_missed, eventsp))
1451 return -EFAULT;
1452 return 0;
989 case VHOST_GET_FEATURES: 1453 case VHOST_GET_FEATURES:
990 features = VHOST_FEATURES; 1454 features = VHOST_SCSI_FEATURES;
991 if (copy_to_user(featurep, &features, sizeof features)) 1455 if (copy_to_user(featurep, &features, sizeof features))
992 return -EFAULT; 1456 return -EFAULT;
993 return 0; 1457 return 0;
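The two new cases expose the per-device "events missed" flag, which the event virtqueue raises when it has no buffer for a hotplug event, so userspace can save and restore it, for example across stopping and restarting the event queue. A hypothetical caller-side sketch, assuming the VHOST_SCSI_*_EVENTS_MISSED ioctl numbers are exported through <linux/vhost.h> (not shown in this diff) and vhost_fd is an open /dev/vhost-scsi descriptor:

#include <stdint.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>

/* Read the current "events missed" state from the kernel. */
static int save_events_missed(int vhost_fd, uint32_t *missed)
{
	return ioctl(vhost_fd, VHOST_SCSI_GET_EVENTS_MISSED, missed);
}

/* Write it back, e.g. after the event queue has been restarted. */
static int restore_events_missed(int vhost_fd, uint32_t missed)
{
	return ioctl(vhost_fd, VHOST_SCSI_SET_EVENTS_MISSED, &missed);
}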
@@ -1057,28 +1521,80 @@ static char *tcm_vhost_dump_proto_id(struct tcm_vhost_tport *tport)
1057 return "Unknown"; 1521 return "Unknown";
1058} 1522}
1059 1523
1524static void tcm_vhost_do_plug(struct tcm_vhost_tpg *tpg,
1525 struct se_lun *lun, bool plug)
1526{
1527
1528 struct vhost_scsi *vs = tpg->vhost_scsi;
1529 struct vhost_virtqueue *vq;
1530 u32 reason;
1531
1532 if (!vs)
1533 return;
1534
1535 mutex_lock(&vs->dev.mutex);
1536 if (!vhost_has_feature(&vs->dev, VIRTIO_SCSI_F_HOTPLUG)) {
1537 mutex_unlock(&vs->dev.mutex);
1538 return;
1539 }
1540
1541 if (plug)
1542 reason = VIRTIO_SCSI_EVT_RESET_RESCAN;
1543 else
1544 reason = VIRTIO_SCSI_EVT_RESET_REMOVED;
1545
1546 vq = &vs->vqs[VHOST_SCSI_VQ_EVT].vq;
1547 mutex_lock(&vq->mutex);
1548 tcm_vhost_send_evt(vs, tpg, lun,
1549 VIRTIO_SCSI_T_TRANSPORT_RESET, reason);
1550 mutex_unlock(&vq->mutex);
1551 mutex_unlock(&vs->dev.mutex);
1552}
1553
1554static void tcm_vhost_hotplug(struct tcm_vhost_tpg *tpg, struct se_lun *lun)
1555{
1556 tcm_vhost_do_plug(tpg, lun, true);
1557}
1558
1559static void tcm_vhost_hotunplug(struct tcm_vhost_tpg *tpg, struct se_lun *lun)
1560{
1561 tcm_vhost_do_plug(tpg, lun, false);
1562}
1563
1060static int tcm_vhost_port_link(struct se_portal_group *se_tpg, 1564static int tcm_vhost_port_link(struct se_portal_group *se_tpg,
1061 struct se_lun *lun) 1565 struct se_lun *lun)
1062{ 1566{
1063 struct tcm_vhost_tpg *tv_tpg = container_of(se_tpg, 1567 struct tcm_vhost_tpg *tv_tpg = container_of(se_tpg,
1064 struct tcm_vhost_tpg, se_tpg); 1568 struct tcm_vhost_tpg, se_tpg);
1065 1569
1570 mutex_lock(&tcm_vhost_mutex);
1571
1066 mutex_lock(&tv_tpg->tv_tpg_mutex); 1572 mutex_lock(&tv_tpg->tv_tpg_mutex);
1067 tv_tpg->tv_tpg_port_count++; 1573 tv_tpg->tv_tpg_port_count++;
1068 mutex_unlock(&tv_tpg->tv_tpg_mutex); 1574 mutex_unlock(&tv_tpg->tv_tpg_mutex);
1069 1575
1576 tcm_vhost_hotplug(tv_tpg, lun);
1577
1578 mutex_unlock(&tcm_vhost_mutex);
1579
1070 return 0; 1580 return 0;
1071} 1581}
1072 1582
1073static void tcm_vhost_port_unlink(struct se_portal_group *se_tpg, 1583static void tcm_vhost_port_unlink(struct se_portal_group *se_tpg,
1074 struct se_lun *se_lun) 1584 struct se_lun *lun)
1075{ 1585{
1076 struct tcm_vhost_tpg *tv_tpg = container_of(se_tpg, 1586 struct tcm_vhost_tpg *tv_tpg = container_of(se_tpg,
1077 struct tcm_vhost_tpg, se_tpg); 1587 struct tcm_vhost_tpg, se_tpg);
1078 1588
1589 mutex_lock(&tcm_vhost_mutex);
1590
1079 mutex_lock(&tv_tpg->tv_tpg_mutex); 1591 mutex_lock(&tv_tpg->tv_tpg_mutex);
1080 tv_tpg->tv_tpg_port_count--; 1592 tv_tpg->tv_tpg_port_count--;
1081 mutex_unlock(&tv_tpg->tv_tpg_mutex); 1593 mutex_unlock(&tv_tpg->tv_tpg_mutex);
1594
1595 tcm_vhost_hotunplug(tv_tpg, lun);
1596
1597 mutex_unlock(&tcm_vhost_mutex);
1082} 1598}
1083 1599
1084static struct se_node_acl *tcm_vhost_make_nodeacl( 1600static struct se_node_acl *tcm_vhost_make_nodeacl(
@@ -1620,7 +2136,8 @@ static void tcm_vhost_exit(void)
1620 destroy_workqueue(tcm_vhost_workqueue); 2136 destroy_workqueue(tcm_vhost_workqueue);
1621}; 2137};
1622 2138
1623MODULE_DESCRIPTION("TCM_VHOST series fabric driver"); 2139MODULE_DESCRIPTION("VHOST_SCSI series fabric driver");
2140MODULE_ALIAS("tcm_vhost");
1624MODULE_LICENSE("GPL"); 2141MODULE_LICENSE("GPL");
1625module_init(tcm_vhost_init); 2142module_init(tcm_vhost_init);
1626module_exit(tcm_vhost_exit); 2143module_exit(tcm_vhost_exit);
diff --git a/drivers/vhost/tcm_vhost.h b/drivers/vhost/tcm_vhost.h
deleted file mode 100644
index 1d2ae7a60e11..000000000000
--- a/drivers/vhost/tcm_vhost.h
+++ /dev/null
@@ -1,115 +0,0 @@
1#define TCM_VHOST_VERSION "v0.1"
2#define TCM_VHOST_NAMELEN 256
3#define TCM_VHOST_MAX_CDB_SIZE 32
4
5struct tcm_vhost_cmd {
6 /* Descriptor from vhost_get_vq_desc() for virt_queue segment */
7 int tvc_vq_desc;
8 /* virtio-scsi initiator task attribute */
9 int tvc_task_attr;
10 /* virtio-scsi initiator data direction */
11 enum dma_data_direction tvc_data_direction;
12 /* Expected data transfer length from virtio-scsi header */
13 u32 tvc_exp_data_len;
14 /* The Tag from include/linux/virtio_scsi.h:struct virtio_scsi_cmd_req */
15 u64 tvc_tag;
16 /* The number of scatterlists associated with this cmd */
17 u32 tvc_sgl_count;
18 /* Saved unpacked SCSI LUN for tcm_vhost_submission_work() */
19 u32 tvc_lun;
20 /* Pointer to the SGL formatted memory from virtio-scsi */
21 struct scatterlist *tvc_sgl;
22 /* Pointer to response */
23 struct virtio_scsi_cmd_resp __user *tvc_resp;
24 /* Pointer to vhost_scsi for our device */
25 struct vhost_scsi *tvc_vhost;
26 /* Pointer to vhost_virtqueue for the cmd */
27 struct vhost_virtqueue *tvc_vq;
28 /* Pointer to vhost nexus memory */
29 struct tcm_vhost_nexus *tvc_nexus;
30 /* The TCM I/O descriptor that is accessed via container_of() */
31 struct se_cmd tvc_se_cmd;
32 /* work item used for cmwq dispatch to tcm_vhost_submission_work() */
33 struct work_struct work;
34 /* Copy of the incoming SCSI command descriptor block (CDB) */
35 unsigned char tvc_cdb[TCM_VHOST_MAX_CDB_SIZE];
36 /* Sense buffer that will be mapped into outgoing status */
37 unsigned char tvc_sense_buf[TRANSPORT_SENSE_BUFFER];
38 /* Completed commands list, serviced from vhost worker thread */
39 struct llist_node tvc_completion_list;
40};
41
42struct tcm_vhost_nexus {
43 /* Pointer to TCM session for I_T Nexus */
44 struct se_session *tvn_se_sess;
45};
46
47struct tcm_vhost_nacl {
48 /* Binary World Wide unique Port Name for Vhost Initiator port */
49 u64 iport_wwpn;
50 /* ASCII formatted WWPN for Sas Initiator port */
51 char iport_name[TCM_VHOST_NAMELEN];
52 /* Returned by tcm_vhost_make_nodeacl() */
53 struct se_node_acl se_node_acl;
54};
55
56struct tcm_vhost_tpg {
57 /* Vhost port target portal group tag for TCM */
58 u16 tport_tpgt;
59 /* Used to track number of TPG Port/Lun Links wrt explicit I_T Nexus shutdown */
60 int tv_tpg_port_count;
61 /* Used for vhost_scsi device reference to tpg_nexus, protected by tv_tpg_mutex */
62 int tv_tpg_vhost_count;
63 /* list for tcm_vhost_list */
64 struct list_head tv_tpg_list;
65 /* Used to protect access for tpg_nexus */
66 struct mutex tv_tpg_mutex;
67 /* Pointer to the TCM VHost I_T Nexus for this TPG endpoint */
68 struct tcm_vhost_nexus *tpg_nexus;
69 /* Pointer back to tcm_vhost_tport */
70 struct tcm_vhost_tport *tport;
71 /* Returned by tcm_vhost_make_tpg() */
72 struct se_portal_group se_tpg;
73};
74
75struct tcm_vhost_tport {
76 /* SCSI protocol the tport is providing */
77 u8 tport_proto_id;
78 /* Binary World Wide unique Port Name for Vhost Target port */
79 u64 tport_wwpn;
80 /* ASCII formatted WWPN for Vhost Target port */
81 char tport_name[TCM_VHOST_NAMELEN];
82 /* Returned by tcm_vhost_make_tport() */
83 struct se_wwn tport_wwn;
84};
85
86/*
87 * As per request from MST, keep TCM_VHOST related ioctl defines out of
88 * linux/vhost.h (user-space) for now..
89 */
90
91#include <linux/vhost.h>
92
93/*
94 * Used by QEMU userspace to ensure a consistent vhost-scsi ABI.
95 *
96 * ABI Rev 0: July 2012 version starting point for v3.6-rc merge candidate +
97 * RFC-v2 vhost-scsi userspace. Add GET_ABI_VERSION ioctl usage
98 * ABI Rev 1: January 2013. Ignore the vhost_tpgt field in struct vhost_scsi_target.
99 * All the targets under vhost_wwpn can be seen and used by the guest.
100 */
101
102#define VHOST_SCSI_ABI_VERSION 1
103
104struct vhost_scsi_target {
105 int abi_version;
106 char vhost_wwpn[TRANSPORT_IQN_LEN];
107 unsigned short vhost_tpgt;
108 unsigned short reserved;
109};
110
111/* VHOST_SCSI specific defines */
112#define VHOST_SCSI_SET_ENDPOINT _IOW(VHOST_VIRTIO, 0x40, struct vhost_scsi_target)
113#define VHOST_SCSI_CLEAR_ENDPOINT _IOW(VHOST_VIRTIO, 0x41, struct vhost_scsi_target)
114/* Changing this breaks userspace. */
115#define VHOST_SCSI_GET_ABI_VERSION _IOW(VHOST_VIRTIO, 0x42, int)
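The removed header is where the userspace-visible pieces lived: struct vhost_scsi_target, the ABI version, and the SET/CLEAR_ENDPOINT ioctls. A hypothetical caller-side sketch of that ABI, assuming the definitions now reach userspace through <linux/vhost.h> (their new location is not shown in this diff):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/vhost.h>

/* Bind a target WWPN to an already-open /dev/vhost-scsi descriptor. */
static int bind_endpoint(int vhost_fd, const char *wwpn)
{
	struct vhost_scsi_target t;
	int abi = 0;

	if (ioctl(vhost_fd, VHOST_SCSI_GET_ABI_VERSION, &abi) < 0)
		return -1;
	if (abi > VHOST_SCSI_ABI_VERSION)
		return -1;	/* kernel is newer than this caller understands */

	memset(&t, 0, sizeof(t));
	strncpy(t.vhost_wwpn, wwpn, sizeof(t.vhost_wwpn) - 1);
	/* Since ABI rev 1, vhost_tpgt is ignored: every TPG under the WWPN is exposed. */
	return ioctl(vhost_fd, VHOST_SCSI_SET_ENDPOINT, &t);
}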
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index 91d6f060aade..be65414d5bb1 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -219,13 +219,20 @@ static long vhost_test_reset_owner(struct vhost_test *n)
219{ 219{
220 void *priv = NULL; 220 void *priv = NULL;
221 long err; 221 long err;
222 struct vhost_memory *memory;
223
222 mutex_lock(&n->dev.mutex); 224 mutex_lock(&n->dev.mutex);
223 err = vhost_dev_check_owner(&n->dev); 225 err = vhost_dev_check_owner(&n->dev);
224 if (err) 226 if (err)
225 goto done; 227 goto done;
228 memory = vhost_dev_reset_owner_prepare();
229 if (!memory) {
230 err = -ENOMEM;
231 goto done;
232 }
226 vhost_test_stop(n, &priv); 233 vhost_test_stop(n, &priv);
227 vhost_test_flush(n); 234 vhost_test_flush(n);
228 err = vhost_dev_reset_owner(&n->dev); 235 vhost_dev_reset_owner(&n->dev, memory);
229done: 236done:
230 mutex_unlock(&n->dev.mutex); 237 mutex_unlock(&n->dev.mutex);
231 return err; 238 return err;
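The reset-owner path is now split in two: vhost_dev_reset_owner_prepare() allocates the empty memory map up front, so the only possible failure (-ENOMEM) happens before anything is torn down, and vhost_dev_reset_owner() itself can no longer fail. A condensed sketch of the new calling convention, mirroring the test.c change above (the my_* names are illustrative):

static long my_reset_owner(struct my_vhost_dev *n)
{
	struct vhost_memory *memory;
	long err;

	mutex_lock(&n->dev.mutex);
	err = vhost_dev_check_owner(&n->dev);
	if (err)
		goto done;
	/* Allocate before stopping anything, so -ENOMEM has no side effects. */
	memory = vhost_dev_reset_owner_prepare();
	if (!memory) {
		err = -ENOMEM;
		goto done;
	}
	my_stop(n);
	my_flush(n);
	vhost_dev_reset_owner(&n->dev, memory);	/* commit step, cannot fail */
done:
	mutex_unlock(&n->dev.mutex);
	return err;
}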
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 9759249e6d90..749b5ab5bfbb 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -33,8 +33,6 @@ enum {
33 VHOST_MEMORY_F_LOG = 0x1, 33 VHOST_MEMORY_F_LOG = 0x1,
34}; 34};
35 35
36static unsigned vhost_zcopy_mask __read_mostly;
37
38#define vhost_used_event(vq) ((u16 __user *)&vq->avail->ring[vq->num]) 36#define vhost_used_event(vq) ((u16 __user *)&vq->avail->ring[vq->num])
39#define vhost_avail_event(vq) ((u16 __user *)&vq->used->ring[vq->num]) 37#define vhost_avail_event(vq) ((u16 __user *)&vq->used->ring[vq->num])
40 38
@@ -89,6 +87,9 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file)
89 unsigned long mask; 87 unsigned long mask;
90 int ret = 0; 88 int ret = 0;
91 89
90 if (poll->wqh)
91 return 0;
92
92 mask = file->f_op->poll(file, &poll->table); 93 mask = file->f_op->poll(file, &poll->table);
93 if (mask) 94 if (mask)
94 vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask); 95 vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask);
@@ -178,8 +179,6 @@ static void vhost_vq_reset(struct vhost_dev *dev,
178 vq->used_flags = 0; 179 vq->used_flags = 0;
179 vq->log_used = false; 180 vq->log_used = false;
180 vq->log_addr = -1ull; 181 vq->log_addr = -1ull;
181 vq->vhost_hlen = 0;
182 vq->sock_hlen = 0;
183 vq->private_data = NULL; 182 vq->private_data = NULL;
184 vq->log_base = NULL; 183 vq->log_base = NULL;
185 vq->error_ctx = NULL; 184 vq->error_ctx = NULL;
@@ -188,9 +187,6 @@ static void vhost_vq_reset(struct vhost_dev *dev,
188 vq->call_ctx = NULL; 187 vq->call_ctx = NULL;
189 vq->call = NULL; 188 vq->call = NULL;
190 vq->log_ctx = NULL; 189 vq->log_ctx = NULL;
191 vq->upend_idx = 0;
192 vq->done_idx = 0;
193 vq->ubufs = NULL;
194} 190}
195 191
196static int vhost_worker(void *data) 192static int vhost_worker(void *data)
@@ -250,43 +246,29 @@ static void vhost_vq_free_iovecs(struct vhost_virtqueue *vq)
250 vq->log = NULL; 246 vq->log = NULL;
251 kfree(vq->heads); 247 kfree(vq->heads);
252 vq->heads = NULL; 248 vq->heads = NULL;
253 kfree(vq->ubuf_info);
254 vq->ubuf_info = NULL;
255}
256
257void vhost_enable_zcopy(int vq)
258{
259 vhost_zcopy_mask |= 0x1 << vq;
260} 249}
261 250
262/* Helper to allocate iovec buffers for all vqs. */ 251/* Helper to allocate iovec buffers for all vqs. */
263static long vhost_dev_alloc_iovecs(struct vhost_dev *dev) 252static long vhost_dev_alloc_iovecs(struct vhost_dev *dev)
264{ 253{
265 int i; 254 int i;
266 bool zcopy;
267 255
268 for (i = 0; i < dev->nvqs; ++i) { 256 for (i = 0; i < dev->nvqs; ++i) {
269 dev->vqs[i].indirect = kmalloc(sizeof *dev->vqs[i].indirect * 257 dev->vqs[i]->indirect = kmalloc(sizeof *dev->vqs[i]->indirect *
270 UIO_MAXIOV, GFP_KERNEL); 258 UIO_MAXIOV, GFP_KERNEL);
271 dev->vqs[i].log = kmalloc(sizeof *dev->vqs[i].log * UIO_MAXIOV, 259 dev->vqs[i]->log = kmalloc(sizeof *dev->vqs[i]->log * UIO_MAXIOV,
272 GFP_KERNEL); 260 GFP_KERNEL);
273 dev->vqs[i].heads = kmalloc(sizeof *dev->vqs[i].heads * 261 dev->vqs[i]->heads = kmalloc(sizeof *dev->vqs[i]->heads *
274 UIO_MAXIOV, GFP_KERNEL); 262 UIO_MAXIOV, GFP_KERNEL);
275 zcopy = vhost_zcopy_mask & (0x1 << i); 263 if (!dev->vqs[i]->indirect || !dev->vqs[i]->log ||
276 if (zcopy) 264 !dev->vqs[i]->heads)
277 dev->vqs[i].ubuf_info =
278 kmalloc(sizeof *dev->vqs[i].ubuf_info *
279 UIO_MAXIOV, GFP_KERNEL);
280 if (!dev->vqs[i].indirect || !dev->vqs[i].log ||
281 !dev->vqs[i].heads ||
282 (zcopy && !dev->vqs[i].ubuf_info))
283 goto err_nomem; 265 goto err_nomem;
284 } 266 }
285 return 0; 267 return 0;
286 268
287err_nomem: 269err_nomem:
288 for (; i >= 0; --i) 270 for (; i >= 0; --i)
289 vhost_vq_free_iovecs(&dev->vqs[i]); 271 vhost_vq_free_iovecs(dev->vqs[i]);
290 return -ENOMEM; 272 return -ENOMEM;
291} 273}
292 274
@@ -295,11 +277,11 @@ static void vhost_dev_free_iovecs(struct vhost_dev *dev)
295 int i; 277 int i;
296 278
297 for (i = 0; i < dev->nvqs; ++i) 279 for (i = 0; i < dev->nvqs; ++i)
298 vhost_vq_free_iovecs(&dev->vqs[i]); 280 vhost_vq_free_iovecs(dev->vqs[i]);
299} 281}
300 282
301long vhost_dev_init(struct vhost_dev *dev, 283long vhost_dev_init(struct vhost_dev *dev,
302 struct vhost_virtqueue *vqs, int nvqs) 284 struct vhost_virtqueue **vqs, int nvqs)
303{ 285{
304 int i; 286 int i;
305 287
@@ -315,16 +297,15 @@ long vhost_dev_init(struct vhost_dev *dev,
315 dev->worker = NULL; 297 dev->worker = NULL;
316 298
317 for (i = 0; i < dev->nvqs; ++i) { 299 for (i = 0; i < dev->nvqs; ++i) {
318 dev->vqs[i].log = NULL; 300 dev->vqs[i]->log = NULL;
319 dev->vqs[i].indirect = NULL; 301 dev->vqs[i]->indirect = NULL;
320 dev->vqs[i].heads = NULL; 302 dev->vqs[i]->heads = NULL;
321 dev->vqs[i].ubuf_info = NULL; 303 dev->vqs[i]->dev = dev;
322 dev->vqs[i].dev = dev; 304 mutex_init(&dev->vqs[i]->mutex);
323 mutex_init(&dev->vqs[i].mutex); 305 vhost_vq_reset(dev, dev->vqs[i]);
324 vhost_vq_reset(dev, dev->vqs + i); 306 if (dev->vqs[i]->handle_kick)
325 if (dev->vqs[i].handle_kick) 307 vhost_poll_init(&dev->vqs[i]->poll,
326 vhost_poll_init(&dev->vqs[i].poll, 308 dev->vqs[i]->handle_kick, POLLIN, dev);
327 dev->vqs[i].handle_kick, POLLIN, dev);
328 } 309 }
329 310
330 return 0; 311 return 0;
@@ -405,21 +386,19 @@ err_mm:
405 return err; 386 return err;
406} 387}
407 388
408/* Caller should have device mutex */ 389struct vhost_memory *vhost_dev_reset_owner_prepare(void)
409long vhost_dev_reset_owner(struct vhost_dev *dev)
410{ 390{
411 struct vhost_memory *memory; 391 return kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL);
412 392}
413 /* Restore memory to default empty mapping. */
414 memory = kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL);
415 if (!memory)
416 return -ENOMEM;
417 393
394/* Caller should have device mutex */
395void vhost_dev_reset_owner(struct vhost_dev *dev, struct vhost_memory *memory)
396{
418 vhost_dev_cleanup(dev, true); 397 vhost_dev_cleanup(dev, true);
419 398
399 /* Restore memory to default empty mapping. */
420 memory->nregions = 0; 400 memory->nregions = 0;
421 RCU_INIT_POINTER(dev->memory, memory); 401 RCU_INIT_POINTER(dev->memory, memory);
422 return 0;
423} 402}
424 403
425void vhost_dev_stop(struct vhost_dev *dev) 404void vhost_dev_stop(struct vhost_dev *dev)
@@ -427,9 +406,9 @@ void vhost_dev_stop(struct vhost_dev *dev)
427 int i; 406 int i;
428 407
429 for (i = 0; i < dev->nvqs; ++i) { 408 for (i = 0; i < dev->nvqs; ++i) {
430 if (dev->vqs[i].kick && dev->vqs[i].handle_kick) { 409 if (dev->vqs[i]->kick && dev->vqs[i]->handle_kick) {
431 vhost_poll_stop(&dev->vqs[i].poll); 410 vhost_poll_stop(&dev->vqs[i]->poll);
432 vhost_poll_flush(&dev->vqs[i].poll); 411 vhost_poll_flush(&dev->vqs[i]->poll);
433 } 412 }
434 } 413 }
435} 414}
@@ -440,17 +419,17 @@ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked)
440 int i; 419 int i;
441 420
442 for (i = 0; i < dev->nvqs; ++i) { 421 for (i = 0; i < dev->nvqs; ++i) {
443 if (dev->vqs[i].error_ctx) 422 if (dev->vqs[i]->error_ctx)
444 eventfd_ctx_put(dev->vqs[i].error_ctx); 423 eventfd_ctx_put(dev->vqs[i]->error_ctx);
445 if (dev->vqs[i].error) 424 if (dev->vqs[i]->error)
446 fput(dev->vqs[i].error); 425 fput(dev->vqs[i]->error);
447 if (dev->vqs[i].kick) 426 if (dev->vqs[i]->kick)
448 fput(dev->vqs[i].kick); 427 fput(dev->vqs[i]->kick);
449 if (dev->vqs[i].call_ctx) 428 if (dev->vqs[i]->call_ctx)
450 eventfd_ctx_put(dev->vqs[i].call_ctx); 429 eventfd_ctx_put(dev->vqs[i]->call_ctx);
451 if (dev->vqs[i].call) 430 if (dev->vqs[i]->call)
452 fput(dev->vqs[i].call); 431 fput(dev->vqs[i]->call);
453 vhost_vq_reset(dev, dev->vqs + i); 432 vhost_vq_reset(dev, dev->vqs[i]);
454 } 433 }
455 vhost_dev_free_iovecs(dev); 434 vhost_dev_free_iovecs(dev);
456 if (dev->log_ctx) 435 if (dev->log_ctx)
@@ -521,14 +500,14 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem,
521 500
522 for (i = 0; i < d->nvqs; ++i) { 501 for (i = 0; i < d->nvqs; ++i) {
523 int ok; 502 int ok;
524 mutex_lock(&d->vqs[i].mutex); 503 mutex_lock(&d->vqs[i]->mutex);
525 /* If ring is inactive, will check when it's enabled. */ 504 /* If ring is inactive, will check when it's enabled. */
526 if (d->vqs[i].private_data) 505 if (d->vqs[i]->private_data)
527 ok = vq_memory_access_ok(d->vqs[i].log_base, mem, 506 ok = vq_memory_access_ok(d->vqs[i]->log_base, mem,
528 log_all); 507 log_all);
529 else 508 else
530 ok = 1; 509 ok = 1;
531 mutex_unlock(&d->vqs[i].mutex); 510 mutex_unlock(&d->vqs[i]->mutex);
532 if (!ok) 511 if (!ok)
533 return 0; 512 return 0;
534 } 513 }
@@ -638,7 +617,7 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
638 if (idx >= d->nvqs) 617 if (idx >= d->nvqs)
639 return -ENOBUFS; 618 return -ENOBUFS;
640 619
641 vq = d->vqs + idx; 620 vq = d->vqs[idx];
642 621
643 mutex_lock(&vq->mutex); 622 mutex_lock(&vq->mutex);
644 623
@@ -849,7 +828,7 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
849 for (i = 0; i < d->nvqs; ++i) { 828 for (i = 0; i < d->nvqs; ++i) {
850 struct vhost_virtqueue *vq; 829 struct vhost_virtqueue *vq;
851 void __user *base = (void __user *)(unsigned long)p; 830 void __user *base = (void __user *)(unsigned long)p;
852 vq = d->vqs + i; 831 vq = d->vqs[i];
853 mutex_lock(&vq->mutex); 832 mutex_lock(&vq->mutex);
854 /* If ring is inactive, will check when it's enabled. */ 833 /* If ring is inactive, will check when it's enabled. */
855 if (vq->private_data && !vq_log_access_ok(d, vq, base)) 834 if (vq->private_data && !vq_log_access_ok(d, vq, base))
@@ -876,9 +855,9 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
876 } else 855 } else
877 filep = eventfp; 856 filep = eventfp;
878 for (i = 0; i < d->nvqs; ++i) { 857 for (i = 0; i < d->nvqs; ++i) {
879 mutex_lock(&d->vqs[i].mutex); 858 mutex_lock(&d->vqs[i]->mutex);
880 d->vqs[i].log_ctx = d->log_ctx; 859 d->vqs[i]->log_ctx = d->log_ctx;
881 mutex_unlock(&d->vqs[i].mutex); 860 mutex_unlock(&d->vqs[i]->mutex);
882 } 861 }
883 if (ctx) 862 if (ctx)
884 eventfd_ctx_put(ctx); 863 eventfd_ctx_put(ctx);
@@ -1548,38 +1527,3 @@ void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq)
1548 &vq->used->flags, r); 1527 &vq->used->flags, r);
1549 } 1528 }
1550} 1529}
1551
1552static void vhost_zerocopy_done_signal(struct kref *kref)
1553{
1554 struct vhost_ubuf_ref *ubufs = container_of(kref, struct vhost_ubuf_ref,
1555 kref);
1556 wake_up(&ubufs->wait);
1557}
1558
1559struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *vq,
1560 bool zcopy)
1561{
1562 struct vhost_ubuf_ref *ubufs;
1563 /* No zero copy backend? Nothing to count. */
1564 if (!zcopy)
1565 return NULL;
1566 ubufs = kmalloc(sizeof *ubufs, GFP_KERNEL);
1567 if (!ubufs)
1568 return ERR_PTR(-ENOMEM);
1569 kref_init(&ubufs->kref);
1570 init_waitqueue_head(&ubufs->wait);
1571 ubufs->vq = vq;
1572 return ubufs;
1573}
1574
1575void vhost_ubuf_put(struct vhost_ubuf_ref *ubufs)
1576{
1577 kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
1578}
1579
1580void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *ubufs)
1581{
1582 kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
1583 wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount));
1584 kfree(ubufs);
1585}
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 17261e277c02..b58f4ae82cb8 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -54,18 +54,6 @@ struct vhost_log {
54 54
55struct vhost_virtqueue; 55struct vhost_virtqueue;
56 56
57struct vhost_ubuf_ref {
58 struct kref kref;
59 wait_queue_head_t wait;
60 struct vhost_virtqueue *vq;
61};
62
63struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *, bool zcopy);
64void vhost_ubuf_put(struct vhost_ubuf_ref *);
65void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *);
66
67struct ubuf_info;
68
69/* The virtqueue structure describes a queue attached to a device. */ 57/* The virtqueue structure describes a queue attached to a device. */
70struct vhost_virtqueue { 58struct vhost_virtqueue {
71 struct vhost_dev *dev; 59 struct vhost_dev *dev;
@@ -114,10 +102,7 @@ struct vhost_virtqueue {
114 /* hdr is used to store the virtio header. 102 /* hdr is used to store the virtio header.
115 * Since each iovec has >= 1 byte length, we never need more than 103 * Since each iovec has >= 1 byte length, we never need more than
116 * header length entries to store the header. */ 104 * header length entries to store the header. */
117 struct iovec hdr[sizeof(struct virtio_net_hdr_mrg_rxbuf)];
118 struct iovec *indirect; 105 struct iovec *indirect;
119 size_t vhost_hlen;
120 size_t sock_hlen;
121 struct vring_used_elem *heads; 106 struct vring_used_elem *heads;
122 /* We use a kind of RCU to access private pointer. 107 /* We use a kind of RCU to access private pointer.
123 * All readers access it from worker, which makes it possible to 108 * All readers access it from worker, which makes it possible to
@@ -130,16 +115,6 @@ struct vhost_virtqueue {
130 /* Log write descriptors */ 115 /* Log write descriptors */
131 void __user *log_base; 116 void __user *log_base;
132 struct vhost_log *log; 117 struct vhost_log *log;
133 /* vhost zerocopy support fields below: */
134 /* last used idx for outstanding DMA zerocopy buffers */
135 int upend_idx;
136 /* first used idx for DMA done zerocopy buffers */
137 int done_idx;
138 /* an array of userspace buffers info */
139 struct ubuf_info *ubuf_info;
140 /* Reference counting for outstanding ubufs.
141 * Protected by vq mutex. Writers must also take device mutex. */
142 struct vhost_ubuf_ref *ubufs;
143}; 118};
144 119
145struct vhost_dev { 120struct vhost_dev {
@@ -150,7 +125,7 @@ struct vhost_dev {
150 struct mm_struct *mm; 125 struct mm_struct *mm;
151 struct mutex mutex; 126 struct mutex mutex;
152 unsigned acked_features; 127 unsigned acked_features;
153 struct vhost_virtqueue *vqs; 128 struct vhost_virtqueue **vqs;
154 int nvqs; 129 int nvqs;
155 struct file *log_file; 130 struct file *log_file;
156 struct eventfd_ctx *log_ctx; 131 struct eventfd_ctx *log_ctx;
@@ -159,9 +134,10 @@ struct vhost_dev {
159 struct task_struct *worker; 134 struct task_struct *worker;
160}; 135};
161 136
162long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue *vqs, int nvqs); 137long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue **vqs, int nvqs);
163long vhost_dev_check_owner(struct vhost_dev *); 138long vhost_dev_check_owner(struct vhost_dev *);
164long vhost_dev_reset_owner(struct vhost_dev *); 139struct vhost_memory *vhost_dev_reset_owner_prepare(void);
140void vhost_dev_reset_owner(struct vhost_dev *, struct vhost_memory *);
165void vhost_dev_cleanup(struct vhost_dev *, bool locked); 141void vhost_dev_cleanup(struct vhost_dev *, bool locked);
166void vhost_dev_stop(struct vhost_dev *); 142void vhost_dev_stop(struct vhost_dev *);
167long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp); 143long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp);
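With struct vhost_dev now holding an array of virtqueue pointers, each driver embeds its own per-vq container (as vhost_net and vhost_scsi do above) and hands vhost_dev_init() a pointer table it allocates itself; the driver later releases that table through dev.vqs, as vhost_scsi_release() does above. A minimal sketch of that wiring (struct my_vq, MY_NVQS and my_handle_kick are illustrative names, not part of this patch):

#define MY_NVQS 2

struct my_vq {
	struct vhost_virtqueue vq;
	/* driver-private per-vq state lives next to the core vq */
};

struct my_dev {
	struct vhost_dev dev;
	struct my_vq vqs[MY_NVQS];
};

static int my_open_dev(struct my_dev *s)
{
	struct vhost_virtqueue **vqs;
	int i, r;

	vqs = kmalloc(MY_NVQS * sizeof(*vqs), GFP_KERNEL);
	if (!vqs)
		return -ENOMEM;
	for (i = 0; i < MY_NVQS; i++) {
		vqs[i] = &s->vqs[i].vq;
		s->vqs[i].vq.handle_kick = my_handle_kick;
	}
	r = vhost_dev_init(&s->dev, vqs, MY_NVQS);
	if (r < 0)
		kfree(vqs);	/* on success, released later via kfree(s->dev.vqs) */
	return r;
}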