aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/vhost
diff options
context:
space:
mode:
authorJonathan Herman <hermanjl@cs.unc.edu>2013-01-17 16:15:55 -0500
committerJonathan Herman <hermanjl@cs.unc.edu>2013-01-17 16:15:55 -0500
commit8dea78da5cee153b8af9c07a2745f6c55057fe12 (patch)
treea8f4d49d63b1ecc92f2fddceba0655b2472c5bd9 /drivers/vhost
parent406089d01562f1e2bf9f089fd7637009ebaad589 (diff)
Patched in Tegra support.
Diffstat (limited to 'drivers/vhost')
-rw-r--r--drivers/vhost/Kconfig3
-rw-r--r--drivers/vhost/Kconfig.tcm6
-rw-r--r--drivers/vhost/Makefile2
-rw-r--r--drivers/vhost/net.c178
-rw-r--r--drivers/vhost/tcm_vhost.c1593
-rw-r--r--drivers/vhost/tcm_vhost.h111
-rw-r--r--drivers/vhost/test.c6
-rw-r--r--drivers/vhost/vhost.c90
-rw-r--r--drivers/vhost/vhost.h22
9 files changed, 99 insertions, 1912 deletions
diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig
index 202bba6c997..e4e2fd1b510 100644
--- a/drivers/vhost/Kconfig
+++ b/drivers/vhost/Kconfig
@@ -9,6 +9,3 @@ config VHOST_NET
9 To compile this driver as a module, choose M here: the module will 9 To compile this driver as a module, choose M here: the module will
10 be called vhost_net. 10 be called vhost_net.
11 11
12if STAGING
13source "drivers/vhost/Kconfig.tcm"
14endif
diff --git a/drivers/vhost/Kconfig.tcm b/drivers/vhost/Kconfig.tcm
deleted file mode 100644
index a9c6f76e320..00000000000
--- a/drivers/vhost/Kconfig.tcm
+++ /dev/null
@@ -1,6 +0,0 @@
1config TCM_VHOST
2 tristate "TCM_VHOST fabric module (EXPERIMENTAL)"
3 depends on TARGET_CORE && EVENTFD && EXPERIMENTAL && m
4 default n
5 ---help---
6 Say M here to enable the TCM_VHOST fabric module for use with virtio-scsi guests
diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile
index a27b053bc9a..72dd02050bb 100644
--- a/drivers/vhost/Makefile
+++ b/drivers/vhost/Makefile
@@ -1,4 +1,2 @@
1obj-$(CONFIG_VHOST_NET) += vhost_net.o 1obj-$(CONFIG_VHOST_NET) += vhost_net.o
2vhost_net-y := vhost.o net.o 2vhost_net-y := vhost.o net.o
3
4obj-$(CONFIG_TCM_VHOST) += tcm_vhost.o
diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index ebd08b21b23..882a51fe7b3 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -24,16 +24,14 @@
24#include <linux/if_arp.h> 24#include <linux/if_arp.h>
25#include <linux/if_tun.h> 25#include <linux/if_tun.h>
26#include <linux/if_macvlan.h> 26#include <linux/if_macvlan.h>
27#include <linux/if_vlan.h>
28 27
29#include <net/sock.h> 28#include <net/sock.h>
30 29
31#include "vhost.h" 30#include "vhost.h"
32 31
33static int experimental_zcopytx = 1; 32static int experimental_zcopytx;
34module_param(experimental_zcopytx, int, 0444); 33module_param(experimental_zcopytx, int, 0444);
35MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;" 34MODULE_PARM_DESC(experimental_zcopytx, "Enable Experimental Zero Copy TX");
36 " 1 -Enable; 0 - Disable");
37 35
38/* Max number of bytes transferred before requeueing the job. 36/* Max number of bytes transferred before requeueing the job.
39 * Using this limit prevents one virtqueue from starving others. */ 37 * Using this limit prevents one virtqueue from starving others. */
@@ -43,21 +41,6 @@ MODULE_PARM_DESC(experimental_zcopytx, "Enable Zero Copy TX;"
43#define VHOST_MAX_PEND 128 41#define VHOST_MAX_PEND 128
44#define VHOST_GOODCOPY_LEN 256 42#define VHOST_GOODCOPY_LEN 256
45 43
46/*
47 * For transmit, used buffer len is unused; we override it to track buffer
48 * status internally; used for zerocopy tx only.
49 */
50/* Lower device DMA failed */
51#define VHOST_DMA_FAILED_LEN 3
52/* Lower device DMA done */
53#define VHOST_DMA_DONE_LEN 2
54/* Lower device DMA in progress */
55#define VHOST_DMA_IN_PROGRESS 1
56/* Buffer unused */
57#define VHOST_DMA_CLEAR_LEN 0
58
59#define VHOST_DMA_IS_DONE(len) ((len) >= VHOST_DMA_DONE_LEN)
60
61enum { 44enum {
62 VHOST_NET_VQ_RX = 0, 45 VHOST_NET_VQ_RX = 0,
63 VHOST_NET_VQ_TX = 1, 46 VHOST_NET_VQ_TX = 1,
@@ -78,39 +61,8 @@ struct vhost_net {
78 * We only do this when socket buffer fills up. 61 * We only do this when socket buffer fills up.
79 * Protected by tx vq lock. */ 62 * Protected by tx vq lock. */
80 enum vhost_net_poll_state tx_poll_state; 63 enum vhost_net_poll_state tx_poll_state;
81 /* Number of TX recently submitted.
82 * Protected by tx vq lock. */
83 unsigned tx_packets;
84 /* Number of times zerocopy TX recently failed.
85 * Protected by tx vq lock. */
86 unsigned tx_zcopy_err;
87 /* Flush in progress. Protected by tx vq lock. */
88 bool tx_flush;
89}; 64};
90 65
91static void vhost_net_tx_packet(struct vhost_net *net)
92{
93 ++net->tx_packets;
94 if (net->tx_packets < 1024)
95 return;
96 net->tx_packets = 0;
97 net->tx_zcopy_err = 0;
98}
99
100static void vhost_net_tx_err(struct vhost_net *net)
101{
102 ++net->tx_zcopy_err;
103}
104
105static bool vhost_net_tx_select_zcopy(struct vhost_net *net)
106{
107 /* TX flush waits for outstanding DMAs to be done.
108 * Don't start new DMAs.
109 */
110 return !net->tx_flush &&
111 net->tx_packets / 64 >= net->tx_zcopy_err;
112}
113
114static bool vhost_sock_zcopy(struct socket *sock) 66static bool vhost_sock_zcopy(struct socket *sock)
115{ 67{
116 return unlikely(experimental_zcopytx) && 68 return unlikely(experimental_zcopytx) &&
@@ -173,55 +125,6 @@ static void tx_poll_start(struct vhost_net *net, struct socket *sock)
173 net->tx_poll_state = VHOST_NET_POLL_STARTED; 125 net->tx_poll_state = VHOST_NET_POLL_STARTED;
174} 126}
175 127
176/* In case of DMA done not in order in lower device driver for some reason.
177 * upend_idx is used to track end of used idx, done_idx is used to track head
178 * of used idx. Once lower device DMA done contiguously, we will signal KVM
179 * guest used idx.
180 */
181static int vhost_zerocopy_signal_used(struct vhost_net *net,
182 struct vhost_virtqueue *vq)
183{
184 int i;
185 int j = 0;
186
187 for (i = vq->done_idx; i != vq->upend_idx; i = (i + 1) % UIO_MAXIOV) {
188 if (vq->heads[i].len == VHOST_DMA_FAILED_LEN)
189 vhost_net_tx_err(net);
190 if (VHOST_DMA_IS_DONE(vq->heads[i].len)) {
191 vq->heads[i].len = VHOST_DMA_CLEAR_LEN;
192 vhost_add_used_and_signal(vq->dev, vq,
193 vq->heads[i].id, 0);
194 ++j;
195 } else
196 break;
197 }
198 if (j)
199 vq->done_idx = i;
200 return j;
201}
202
203static void vhost_zerocopy_callback(struct ubuf_info *ubuf, bool success)
204{
205 struct vhost_ubuf_ref *ubufs = ubuf->ctx;
206 struct vhost_virtqueue *vq = ubufs->vq;
207 int cnt = atomic_read(&ubufs->kref.refcount);
208
209 /*
210 * Trigger polling thread if guest stopped submitting new buffers:
211 * in this case, the refcount after decrement will eventually reach 1
212 * so here it is 2.
213 * We also trigger polling periodically after each 16 packets
214 * (the value 16 here is more or less arbitrary, it's tuned to trigger
215 * less than 10% of times).
216 */
217 if (cnt <= 2 || !(cnt % 16))
218 vhost_poll_queue(&vq->poll);
219 /* set len to mark this desc buffers done DMA */
220 vq->heads[ubuf->desc].len = success ?
221 VHOST_DMA_DONE_LEN : VHOST_DMA_FAILED_LEN;
222 vhost_ubuf_put(ubufs);
223}
224
225/* Expects to be always run from workqueue - which acts as 128/* Expects to be always run from workqueue - which acts as
226 * read-size critical section for our kind of RCU. */ 129 * read-size critical section for our kind of RCU. */
227static void handle_tx(struct vhost_net *net) 130static void handle_tx(struct vhost_net *net)
@@ -242,7 +145,7 @@ static void handle_tx(struct vhost_net *net)
242 size_t hdr_size; 145 size_t hdr_size;
243 struct socket *sock; 146 struct socket *sock;
244 struct vhost_ubuf_ref *uninitialized_var(ubufs); 147 struct vhost_ubuf_ref *uninitialized_var(ubufs);
245 bool zcopy, zcopy_used; 148 bool zcopy;
246 149
247 /* TODO: check that we are running from vhost_worker? */ 150 /* TODO: check that we are running from vhost_worker? */
248 sock = rcu_dereference_check(vq->private_data, 1); 151 sock = rcu_dereference_check(vq->private_data, 1);
@@ -263,12 +166,12 @@ static void handle_tx(struct vhost_net *net)
263 if (wmem < sock->sk->sk_sndbuf / 2) 166 if (wmem < sock->sk->sk_sndbuf / 2)
264 tx_poll_stop(net); 167 tx_poll_stop(net);
265 hdr_size = vq->vhost_hlen; 168 hdr_size = vq->vhost_hlen;
266 zcopy = vq->ubufs; 169 zcopy = vhost_sock_zcopy(sock);
267 170
268 for (;;) { 171 for (;;) {
269 /* Release DMAs done buffers first */ 172 /* Release DMAs done buffers first */
270 if (zcopy) 173 if (zcopy)
271 vhost_zerocopy_signal_used(net, vq); 174 vhost_zerocopy_signal_used(vq);
272 175
273 head = vhost_get_vq_desc(&net->dev, vq, vq->iov, 176 head = vhost_get_vq_desc(&net->dev, vq, vq->iov,
274 ARRAY_SIZE(vq->iov), 177 ARRAY_SIZE(vq->iov),
@@ -320,14 +223,10 @@ static void handle_tx(struct vhost_net *net)
320 iov_length(vq->hdr, s), hdr_size); 223 iov_length(vq->hdr, s), hdr_size);
321 break; 224 break;
322 } 225 }
323 zcopy_used = zcopy && (len >= VHOST_GOODCOPY_LEN ||
324 vq->upend_idx != vq->done_idx);
325
326 /* use msg_control to pass vhost zerocopy ubuf info to skb */ 226 /* use msg_control to pass vhost zerocopy ubuf info to skb */
327 if (zcopy_used) { 227 if (zcopy) {
328 vq->heads[vq->upend_idx].id = head; 228 vq->heads[vq->upend_idx].id = head;
329 if (!vhost_net_tx_select_zcopy(net) || 229 if (len < VHOST_GOODCOPY_LEN) {
330 len < VHOST_GOODCOPY_LEN) {
331 /* copy don't need to wait for DMA done */ 230 /* copy don't need to wait for DMA done */
332 vq->heads[vq->upend_idx].len = 231 vq->heads[vq->upend_idx].len =
333 VHOST_DMA_DONE_LEN; 232 VHOST_DMA_DONE_LEN;
@@ -337,10 +236,9 @@ static void handle_tx(struct vhost_net *net)
337 } else { 236 } else {
338 struct ubuf_info *ubuf = &vq->ubuf_info[head]; 237 struct ubuf_info *ubuf = &vq->ubuf_info[head];
339 238
340 vq->heads[vq->upend_idx].len = 239 vq->heads[vq->upend_idx].len = len;
341 VHOST_DMA_IN_PROGRESS;
342 ubuf->callback = vhost_zerocopy_callback; 240 ubuf->callback = vhost_zerocopy_callback;
343 ubuf->ctx = vq->ubufs; 241 ubuf->arg = vq->ubufs;
344 ubuf->desc = vq->upend_idx; 242 ubuf->desc = vq->upend_idx;
345 msg.msg_control = ubuf; 243 msg.msg_control = ubuf;
346 msg.msg_controllen = sizeof(ubuf); 244 msg.msg_controllen = sizeof(ubuf);
@@ -352,26 +250,22 @@ static void handle_tx(struct vhost_net *net)
352 /* TODO: Check specific error and bomb out unless ENOBUFS? */ 250 /* TODO: Check specific error and bomb out unless ENOBUFS? */
353 err = sock->ops->sendmsg(NULL, sock, &msg, len); 251 err = sock->ops->sendmsg(NULL, sock, &msg, len);
354 if (unlikely(err < 0)) { 252 if (unlikely(err < 0)) {
355 if (zcopy_used) { 253 if (zcopy) {
356 if (ubufs) 254 if (ubufs)
357 vhost_ubuf_put(ubufs); 255 vhost_ubuf_put(ubufs);
358 vq->upend_idx = ((unsigned)vq->upend_idx - 1) % 256 vq->upend_idx = ((unsigned)vq->upend_idx - 1) %
359 UIO_MAXIOV; 257 UIO_MAXIOV;
360 } 258 }
361 vhost_discard_vq_desc(vq, 1); 259 vhost_discard_vq_desc(vq, 1);
362 if (err == -EAGAIN || err == -ENOBUFS) 260 tx_poll_start(net, sock);
363 tx_poll_start(net, sock);
364 break; 261 break;
365 } 262 }
366 if (err != len) 263 if (err != len)
367 pr_debug("Truncated TX packet: " 264 pr_debug("Truncated TX packet: "
368 " len %d != %zd\n", err, len); 265 " len %d != %zd\n", err, len);
369 if (!zcopy_used) 266 if (!zcopy)
370 vhost_add_used_and_signal(&net->dev, vq, head, 0); 267 vhost_add_used_and_signal(&net->dev, vq, head, 0);
371 else
372 vhost_zerocopy_signal_used(net, vq);
373 total_len += len; 268 total_len += len;
374 vhost_net_tx_packet(net);
375 if (unlikely(total_len >= VHOST_NET_WEIGHT)) { 269 if (unlikely(total_len >= VHOST_NET_WEIGHT)) {
376 vhost_poll_queue(&vq->poll); 270 vhost_poll_queue(&vq->poll);
377 break; 271 break;
@@ -389,12 +283,8 @@ static int peek_head_len(struct sock *sk)
389 283
390 spin_lock_irqsave(&sk->sk_receive_queue.lock, flags); 284 spin_lock_irqsave(&sk->sk_receive_queue.lock, flags);
391 head = skb_peek(&sk->sk_receive_queue); 285 head = skb_peek(&sk->sk_receive_queue);
392 if (likely(head)) { 286 if (likely(head))
393 len = head->len; 287 len = head->len;
394 if (vlan_tx_tag_present(head))
395 len += VLAN_HLEN;
396 }
397
398 spin_unlock_irqrestore(&sk->sk_receive_queue.lock, flags); 288 spin_unlock_irqrestore(&sk->sk_receive_queue.lock, flags);
399 return len; 289 return len;
400} 290}
@@ -481,8 +371,7 @@ static void handle_rx(struct vhost_net *net)
481 .hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE 371 .hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE
482 }; 372 };
483 size_t total_len = 0; 373 size_t total_len = 0;
484 int err, mergeable; 374 int err, headcount, mergeable;
485 s16 headcount;
486 size_t vhost_hlen, sock_hlen; 375 size_t vhost_hlen, sock_hlen;
487 size_t vhost_len, sock_len; 376 size_t vhost_len, sock_len;
488 /* TODO: check that we are running from vhost_worker? */ 377 /* TODO: check that we are running from vhost_worker? */
@@ -689,17 +578,6 @@ static void vhost_net_flush(struct vhost_net *n)
689{ 578{
690 vhost_net_flush_vq(n, VHOST_NET_VQ_TX); 579 vhost_net_flush_vq(n, VHOST_NET_VQ_TX);
691 vhost_net_flush_vq(n, VHOST_NET_VQ_RX); 580 vhost_net_flush_vq(n, VHOST_NET_VQ_RX);
692 if (n->dev.vqs[VHOST_NET_VQ_TX].ubufs) {
693 mutex_lock(&n->dev.vqs[VHOST_NET_VQ_TX].mutex);
694 n->tx_flush = true;
695 mutex_unlock(&n->dev.vqs[VHOST_NET_VQ_TX].mutex);
696 /* Wait for all lower device DMAs done. */
697 vhost_ubuf_put_and_wait(n->dev.vqs[VHOST_NET_VQ_TX].ubufs);
698 mutex_lock(&n->dev.vqs[VHOST_NET_VQ_TX].mutex);
699 n->tx_flush = false;
700 kref_init(&n->dev.vqs[VHOST_NET_VQ_TX].ubufs->kref);
701 mutex_unlock(&n->dev.vqs[VHOST_NET_VQ_TX].mutex);
702 }
703} 581}
704 582
705static int vhost_net_release(struct inode *inode, struct file *f) 583static int vhost_net_release(struct inode *inode, struct file *f)
@@ -710,8 +588,7 @@ static int vhost_net_release(struct inode *inode, struct file *f)
710 588
711 vhost_net_stop(n, &tx_sock, &rx_sock); 589 vhost_net_stop(n, &tx_sock, &rx_sock);
712 vhost_net_flush(n); 590 vhost_net_flush(n);
713 vhost_dev_stop(&n->dev); 591 vhost_dev_cleanup(&n->dev);
714 vhost_dev_cleanup(&n->dev, false);
715 if (tx_sock) 592 if (tx_sock)
716 fput(tx_sock->file); 593 fput(tx_sock->file);
717 if (rx_sock) 594 if (rx_sock)
@@ -836,10 +713,6 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
836 r = vhost_init_used(vq); 713 r = vhost_init_used(vq);
837 if (r) 714 if (r)
838 goto err_vq; 715 goto err_vq;
839
840 n->tx_packets = 0;
841 n->tx_zcopy_err = 0;
842 n->tx_flush = false;
843 } 716 }
844 717
845 mutex_unlock(&vq->mutex); 718 mutex_unlock(&vq->mutex);
@@ -847,7 +720,7 @@ static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd)
847 if (oldubufs) { 720 if (oldubufs) {
848 vhost_ubuf_put_and_wait(oldubufs); 721 vhost_ubuf_put_and_wait(oldubufs);
849 mutex_lock(&vq->mutex); 722 mutex_lock(&vq->mutex);
850 vhost_zerocopy_signal_used(n, vq); 723 vhost_zerocopy_signal_used(vq);
851 mutex_unlock(&vq->mutex); 724 mutex_unlock(&vq->mutex);
852 } 725 }
853 726
@@ -942,25 +815,22 @@ static long vhost_net_ioctl(struct file *f, unsigned int ioctl,
942 return -EFAULT; 815 return -EFAULT;
943 return vhost_net_set_backend(n, backend.index, backend.fd); 816 return vhost_net_set_backend(n, backend.index, backend.fd);
944 case VHOST_GET_FEATURES: 817 case VHOST_GET_FEATURES:
945 features = VHOST_NET_FEATURES; 818 features = VHOST_FEATURES;
946 if (copy_to_user(featurep, &features, sizeof features)) 819 if (copy_to_user(featurep, &features, sizeof features))
947 return -EFAULT; 820 return -EFAULT;
948 return 0; 821 return 0;
949 case VHOST_SET_FEATURES: 822 case VHOST_SET_FEATURES:
950 if (copy_from_user(&features, featurep, sizeof features)) 823 if (copy_from_user(&features, featurep, sizeof features))
951 return -EFAULT; 824 return -EFAULT;
952 if (features & ~VHOST_NET_FEATURES) 825 if (features & ~VHOST_FEATURES)
953 return -EOPNOTSUPP; 826 return -EOPNOTSUPP;
954 return vhost_net_set_features(n, features); 827 return vhost_net_set_features(n, features);
955 case VHOST_RESET_OWNER: 828 case VHOST_RESET_OWNER:
956 return vhost_net_reset_owner(n); 829 return vhost_net_reset_owner(n);
957 default: 830 default:
958 mutex_lock(&n->dev.mutex); 831 mutex_lock(&n->dev.mutex);
959 r = vhost_dev_ioctl(&n->dev, ioctl, argp); 832 r = vhost_dev_ioctl(&n->dev, ioctl, arg);
960 if (r == -ENOIOCTLCMD) 833 vhost_net_flush(n);
961 r = vhost_vring_ioctl(&n->dev, ioctl, argp);
962 else
963 vhost_net_flush(n);
964 mutex_unlock(&n->dev.mutex); 834 mutex_unlock(&n->dev.mutex);
965 return r; 835 return r;
966 } 836 }
@@ -986,9 +856,9 @@ static const struct file_operations vhost_net_fops = {
986}; 856};
987 857
988static struct miscdevice vhost_net_misc = { 858static struct miscdevice vhost_net_misc = {
989 .minor = VHOST_NET_MINOR, 859 MISC_DYNAMIC_MINOR,
990 .name = "vhost-net", 860 "vhost-net",
991 .fops = &vhost_net_fops, 861 &vhost_net_fops,
992}; 862};
993 863
994static int vhost_net_init(void) 864static int vhost_net_init(void)
@@ -1009,5 +879,3 @@ MODULE_VERSION("0.0.1");
1009MODULE_LICENSE("GPL v2"); 879MODULE_LICENSE("GPL v2");
1010MODULE_AUTHOR("Michael S. Tsirkin"); 880MODULE_AUTHOR("Michael S. Tsirkin");
1011MODULE_DESCRIPTION("Host kernel accelerator for virtio net"); 881MODULE_DESCRIPTION("Host kernel accelerator for virtio net");
1012MODULE_ALIAS_MISCDEV(VHOST_NET_MINOR);
1013MODULE_ALIAS("devname:vhost-net");
diff --git a/drivers/vhost/tcm_vhost.c b/drivers/vhost/tcm_vhost.c
deleted file mode 100644
index b20df5c829f..00000000000
--- a/drivers/vhost/tcm_vhost.c
+++ /dev/null
@@ -1,1593 +0,0 @@
1/*******************************************************************************
2 * Vhost kernel TCM fabric driver for virtio SCSI initiators
3 *
4 * (C) Copyright 2010-2012 RisingTide Systems LLC.
5 * (C) Copyright 2010-2012 IBM Corp.
6 *
7 * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
8 *
9 * Authors: Nicholas A. Bellinger <nab@risingtidesystems.com>
10 * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
11 *
12 * This program is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU General Public License as published by
14 * the Free Software Foundation; either version 2 of the License, or
15 * (at your option) any later version.
16 *
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 *
22 ****************************************************************************/
23
24#include <linux/module.h>
25#include <linux/moduleparam.h>
26#include <generated/utsrelease.h>
27#include <linux/utsname.h>
28#include <linux/init.h>
29#include <linux/slab.h>
30#include <linux/kthread.h>
31#include <linux/types.h>
32#include <linux/string.h>
33#include <linux/configfs.h>
34#include <linux/ctype.h>
35#include <linux/compat.h>
36#include <linux/eventfd.h>
37#include <linux/fs.h>
38#include <linux/miscdevice.h>
39#include <asm/unaligned.h>
40#include <scsi/scsi.h>
41#include <scsi/scsi_tcq.h>
42#include <target/target_core_base.h>
43#include <target/target_core_fabric.h>
44#include <target/target_core_fabric_configfs.h>
45#include <target/target_core_configfs.h>
46#include <target/configfs_macros.h>
47#include <linux/vhost.h>
48#include <linux/virtio_net.h> /* TODO vhost.h currently depends on this */
49#include <linux/virtio_scsi.h>
50
51#include "vhost.c"
52#include "vhost.h"
53#include "tcm_vhost.h"
54
55enum {
56 VHOST_SCSI_VQ_CTL = 0,
57 VHOST_SCSI_VQ_EVT = 1,
58 VHOST_SCSI_VQ_IO = 2,
59};
60
61struct vhost_scsi {
62 struct tcm_vhost_tpg *vs_tpg; /* Protected by vhost_scsi->dev.mutex */
63 struct vhost_dev dev;
64 struct vhost_virtqueue vqs[3];
65
66 struct vhost_work vs_completion_work; /* cmd completion work item */
67 struct list_head vs_completion_list; /* cmd completion queue */
68 spinlock_t vs_completion_lock; /* protects s_completion_list */
69};
70
71/* Local pointer to allocated TCM configfs fabric module */
72static struct target_fabric_configfs *tcm_vhost_fabric_configfs;
73
74static struct workqueue_struct *tcm_vhost_workqueue;
75
76/* Global spinlock to protect tcm_vhost TPG list for vhost IOCTL access */
77static DEFINE_MUTEX(tcm_vhost_mutex);
78static LIST_HEAD(tcm_vhost_list);
79
80static int tcm_vhost_check_true(struct se_portal_group *se_tpg)
81{
82 return 1;
83}
84
85static int tcm_vhost_check_false(struct se_portal_group *se_tpg)
86{
87 return 0;
88}
89
90static char *tcm_vhost_get_fabric_name(void)
91{
92 return "vhost";
93}
94
95static u8 tcm_vhost_get_fabric_proto_ident(struct se_portal_group *se_tpg)
96{
97 struct tcm_vhost_tpg *tpg = container_of(se_tpg,
98 struct tcm_vhost_tpg, se_tpg);
99 struct tcm_vhost_tport *tport = tpg->tport;
100
101 switch (tport->tport_proto_id) {
102 case SCSI_PROTOCOL_SAS:
103 return sas_get_fabric_proto_ident(se_tpg);
104 case SCSI_PROTOCOL_FCP:
105 return fc_get_fabric_proto_ident(se_tpg);
106 case SCSI_PROTOCOL_ISCSI:
107 return iscsi_get_fabric_proto_ident(se_tpg);
108 default:
109 pr_err("Unknown tport_proto_id: 0x%02x, using"
110 " SAS emulation\n", tport->tport_proto_id);
111 break;
112 }
113
114 return sas_get_fabric_proto_ident(se_tpg);
115}
116
117static char *tcm_vhost_get_fabric_wwn(struct se_portal_group *se_tpg)
118{
119 struct tcm_vhost_tpg *tpg = container_of(se_tpg,
120 struct tcm_vhost_tpg, se_tpg);
121 struct tcm_vhost_tport *tport = tpg->tport;
122
123 return &tport->tport_name[0];
124}
125
126static u16 tcm_vhost_get_tag(struct se_portal_group *se_tpg)
127{
128 struct tcm_vhost_tpg *tpg = container_of(se_tpg,
129 struct tcm_vhost_tpg, se_tpg);
130 return tpg->tport_tpgt;
131}
132
133static u32 tcm_vhost_get_default_depth(struct se_portal_group *se_tpg)
134{
135 return 1;
136}
137
138static u32 tcm_vhost_get_pr_transport_id(struct se_portal_group *se_tpg,
139 struct se_node_acl *se_nacl,
140 struct t10_pr_registration *pr_reg,
141 int *format_code,
142 unsigned char *buf)
143{
144 struct tcm_vhost_tpg *tpg = container_of(se_tpg,
145 struct tcm_vhost_tpg, se_tpg);
146 struct tcm_vhost_tport *tport = tpg->tport;
147
148 switch (tport->tport_proto_id) {
149 case SCSI_PROTOCOL_SAS:
150 return sas_get_pr_transport_id(se_tpg, se_nacl, pr_reg,
151 format_code, buf);
152 case SCSI_PROTOCOL_FCP:
153 return fc_get_pr_transport_id(se_tpg, se_nacl, pr_reg,
154 format_code, buf);
155 case SCSI_PROTOCOL_ISCSI:
156 return iscsi_get_pr_transport_id(se_tpg, se_nacl, pr_reg,
157 format_code, buf);
158 default:
159 pr_err("Unknown tport_proto_id: 0x%02x, using"
160 " SAS emulation\n", tport->tport_proto_id);
161 break;
162 }
163
164 return sas_get_pr_transport_id(se_tpg, se_nacl, pr_reg,
165 format_code, buf);
166}
167
168static u32 tcm_vhost_get_pr_transport_id_len(struct se_portal_group *se_tpg,
169 struct se_node_acl *se_nacl,
170 struct t10_pr_registration *pr_reg,
171 int *format_code)
172{
173 struct tcm_vhost_tpg *tpg = container_of(se_tpg,
174 struct tcm_vhost_tpg, se_tpg);
175 struct tcm_vhost_tport *tport = tpg->tport;
176
177 switch (tport->tport_proto_id) {
178 case SCSI_PROTOCOL_SAS:
179 return sas_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg,
180 format_code);
181 case SCSI_PROTOCOL_FCP:
182 return fc_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg,
183 format_code);
184 case SCSI_PROTOCOL_ISCSI:
185 return iscsi_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg,
186 format_code);
187 default:
188 pr_err("Unknown tport_proto_id: 0x%02x, using"
189 " SAS emulation\n", tport->tport_proto_id);
190 break;
191 }
192
193 return sas_get_pr_transport_id_len(se_tpg, se_nacl, pr_reg,
194 format_code);
195}
196
197static char *tcm_vhost_parse_pr_out_transport_id(struct se_portal_group *se_tpg,
198 const char *buf,
199 u32 *out_tid_len,
200 char **port_nexus_ptr)
201{
202 struct tcm_vhost_tpg *tpg = container_of(se_tpg,
203 struct tcm_vhost_tpg, se_tpg);
204 struct tcm_vhost_tport *tport = tpg->tport;
205
206 switch (tport->tport_proto_id) {
207 case SCSI_PROTOCOL_SAS:
208 return sas_parse_pr_out_transport_id(se_tpg, buf, out_tid_len,
209 port_nexus_ptr);
210 case SCSI_PROTOCOL_FCP:
211 return fc_parse_pr_out_transport_id(se_tpg, buf, out_tid_len,
212 port_nexus_ptr);
213 case SCSI_PROTOCOL_ISCSI:
214 return iscsi_parse_pr_out_transport_id(se_tpg, buf, out_tid_len,
215 port_nexus_ptr);
216 default:
217 pr_err("Unknown tport_proto_id: 0x%02x, using"
218 " SAS emulation\n", tport->tport_proto_id);
219 break;
220 }
221
222 return sas_parse_pr_out_transport_id(se_tpg, buf, out_tid_len,
223 port_nexus_ptr);
224}
225
226static struct se_node_acl *tcm_vhost_alloc_fabric_acl(
227 struct se_portal_group *se_tpg)
228{
229 struct tcm_vhost_nacl *nacl;
230
231 nacl = kzalloc(sizeof(struct tcm_vhost_nacl), GFP_KERNEL);
232 if (!nacl) {
233 pr_err("Unable to allocate struct tcm_vhost_nacl\n");
234 return NULL;
235 }
236
237 return &nacl->se_node_acl;
238}
239
240static void tcm_vhost_release_fabric_acl(struct se_portal_group *se_tpg,
241 struct se_node_acl *se_nacl)
242{
243 struct tcm_vhost_nacl *nacl = container_of(se_nacl,
244 struct tcm_vhost_nacl, se_node_acl);
245 kfree(nacl);
246}
247
248static u32 tcm_vhost_tpg_get_inst_index(struct se_portal_group *se_tpg)
249{
250 return 1;
251}
252
253static void tcm_vhost_release_cmd(struct se_cmd *se_cmd)
254{
255 return;
256}
257
258static int tcm_vhost_shutdown_session(struct se_session *se_sess)
259{
260 return 0;
261}
262
263static void tcm_vhost_close_session(struct se_session *se_sess)
264{
265 return;
266}
267
268static u32 tcm_vhost_sess_get_index(struct se_session *se_sess)
269{
270 return 0;
271}
272
273static int tcm_vhost_write_pending(struct se_cmd *se_cmd)
274{
275 /* Go ahead and process the write immediately */
276 target_execute_cmd(se_cmd);
277 return 0;
278}
279
280static int tcm_vhost_write_pending_status(struct se_cmd *se_cmd)
281{
282 return 0;
283}
284
285static void tcm_vhost_set_default_node_attrs(struct se_node_acl *nacl)
286{
287 return;
288}
289
290static u32 tcm_vhost_get_task_tag(struct se_cmd *se_cmd)
291{
292 return 0;
293}
294
295static int tcm_vhost_get_cmd_state(struct se_cmd *se_cmd)
296{
297 return 0;
298}
299
300static void vhost_scsi_complete_cmd(struct tcm_vhost_cmd *tv_cmd)
301{
302 struct vhost_scsi *vs = tv_cmd->tvc_vhost;
303
304 spin_lock_bh(&vs->vs_completion_lock);
305 list_add_tail(&tv_cmd->tvc_completion_list, &vs->vs_completion_list);
306 spin_unlock_bh(&vs->vs_completion_lock);
307
308 vhost_work_queue(&vs->dev, &vs->vs_completion_work);
309}
310
311static int tcm_vhost_queue_data_in(struct se_cmd *se_cmd)
312{
313 struct tcm_vhost_cmd *tv_cmd = container_of(se_cmd,
314 struct tcm_vhost_cmd, tvc_se_cmd);
315 vhost_scsi_complete_cmd(tv_cmd);
316 return 0;
317}
318
319static int tcm_vhost_queue_status(struct se_cmd *se_cmd)
320{
321 struct tcm_vhost_cmd *tv_cmd = container_of(se_cmd,
322 struct tcm_vhost_cmd, tvc_se_cmd);
323 vhost_scsi_complete_cmd(tv_cmd);
324 return 0;
325}
326
327static int tcm_vhost_queue_tm_rsp(struct se_cmd *se_cmd)
328{
329 return 0;
330}
331
332static void vhost_scsi_free_cmd(struct tcm_vhost_cmd *tv_cmd)
333{
334 struct se_cmd *se_cmd = &tv_cmd->tvc_se_cmd;
335
336 /* TODO locking against target/backend threads? */
337 transport_generic_free_cmd(se_cmd, 1);
338
339 if (tv_cmd->tvc_sgl_count) {
340 u32 i;
341 for (i = 0; i < tv_cmd->tvc_sgl_count; i++)
342 put_page(sg_page(&tv_cmd->tvc_sgl[i]));
343
344 kfree(tv_cmd->tvc_sgl);
345 }
346
347 kfree(tv_cmd);
348}
349
350/* Dequeue a command from the completion list */
351static struct tcm_vhost_cmd *vhost_scsi_get_cmd_from_completion(
352 struct vhost_scsi *vs)
353{
354 struct tcm_vhost_cmd *tv_cmd = NULL;
355
356 spin_lock_bh(&vs->vs_completion_lock);
357 if (list_empty(&vs->vs_completion_list)) {
358 spin_unlock_bh(&vs->vs_completion_lock);
359 return NULL;
360 }
361
362 list_for_each_entry(tv_cmd, &vs->vs_completion_list,
363 tvc_completion_list) {
364 list_del(&tv_cmd->tvc_completion_list);
365 break;
366 }
367 spin_unlock_bh(&vs->vs_completion_lock);
368 return tv_cmd;
369}
370
371/* Fill in status and signal that we are done processing this command
372 *
373 * This is scheduled in the vhost work queue so we are called with the owner
374 * process mm and can access the vring.
375 */
376static void vhost_scsi_complete_cmd_work(struct vhost_work *work)
377{
378 struct vhost_scsi *vs = container_of(work, struct vhost_scsi,
379 vs_completion_work);
380 struct tcm_vhost_cmd *tv_cmd;
381
382 while ((tv_cmd = vhost_scsi_get_cmd_from_completion(vs))) {
383 struct virtio_scsi_cmd_resp v_rsp;
384 struct se_cmd *se_cmd = &tv_cmd->tvc_se_cmd;
385 int ret;
386
387 pr_debug("%s tv_cmd %p resid %u status %#02x\n", __func__,
388 tv_cmd, se_cmd->residual_count, se_cmd->scsi_status);
389
390 memset(&v_rsp, 0, sizeof(v_rsp));
391 v_rsp.resid = se_cmd->residual_count;
392 /* TODO is status_qualifier field needed? */
393 v_rsp.status = se_cmd->scsi_status;
394 v_rsp.sense_len = se_cmd->scsi_sense_length;
395 memcpy(v_rsp.sense, tv_cmd->tvc_sense_buf,
396 v_rsp.sense_len);
397 ret = copy_to_user(tv_cmd->tvc_resp, &v_rsp, sizeof(v_rsp));
398 if (likely(ret == 0))
399 vhost_add_used(&vs->vqs[2], tv_cmd->tvc_vq_desc, 0);
400 else
401 pr_err("Faulted on virtio_scsi_cmd_resp\n");
402
403 vhost_scsi_free_cmd(tv_cmd);
404 }
405
406 vhost_signal(&vs->dev, &vs->vqs[2]);
407}
408
409static struct tcm_vhost_cmd *vhost_scsi_allocate_cmd(
410 struct tcm_vhost_tpg *tv_tpg,
411 struct virtio_scsi_cmd_req *v_req,
412 u32 exp_data_len,
413 int data_direction)
414{
415 struct tcm_vhost_cmd *tv_cmd;
416 struct tcm_vhost_nexus *tv_nexus;
417
418 tv_nexus = tv_tpg->tpg_nexus;
419 if (!tv_nexus) {
420 pr_err("Unable to locate active struct tcm_vhost_nexus\n");
421 return ERR_PTR(-EIO);
422 }
423
424 tv_cmd = kzalloc(sizeof(struct tcm_vhost_cmd), GFP_ATOMIC);
425 if (!tv_cmd) {
426 pr_err("Unable to allocate struct tcm_vhost_cmd\n");
427 return ERR_PTR(-ENOMEM);
428 }
429 INIT_LIST_HEAD(&tv_cmd->tvc_completion_list);
430 tv_cmd->tvc_tag = v_req->tag;
431 tv_cmd->tvc_task_attr = v_req->task_attr;
432 tv_cmd->tvc_exp_data_len = exp_data_len;
433 tv_cmd->tvc_data_direction = data_direction;
434 tv_cmd->tvc_nexus = tv_nexus;
435
436 return tv_cmd;
437}
438
439/*
440 * Map a user memory range into a scatterlist
441 *
442 * Returns the number of scatterlist entries used or -errno on error.
443 */
444static int vhost_scsi_map_to_sgl(struct scatterlist *sgl,
445 unsigned int sgl_count, void __user *ptr, size_t len, int write)
446{
447 struct scatterlist *sg = sgl;
448 unsigned int npages = 0;
449 int ret;
450
451 while (len > 0) {
452 struct page *page;
453 unsigned int offset = (uintptr_t)ptr & ~PAGE_MASK;
454 unsigned int nbytes = min_t(unsigned int,
455 PAGE_SIZE - offset, len);
456
457 if (npages == sgl_count) {
458 ret = -ENOBUFS;
459 goto err;
460 }
461
462 ret = get_user_pages_fast((unsigned long)ptr, 1, write, &page);
463 BUG_ON(ret == 0); /* we should either get our page or fail */
464 if (ret < 0)
465 goto err;
466
467 sg_set_page(sg, page, nbytes, offset);
468 ptr += nbytes;
469 len -= nbytes;
470 sg++;
471 npages++;
472 }
473 return npages;
474
475err:
476 /* Put pages that we hold */
477 for (sg = sgl; sg != &sgl[npages]; sg++)
478 put_page(sg_page(sg));
479 return ret;
480}
481
482static int vhost_scsi_map_iov_to_sgl(struct tcm_vhost_cmd *tv_cmd,
483 struct iovec *iov, unsigned int niov, int write)
484{
485 int ret;
486 unsigned int i;
487 u32 sgl_count;
488 struct scatterlist *sg;
489
490 /*
491 * Find out how long sglist needs to be
492 */
493 sgl_count = 0;
494 for (i = 0; i < niov; i++) {
495 sgl_count += (((uintptr_t)iov[i].iov_base + iov[i].iov_len +
496 PAGE_SIZE - 1) >> PAGE_SHIFT) -
497 ((uintptr_t)iov[i].iov_base >> PAGE_SHIFT);
498 }
499 /* TODO overflow checking */
500
501 sg = kmalloc(sizeof(tv_cmd->tvc_sgl[0]) * sgl_count, GFP_ATOMIC);
502 if (!sg)
503 return -ENOMEM;
504 pr_debug("%s sg %p sgl_count %u is_err %d\n", __func__,
505 sg, sgl_count, !sg);
506 sg_init_table(sg, sgl_count);
507
508 tv_cmd->tvc_sgl = sg;
509 tv_cmd->tvc_sgl_count = sgl_count;
510
511 pr_debug("Mapping %u iovecs for %u pages\n", niov, sgl_count);
512 for (i = 0; i < niov; i++) {
513 ret = vhost_scsi_map_to_sgl(sg, sgl_count, iov[i].iov_base,
514 iov[i].iov_len, write);
515 if (ret < 0) {
516 for (i = 0; i < tv_cmd->tvc_sgl_count; i++)
517 put_page(sg_page(&tv_cmd->tvc_sgl[i]));
518 kfree(tv_cmd->tvc_sgl);
519 tv_cmd->tvc_sgl = NULL;
520 tv_cmd->tvc_sgl_count = 0;
521 return ret;
522 }
523
524 sg += ret;
525 sgl_count -= ret;
526 }
527 return 0;
528}
529
530static void tcm_vhost_submission_work(struct work_struct *work)
531{
532 struct tcm_vhost_cmd *tv_cmd =
533 container_of(work, struct tcm_vhost_cmd, work);
534 struct tcm_vhost_nexus *tv_nexus;
535 struct se_cmd *se_cmd = &tv_cmd->tvc_se_cmd;
536 struct scatterlist *sg_ptr, *sg_bidi_ptr = NULL;
537 int rc, sg_no_bidi = 0;
538
539 if (tv_cmd->tvc_sgl_count) {
540 sg_ptr = tv_cmd->tvc_sgl;
541/* FIXME: Fix BIDI operation in tcm_vhost_submission_work() */
542#if 0
543 if (se_cmd->se_cmd_flags & SCF_BIDI) {
544 sg_bidi_ptr = NULL;
545 sg_no_bidi = 0;
546 }
547#endif
548 } else {
549 sg_ptr = NULL;
550 }
551 tv_nexus = tv_cmd->tvc_nexus;
552
553 rc = target_submit_cmd_map_sgls(se_cmd, tv_nexus->tvn_se_sess,
554 tv_cmd->tvc_cdb, &tv_cmd->tvc_sense_buf[0],
555 tv_cmd->tvc_lun, tv_cmd->tvc_exp_data_len,
556 tv_cmd->tvc_task_attr, tv_cmd->tvc_data_direction,
557 0, sg_ptr, tv_cmd->tvc_sgl_count,
558 sg_bidi_ptr, sg_no_bidi);
559 if (rc < 0) {
560 transport_send_check_condition_and_sense(se_cmd,
561 TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE, 0);
562 transport_generic_free_cmd(se_cmd, 0);
563 }
564}
565
566static void vhost_scsi_handle_vq(struct vhost_scsi *vs)
567{
568 struct vhost_virtqueue *vq = &vs->vqs[2];
569 struct virtio_scsi_cmd_req v_req;
570 struct tcm_vhost_tpg *tv_tpg;
571 struct tcm_vhost_cmd *tv_cmd;
572 u32 exp_data_len, data_first, data_num, data_direction;
573 unsigned out, in, i;
574 int head, ret;
575
576 /* Must use ioctl VHOST_SCSI_SET_ENDPOINT */
577 tv_tpg = vs->vs_tpg;
578 if (unlikely(!tv_tpg)) {
579 pr_err("%s endpoint not set\n", __func__);
580 return;
581 }
582
583 mutex_lock(&vq->mutex);
584 vhost_disable_notify(&vs->dev, vq);
585
586 for (;;) {
587 head = vhost_get_vq_desc(&vs->dev, vq, vq->iov,
588 ARRAY_SIZE(vq->iov), &out, &in,
589 NULL, NULL);
590 pr_debug("vhost_get_vq_desc: head: %d, out: %u in: %u\n",
591 head, out, in);
592 /* On error, stop handling until the next kick. */
593 if (unlikely(head < 0))
594 break;
595 /* Nothing new? Wait for eventfd to tell us they refilled. */
596 if (head == vq->num) {
597 if (unlikely(vhost_enable_notify(&vs->dev, vq))) {
598 vhost_disable_notify(&vs->dev, vq);
599 continue;
600 }
601 break;
602 }
603
604/* FIXME: BIDI operation */
605 if (out == 1 && in == 1) {
606 data_direction = DMA_NONE;
607 data_first = 0;
608 data_num = 0;
609 } else if (out == 1 && in > 1) {
610 data_direction = DMA_FROM_DEVICE;
611 data_first = out + 1;
612 data_num = in - 1;
613 } else if (out > 1 && in == 1) {
614 data_direction = DMA_TO_DEVICE;
615 data_first = 1;
616 data_num = out - 1;
617 } else {
618 vq_err(vq, "Invalid buffer layout out: %u in: %u\n",
619 out, in);
620 break;
621 }
622
623 /*
624 * Check for a sane resp buffer so we can report errors to
625 * the guest.
626 */
627 if (unlikely(vq->iov[out].iov_len !=
628 sizeof(struct virtio_scsi_cmd_resp))) {
629 vq_err(vq, "Expecting virtio_scsi_cmd_resp, got %zu"
630 " bytes\n", vq->iov[out].iov_len);
631 break;
632 }
633
634 if (unlikely(vq->iov[0].iov_len != sizeof(v_req))) {
635 vq_err(vq, "Expecting virtio_scsi_cmd_req, got %zu"
636 " bytes\n", vq->iov[0].iov_len);
637 break;
638 }
639 pr_debug("Calling __copy_from_user: vq->iov[0].iov_base: %p,"
640 " len: %zu\n", vq->iov[0].iov_base, sizeof(v_req));
641 ret = __copy_from_user(&v_req, vq->iov[0].iov_base,
642 sizeof(v_req));
643 if (unlikely(ret)) {
644 vq_err(vq, "Faulted on virtio_scsi_cmd_req\n");
645 break;
646 }
647
648 exp_data_len = 0;
649 for (i = 0; i < data_num; i++)
650 exp_data_len += vq->iov[data_first + i].iov_len;
651
652 tv_cmd = vhost_scsi_allocate_cmd(tv_tpg, &v_req,
653 exp_data_len, data_direction);
654 if (IS_ERR(tv_cmd)) {
655 vq_err(vq, "vhost_scsi_allocate_cmd failed %ld\n",
656 PTR_ERR(tv_cmd));
657 break;
658 }
659 pr_debug("Allocated tv_cmd: %p exp_data_len: %d, data_direction"
660 ": %d\n", tv_cmd, exp_data_len, data_direction);
661
662 tv_cmd->tvc_vhost = vs;
663
664 if (unlikely(vq->iov[out].iov_len !=
665 sizeof(struct virtio_scsi_cmd_resp))) {
666 vq_err(vq, "Expecting virtio_scsi_cmd_resp, got %zu"
667 " bytes, out: %d, in: %d\n",
668 vq->iov[out].iov_len, out, in);
669 break;
670 }
671
672 tv_cmd->tvc_resp = vq->iov[out].iov_base;
673
674 /*
675 * Copy in the recieved CDB descriptor into tv_cmd->tvc_cdb
676 * that will be used by tcm_vhost_new_cmd_map() and down into
677 * target_setup_cmd_from_cdb()
678 */
679 memcpy(tv_cmd->tvc_cdb, v_req.cdb, TCM_VHOST_MAX_CDB_SIZE);
680 /*
681 * Check that the recieved CDB size does not exceeded our
682 * hardcoded max for tcm_vhost
683 */
684 /* TODO what if cdb was too small for varlen cdb header? */
685 if (unlikely(scsi_command_size(tv_cmd->tvc_cdb) >
686 TCM_VHOST_MAX_CDB_SIZE)) {
687 vq_err(vq, "Received SCSI CDB with command_size: %d that"
688 " exceeds SCSI_MAX_VARLEN_CDB_SIZE: %d\n",
689 scsi_command_size(tv_cmd->tvc_cdb),
690 TCM_VHOST_MAX_CDB_SIZE);
691 break; /* TODO */
692 }
693 tv_cmd->tvc_lun = ((v_req.lun[2] << 8) | v_req.lun[3]) & 0x3FFF;
694
695 pr_debug("vhost_scsi got command opcode: %#02x, lun: %d\n",
696 tv_cmd->tvc_cdb[0], tv_cmd->tvc_lun);
697
698 if (data_direction != DMA_NONE) {
699 ret = vhost_scsi_map_iov_to_sgl(tv_cmd,
700 &vq->iov[data_first], data_num,
701 data_direction == DMA_TO_DEVICE);
702 if (unlikely(ret)) {
703 vq_err(vq, "Failed to map iov to sgl\n");
704 break; /* TODO */
705 }
706 }
707
708 /*
709 * Save the descriptor from vhost_get_vq_desc() to be used to
710 * complete the virtio-scsi request in TCM callback context via
711 * tcm_vhost_queue_data_in() and tcm_vhost_queue_status()
712 */
713 tv_cmd->tvc_vq_desc = head;
714 /*
715 * Dispatch tv_cmd descriptor for cmwq execution in process
716 * context provided by tcm_vhost_workqueue. This also ensures
717 * tv_cmd is executed on the same kworker CPU as this vhost
718 * thread to gain positive L2 cache locality effects..
719 */
720 INIT_WORK(&tv_cmd->work, tcm_vhost_submission_work);
721 queue_work(tcm_vhost_workqueue, &tv_cmd->work);
722 }
723
724 mutex_unlock(&vq->mutex);
725}
726
/* Kick handler for the control virtqueue; only emits a debug trace. */
static void vhost_scsi_ctl_handle_kick(struct vhost_work *work)
{
	pr_debug("%s: The handling func for control queue.\n", __func__);
}
731
/* Kick handler for the event virtqueue; only emits a debug trace. */
static void vhost_scsi_evt_handle_kick(struct vhost_work *work)
{
	pr_debug("%s: The handling func for event queue.\n", __func__);
}
736
737static void vhost_scsi_handle_kick(struct vhost_work *work)
738{
739 struct vhost_virtqueue *vq = container_of(work, struct vhost_virtqueue,
740 poll.work);
741 struct vhost_scsi *vs = container_of(vq->dev, struct vhost_scsi, dev);
742
743 vhost_scsi_handle_vq(vs);
744}
745
746/*
747 * Called from vhost_scsi_ioctl() context to walk the list of available
748 * tcm_vhost_tpg with an active struct tcm_vhost_nexus
749 */
750static int vhost_scsi_set_endpoint(
751 struct vhost_scsi *vs,
752 struct vhost_scsi_target *t)
753{
754 struct tcm_vhost_tport *tv_tport;
755 struct tcm_vhost_tpg *tv_tpg;
756 int index;
757
758 mutex_lock(&vs->dev.mutex);
759 /* Verify that ring has been setup correctly. */
760 for (index = 0; index < vs->dev.nvqs; ++index) {
761 /* Verify that ring has been setup correctly. */
762 if (!vhost_vq_access_ok(&vs->vqs[index])) {
763 mutex_unlock(&vs->dev.mutex);
764 return -EFAULT;
765 }
766 }
767 mutex_unlock(&vs->dev.mutex);
768
769 mutex_lock(&tcm_vhost_mutex);
770 list_for_each_entry(tv_tpg, &tcm_vhost_list, tv_tpg_list) {
771 mutex_lock(&tv_tpg->tv_tpg_mutex);
772 if (!tv_tpg->tpg_nexus) {
773 mutex_unlock(&tv_tpg->tv_tpg_mutex);
774 continue;
775 }
776 if (tv_tpg->tv_tpg_vhost_count != 0) {
777 mutex_unlock(&tv_tpg->tv_tpg_mutex);
778 continue;
779 }
780 tv_tport = tv_tpg->tport;
781
782 if (!strcmp(tv_tport->tport_name, t->vhost_wwpn) &&
783 (tv_tpg->tport_tpgt == t->vhost_tpgt)) {
784 tv_tpg->tv_tpg_vhost_count++;
785 mutex_unlock(&tv_tpg->tv_tpg_mutex);
786 mutex_unlock(&tcm_vhost_mutex);
787
788 mutex_lock(&vs->dev.mutex);
789 if (vs->vs_tpg) {
790 mutex_unlock(&vs->dev.mutex);
791 mutex_lock(&tv_tpg->tv_tpg_mutex);
792 tv_tpg->tv_tpg_vhost_count--;
793 mutex_unlock(&tv_tpg->tv_tpg_mutex);
794 return -EEXIST;
795 }
796
797 vs->vs_tpg = tv_tpg;
798 smp_mb__after_atomic_inc();
799 mutex_unlock(&vs->dev.mutex);
800 return 0;
801 }
802 mutex_unlock(&tv_tpg->tv_tpg_mutex);
803 }
804 mutex_unlock(&tcm_vhost_mutex);
805 return -EINVAL;
806}
807
808static int vhost_scsi_clear_endpoint(
809 struct vhost_scsi *vs,
810 struct vhost_scsi_target *t)
811{
812 struct tcm_vhost_tport *tv_tport;
813 struct tcm_vhost_tpg *tv_tpg;
814 int index, ret;
815
816 mutex_lock(&vs->dev.mutex);
817 /* Verify that ring has been setup correctly. */
818 for (index = 0; index < vs->dev.nvqs; ++index) {
819 if (!vhost_vq_access_ok(&vs->vqs[index])) {
820 ret = -EFAULT;
821 goto err;
822 }
823 }
824
825 if (!vs->vs_tpg) {
826 ret = -ENODEV;
827 goto err;
828 }
829 tv_tpg = vs->vs_tpg;
830 tv_tport = tv_tpg->tport;
831
832 if (strcmp(tv_tport->tport_name, t->vhost_wwpn) ||
833 (tv_tpg->tport_tpgt != t->vhost_tpgt)) {
834 pr_warn("tv_tport->tport_name: %s, tv_tpg->tport_tpgt: %hu"
835 " does not match t->vhost_wwpn: %s, t->vhost_tpgt: %hu\n",
836 tv_tport->tport_name, tv_tpg->tport_tpgt,
837 t->vhost_wwpn, t->vhost_tpgt);
838 ret = -EINVAL;
839 goto err;
840 }
841 tv_tpg->tv_tpg_vhost_count--;
842 vs->vs_tpg = NULL;
843 mutex_unlock(&vs->dev.mutex);
844
845 return 0;
846
847err:
848 mutex_unlock(&vs->dev.mutex);
849 return ret;
850}
851
852static int vhost_scsi_open(struct inode *inode, struct file *f)
853{
854 struct vhost_scsi *s;
855 int r;
856
857 s = kzalloc(sizeof(*s), GFP_KERNEL);
858 if (!s)
859 return -ENOMEM;
860
861 vhost_work_init(&s->vs_completion_work, vhost_scsi_complete_cmd_work);
862 INIT_LIST_HEAD(&s->vs_completion_list);
863 spin_lock_init(&s->vs_completion_lock);
864
865 s->vqs[VHOST_SCSI_VQ_CTL].handle_kick = vhost_scsi_ctl_handle_kick;
866 s->vqs[VHOST_SCSI_VQ_EVT].handle_kick = vhost_scsi_evt_handle_kick;
867 s->vqs[VHOST_SCSI_VQ_IO].handle_kick = vhost_scsi_handle_kick;
868 r = vhost_dev_init(&s->dev, s->vqs, 3);
869 if (r < 0) {
870 kfree(s);
871 return r;
872 }
873
874 f->private_data = s;
875 return 0;
876}
877
878static int vhost_scsi_release(struct inode *inode, struct file *f)
879{
880 struct vhost_scsi *s = f->private_data;
881
882 if (s->vs_tpg && s->vs_tpg->tport) {
883 struct vhost_scsi_target backend;
884
885 memcpy(backend.vhost_wwpn, s->vs_tpg->tport->tport_name,
886 sizeof(backend.vhost_wwpn));
887 backend.vhost_tpgt = s->vs_tpg->tport_tpgt;
888 vhost_scsi_clear_endpoint(s, &backend);
889 }
890
891 vhost_dev_stop(&s->dev);
892 vhost_dev_cleanup(&s->dev, false);
893 kfree(s);
894 return 0;
895}
896
897static void vhost_scsi_flush_vq(struct vhost_scsi *vs, int index)
898{
899 vhost_poll_flush(&vs->dev.vqs[index].poll);
900}
901
902static void vhost_scsi_flush(struct vhost_scsi *vs)
903{
904 vhost_scsi_flush_vq(vs, VHOST_SCSI_VQ_CTL);
905 vhost_scsi_flush_vq(vs, VHOST_SCSI_VQ_EVT);
906 vhost_scsi_flush_vq(vs, VHOST_SCSI_VQ_IO);
907}
908
909static int vhost_scsi_set_features(struct vhost_scsi *vs, u64 features)
910{
911 if (features & ~VHOST_FEATURES)
912 return -EOPNOTSUPP;
913
914 mutex_lock(&vs->dev.mutex);
915 if ((features & (1 << VHOST_F_LOG_ALL)) &&
916 !vhost_log_access_ok(&vs->dev)) {
917 mutex_unlock(&vs->dev.mutex);
918 return -EFAULT;
919 }
920 vs->dev.acked_features = features;
921 smp_wmb();
922 vhost_scsi_flush(vs);
923 mutex_unlock(&vs->dev.mutex);
924 return 0;
925}
926
927static long vhost_scsi_ioctl(struct file *f, unsigned int ioctl,
928 unsigned long arg)
929{
930 struct vhost_scsi *vs = f->private_data;
931 struct vhost_scsi_target backend;
932 void __user *argp = (void __user *)arg;
933 u64 __user *featurep = argp;
934 u64 features;
935 int r, abi_version = VHOST_SCSI_ABI_VERSION;
936
937 switch (ioctl) {
938 case VHOST_SCSI_SET_ENDPOINT:
939 if (copy_from_user(&backend, argp, sizeof backend))
940 return -EFAULT;
941 if (backend.reserved != 0)
942 return -EOPNOTSUPP;
943
944 return vhost_scsi_set_endpoint(vs, &backend);
945 case VHOST_SCSI_CLEAR_ENDPOINT:
946 if (copy_from_user(&backend, argp, sizeof backend))
947 return -EFAULT;
948 if (backend.reserved != 0)
949 return -EOPNOTSUPP;
950
951 return vhost_scsi_clear_endpoint(vs, &backend);
952 case VHOST_SCSI_GET_ABI_VERSION:
953 if (copy_to_user(argp, &abi_version, sizeof abi_version))
954 return -EFAULT;
955 return 0;
956 case VHOST_GET_FEATURES:
957 features = VHOST_FEATURES;
958 if (copy_to_user(featurep, &features, sizeof features))
959 return -EFAULT;
960 return 0;
961 case VHOST_SET_FEATURES:
962 if (copy_from_user(&features, featurep, sizeof features))
963 return -EFAULT;
964 return vhost_scsi_set_features(vs, features);
965 default:
966 mutex_lock(&vs->dev.mutex);
967 r = vhost_dev_ioctl(&vs->dev, ioctl, argp);
968 /* TODO: flush backend after dev ioctl. */
969 if (r == -ENOIOCTLCMD)
970 r = vhost_vring_ioctl(&vs->dev, ioctl, argp);
971 mutex_unlock(&vs->dev.mutex);
972 return r;
973 }
974}
975
#ifdef CONFIG_COMPAT
/* Compat ioctl entry: convert the compat pointer, then reuse the native path. */
static long vhost_scsi_compat_ioctl(struct file *f, unsigned int ioctl,
				unsigned long arg)
{
	unsigned long native_arg = (unsigned long)compat_ptr(arg);

	return vhost_scsi_ioctl(f, ioctl, native_arg);
}
#endif
983
984static const struct file_operations vhost_scsi_fops = {
985 .owner = THIS_MODULE,
986 .release = vhost_scsi_release,
987 .unlocked_ioctl = vhost_scsi_ioctl,
988#ifdef CONFIG_COMPAT
989 .compat_ioctl = vhost_scsi_compat_ioctl,
990#endif
991 .open = vhost_scsi_open,
992 .llseek = noop_llseek,
993};
994
995static struct miscdevice vhost_scsi_misc = {
996 MISC_DYNAMIC_MINOR,
997 "vhost-scsi",
998 &vhost_scsi_fops,
999};
1000
1001static int __init vhost_scsi_register(void)
1002{
1003 return misc_register(&vhost_scsi_misc);
1004}
1005
1006static int vhost_scsi_deregister(void)
1007{
1008 return misc_deregister(&vhost_scsi_misc);
1009}
1010
1011static char *tcm_vhost_dump_proto_id(struct tcm_vhost_tport *tport)
1012{
1013 switch (tport->tport_proto_id) {
1014 case SCSI_PROTOCOL_SAS:
1015 return "SAS";
1016 case SCSI_PROTOCOL_FCP:
1017 return "FCP";
1018 case SCSI_PROTOCOL_ISCSI:
1019 return "iSCSI";
1020 default:
1021 break;
1022 }
1023
1024 return "Unknown";
1025}
1026
1027static int tcm_vhost_port_link(struct se_portal_group *se_tpg,
1028 struct se_lun *lun)
1029{
1030 struct tcm_vhost_tpg *tv_tpg = container_of(se_tpg,
1031 struct tcm_vhost_tpg, se_tpg);
1032
1033 mutex_lock(&tv_tpg->tv_tpg_mutex);
1034 tv_tpg->tv_tpg_port_count++;
1035 mutex_unlock(&tv_tpg->tv_tpg_mutex);
1036
1037 return 0;
1038}
1039
1040static void tcm_vhost_port_unlink(struct se_portal_group *se_tpg,
1041 struct se_lun *se_lun)
1042{
1043 struct tcm_vhost_tpg *tv_tpg = container_of(se_tpg,
1044 struct tcm_vhost_tpg, se_tpg);
1045
1046 mutex_lock(&tv_tpg->tv_tpg_mutex);
1047 tv_tpg->tv_tpg_port_count--;
1048 mutex_unlock(&tv_tpg->tv_tpg_mutex);
1049}
1050
1051static struct se_node_acl *tcm_vhost_make_nodeacl(
1052 struct se_portal_group *se_tpg,
1053 struct config_group *group,
1054 const char *name)
1055{
1056 struct se_node_acl *se_nacl, *se_nacl_new;
1057 struct tcm_vhost_nacl *nacl;
1058 u64 wwpn = 0;
1059 u32 nexus_depth;
1060
1061 /* tcm_vhost_parse_wwn(name, &wwpn, 1) < 0)
1062 return ERR_PTR(-EINVAL); */
1063 se_nacl_new = tcm_vhost_alloc_fabric_acl(se_tpg);
1064 if (!se_nacl_new)
1065 return ERR_PTR(-ENOMEM);
1066
1067 nexus_depth = 1;
1068 /*
1069 * se_nacl_new may be released by core_tpg_add_initiator_node_acl()
1070 * when converting a NodeACL from demo mode -> explict
1071 */
1072 se_nacl = core_tpg_add_initiator_node_acl(se_tpg, se_nacl_new,
1073 name, nexus_depth);
1074 if (IS_ERR(se_nacl)) {
1075 tcm_vhost_release_fabric_acl(se_tpg, se_nacl_new);
1076 return se_nacl;
1077 }
1078 /*
1079 * Locate our struct tcm_vhost_nacl and set the FC Nport WWPN
1080 */
1081 nacl = container_of(se_nacl, struct tcm_vhost_nacl, se_node_acl);
1082 nacl->iport_wwpn = wwpn;
1083
1084 return se_nacl;
1085}
1086
1087static void tcm_vhost_drop_nodeacl(struct se_node_acl *se_acl)
1088{
1089 struct tcm_vhost_nacl *nacl = container_of(se_acl,
1090 struct tcm_vhost_nacl, se_node_acl);
1091 core_tpg_del_initiator_node_acl(se_acl->se_tpg, se_acl, 1);
1092 kfree(nacl);
1093}
1094
1095static int tcm_vhost_make_nexus(struct tcm_vhost_tpg *tv_tpg,
1096 const char *name)
1097{
1098 struct se_portal_group *se_tpg;
1099 struct tcm_vhost_nexus *tv_nexus;
1100
1101 mutex_lock(&tv_tpg->tv_tpg_mutex);
1102 if (tv_tpg->tpg_nexus) {
1103 mutex_unlock(&tv_tpg->tv_tpg_mutex);
1104 pr_debug("tv_tpg->tpg_nexus already exists\n");
1105 return -EEXIST;
1106 }
1107 se_tpg = &tv_tpg->se_tpg;
1108
1109 tv_nexus = kzalloc(sizeof(struct tcm_vhost_nexus), GFP_KERNEL);
1110 if (!tv_nexus) {
1111 mutex_unlock(&tv_tpg->tv_tpg_mutex);
1112 pr_err("Unable to allocate struct tcm_vhost_nexus\n");
1113 return -ENOMEM;
1114 }
1115 /*
1116 * Initialize the struct se_session pointer
1117 */
1118 tv_nexus->tvn_se_sess = transport_init_session();
1119 if (IS_ERR(tv_nexus->tvn_se_sess)) {
1120 mutex_unlock(&tv_tpg->tv_tpg_mutex);
1121 kfree(tv_nexus);
1122 return -ENOMEM;
1123 }
1124 /*
1125 * Since we are running in 'demo mode' this call with generate a
1126 * struct se_node_acl for the tcm_vhost struct se_portal_group with
1127 * the SCSI Initiator port name of the passed configfs group 'name'.
1128 */
1129 tv_nexus->tvn_se_sess->se_node_acl = core_tpg_check_initiator_node_acl(
1130 se_tpg, (unsigned char *)name);
1131 if (!tv_nexus->tvn_se_sess->se_node_acl) {
1132 mutex_unlock(&tv_tpg->tv_tpg_mutex);
1133 pr_debug("core_tpg_check_initiator_node_acl() failed"
1134 " for %s\n", name);
1135 transport_free_session(tv_nexus->tvn_se_sess);
1136 kfree(tv_nexus);
1137 return -ENOMEM;
1138 }
1139 /*
1140 * Now register the TCM vhost virtual I_T Nexus as active with the
1141 * call to __transport_register_session()
1142 */
1143 __transport_register_session(se_tpg, tv_nexus->tvn_se_sess->se_node_acl,
1144 tv_nexus->tvn_se_sess, tv_nexus);
1145 tv_tpg->tpg_nexus = tv_nexus;
1146
1147 mutex_unlock(&tv_tpg->tv_tpg_mutex);
1148 return 0;
1149}
1150
1151static int tcm_vhost_drop_nexus(struct tcm_vhost_tpg *tpg)
1152{
1153 struct se_session *se_sess;
1154 struct tcm_vhost_nexus *tv_nexus;
1155
1156 mutex_lock(&tpg->tv_tpg_mutex);
1157 tv_nexus = tpg->tpg_nexus;
1158 if (!tv_nexus) {
1159 mutex_unlock(&tpg->tv_tpg_mutex);
1160 return -ENODEV;
1161 }
1162
1163 se_sess = tv_nexus->tvn_se_sess;
1164 if (!se_sess) {
1165 mutex_unlock(&tpg->tv_tpg_mutex);
1166 return -ENODEV;
1167 }
1168
1169 if (tpg->tv_tpg_port_count != 0) {
1170 mutex_unlock(&tpg->tv_tpg_mutex);
1171 pr_err("Unable to remove TCM_vhost I_T Nexus with"
1172 " active TPG port count: %d\n",
1173 tpg->tv_tpg_port_count);
1174 return -EBUSY;
1175 }
1176
1177 if (tpg->tv_tpg_vhost_count != 0) {
1178 mutex_unlock(&tpg->tv_tpg_mutex);
1179 pr_err("Unable to remove TCM_vhost I_T Nexus with"
1180 " active TPG vhost count: %d\n",
1181 tpg->tv_tpg_vhost_count);
1182 return -EBUSY;
1183 }
1184
1185 pr_debug("TCM_vhost_ConfigFS: Removing I_T Nexus to emulated"
1186 " %s Initiator Port: %s\n", tcm_vhost_dump_proto_id(tpg->tport),
1187 tv_nexus->tvn_se_sess->se_node_acl->initiatorname);
1188 /*
1189 * Release the SCSI I_T Nexus to the emulated vhost Target Port
1190 */
1191 transport_deregister_session(tv_nexus->tvn_se_sess);
1192 tpg->tpg_nexus = NULL;
1193 mutex_unlock(&tpg->tv_tpg_mutex);
1194
1195 kfree(tv_nexus);
1196 return 0;
1197}
1198
1199static ssize_t tcm_vhost_tpg_show_nexus(struct se_portal_group *se_tpg,
1200 char *page)
1201{
1202 struct tcm_vhost_tpg *tv_tpg = container_of(se_tpg,
1203 struct tcm_vhost_tpg, se_tpg);
1204 struct tcm_vhost_nexus *tv_nexus;
1205 ssize_t ret;
1206
1207 mutex_lock(&tv_tpg->tv_tpg_mutex);
1208 tv_nexus = tv_tpg->tpg_nexus;
1209 if (!tv_nexus) {
1210 mutex_unlock(&tv_tpg->tv_tpg_mutex);
1211 return -ENODEV;
1212 }
1213 ret = snprintf(page, PAGE_SIZE, "%s\n",
1214 tv_nexus->tvn_se_sess->se_node_acl->initiatorname);
1215 mutex_unlock(&tv_tpg->tv_tpg_mutex);
1216
1217 return ret;
1218}
1219
1220static ssize_t tcm_vhost_tpg_store_nexus(struct se_portal_group *se_tpg,
1221 const char *page,
1222 size_t count)
1223{
1224 struct tcm_vhost_tpg *tv_tpg = container_of(se_tpg,
1225 struct tcm_vhost_tpg, se_tpg);
1226 struct tcm_vhost_tport *tport_wwn = tv_tpg->tport;
1227 unsigned char i_port[TCM_VHOST_NAMELEN], *ptr, *port_ptr;
1228 int ret;
1229 /*
1230 * Shutdown the active I_T nexus if 'NULL' is passed..
1231 */
1232 if (!strncmp(page, "NULL", 4)) {
1233 ret = tcm_vhost_drop_nexus(tv_tpg);
1234 return (!ret) ? count : ret;
1235 }
1236 /*
1237 * Otherwise make sure the passed virtual Initiator port WWN matches
1238 * the fabric protocol_id set in tcm_vhost_make_tport(), and call
1239 * tcm_vhost_make_nexus().
1240 */
1241 if (strlen(page) >= TCM_VHOST_NAMELEN) {
1242 pr_err("Emulated NAA Sas Address: %s, exceeds"
1243 " max: %d\n", page, TCM_VHOST_NAMELEN);
1244 return -EINVAL;
1245 }
1246 snprintf(&i_port[0], TCM_VHOST_NAMELEN, "%s", page);
1247
1248 ptr = strstr(i_port, "naa.");
1249 if (ptr) {
1250 if (tport_wwn->tport_proto_id != SCSI_PROTOCOL_SAS) {
1251 pr_err("Passed SAS Initiator Port %s does not"
1252 " match target port protoid: %s\n", i_port,
1253 tcm_vhost_dump_proto_id(tport_wwn));
1254 return -EINVAL;
1255 }
1256 port_ptr = &i_port[0];
1257 goto check_newline;
1258 }
1259 ptr = strstr(i_port, "fc.");
1260 if (ptr) {
1261 if (tport_wwn->tport_proto_id != SCSI_PROTOCOL_FCP) {
1262 pr_err("Passed FCP Initiator Port %s does not"
1263 " match target port protoid: %s\n", i_port,
1264 tcm_vhost_dump_proto_id(tport_wwn));
1265 return -EINVAL;
1266 }
1267 port_ptr = &i_port[3]; /* Skip over "fc." */
1268 goto check_newline;
1269 }
1270 ptr = strstr(i_port, "iqn.");
1271 if (ptr) {
1272 if (tport_wwn->tport_proto_id != SCSI_PROTOCOL_ISCSI) {
1273 pr_err("Passed iSCSI Initiator Port %s does not"
1274 " match target port protoid: %s\n", i_port,
1275 tcm_vhost_dump_proto_id(tport_wwn));
1276 return -EINVAL;
1277 }
1278 port_ptr = &i_port[0];
1279 goto check_newline;
1280 }
1281 pr_err("Unable to locate prefix for emulated Initiator Port:"
1282 " %s\n", i_port);
1283 return -EINVAL;
1284 /*
1285 * Clear any trailing newline for the NAA WWN
1286 */
1287check_newline:
1288 if (i_port[strlen(i_port)-1] == '\n')
1289 i_port[strlen(i_port)-1] = '\0';
1290
1291 ret = tcm_vhost_make_nexus(tv_tpg, port_ptr);
1292 if (ret < 0)
1293 return ret;
1294
1295 return count;
1296}
1297
1298TF_TPG_BASE_ATTR(tcm_vhost, nexus, S_IRUGO | S_IWUSR);
1299
1300static struct configfs_attribute *tcm_vhost_tpg_attrs[] = {
1301 &tcm_vhost_tpg_nexus.attr,
1302 NULL,
1303};
1304
1305static struct se_portal_group *tcm_vhost_make_tpg(struct se_wwn *wwn,
1306 struct config_group *group,
1307 const char *name)
1308{
1309 struct tcm_vhost_tport *tport = container_of(wwn,
1310 struct tcm_vhost_tport, tport_wwn);
1311
1312 struct tcm_vhost_tpg *tpg;
1313 unsigned long tpgt;
1314 int ret;
1315
1316 if (strstr(name, "tpgt_") != name)
1317 return ERR_PTR(-EINVAL);
1318 if (kstrtoul(name + 5, 10, &tpgt) || tpgt > UINT_MAX)
1319 return ERR_PTR(-EINVAL);
1320
1321 tpg = kzalloc(sizeof(struct tcm_vhost_tpg), GFP_KERNEL);
1322 if (!tpg) {
1323 pr_err("Unable to allocate struct tcm_vhost_tpg");
1324 return ERR_PTR(-ENOMEM);
1325 }
1326 mutex_init(&tpg->tv_tpg_mutex);
1327 INIT_LIST_HEAD(&tpg->tv_tpg_list);
1328 tpg->tport = tport;
1329 tpg->tport_tpgt = tpgt;
1330
1331 ret = core_tpg_register(&tcm_vhost_fabric_configfs->tf_ops, wwn,
1332 &tpg->se_tpg, tpg, TRANSPORT_TPG_TYPE_NORMAL);
1333 if (ret < 0) {
1334 kfree(tpg);
1335 return NULL;
1336 }
1337 mutex_lock(&tcm_vhost_mutex);
1338 list_add_tail(&tpg->tv_tpg_list, &tcm_vhost_list);
1339 mutex_unlock(&tcm_vhost_mutex);
1340
1341 return &tpg->se_tpg;
1342}
1343
1344static void tcm_vhost_drop_tpg(struct se_portal_group *se_tpg)
1345{
1346 struct tcm_vhost_tpg *tpg = container_of(se_tpg,
1347 struct tcm_vhost_tpg, se_tpg);
1348
1349 mutex_lock(&tcm_vhost_mutex);
1350 list_del(&tpg->tv_tpg_list);
1351 mutex_unlock(&tcm_vhost_mutex);
1352 /*
1353 * Release the virtual I_T Nexus for this vhost TPG
1354 */
1355 tcm_vhost_drop_nexus(tpg);
1356 /*
1357 * Deregister the se_tpg from TCM..
1358 */
1359 core_tpg_deregister(se_tpg);
1360 kfree(tpg);
1361}
1362
1363static struct se_wwn *tcm_vhost_make_tport(struct target_fabric_configfs *tf,
1364 struct config_group *group,
1365 const char *name)
1366{
1367 struct tcm_vhost_tport *tport;
1368 char *ptr;
1369 u64 wwpn = 0;
1370 int off = 0;
1371
1372 /* if (tcm_vhost_parse_wwn(name, &wwpn, 1) < 0)
1373 return ERR_PTR(-EINVAL); */
1374
1375 tport = kzalloc(sizeof(struct tcm_vhost_tport), GFP_KERNEL);
1376 if (!tport) {
1377 pr_err("Unable to allocate struct tcm_vhost_tport");
1378 return ERR_PTR(-ENOMEM);
1379 }
1380 tport->tport_wwpn = wwpn;
1381 /*
1382 * Determine the emulated Protocol Identifier and Target Port Name
1383 * based on the incoming configfs directory name.
1384 */
1385 ptr = strstr(name, "naa.");
1386 if (ptr) {
1387 tport->tport_proto_id = SCSI_PROTOCOL_SAS;
1388 goto check_len;
1389 }
1390 ptr = strstr(name, "fc.");
1391 if (ptr) {
1392 tport->tport_proto_id = SCSI_PROTOCOL_FCP;
1393 off = 3; /* Skip over "fc." */
1394 goto check_len;
1395 }
1396 ptr = strstr(name, "iqn.");
1397 if (ptr) {
1398 tport->tport_proto_id = SCSI_PROTOCOL_ISCSI;
1399 goto check_len;
1400 }
1401
1402 pr_err("Unable to locate prefix for emulated Target Port:"
1403 " %s\n", name);
1404 kfree(tport);
1405 return ERR_PTR(-EINVAL);
1406
1407check_len:
1408 if (strlen(name) >= TCM_VHOST_NAMELEN) {
1409 pr_err("Emulated %s Address: %s, exceeds"
1410 " max: %d\n", name, tcm_vhost_dump_proto_id(tport),
1411 TCM_VHOST_NAMELEN);
1412 kfree(tport);
1413 return ERR_PTR(-EINVAL);
1414 }
1415 snprintf(&tport->tport_name[0], TCM_VHOST_NAMELEN, "%s", &name[off]);
1416
1417 pr_debug("TCM_VHost_ConfigFS: Allocated emulated Target"
1418 " %s Address: %s\n", tcm_vhost_dump_proto_id(tport), name);
1419
1420 return &tport->tport_wwn;
1421}
1422
1423static void tcm_vhost_drop_tport(struct se_wwn *wwn)
1424{
1425 struct tcm_vhost_tport *tport = container_of(wwn,
1426 struct tcm_vhost_tport, tport_wwn);
1427
1428 pr_debug("TCM_VHost_ConfigFS: Deallocating emulated Target"
1429 " %s Address: %s\n", tcm_vhost_dump_proto_id(tport),
1430 tport->tport_name);
1431
1432 kfree(tport);
1433}
1434
1435static ssize_t tcm_vhost_wwn_show_attr_version(
1436 struct target_fabric_configfs *tf,
1437 char *page)
1438{
1439 return sprintf(page, "TCM_VHOST fabric module %s on %s/%s"
1440 "on "UTS_RELEASE"\n", TCM_VHOST_VERSION, utsname()->sysname,
1441 utsname()->machine);
1442}
1443
1444TF_WWN_ATTR_RO(tcm_vhost, version);
1445
1446static struct configfs_attribute *tcm_vhost_wwn_attrs[] = {
1447 &tcm_vhost_wwn_version.attr,
1448 NULL,
1449};
1450
1451static struct target_core_fabric_ops tcm_vhost_ops = {
1452 .get_fabric_name = tcm_vhost_get_fabric_name,
1453 .get_fabric_proto_ident = tcm_vhost_get_fabric_proto_ident,
1454 .tpg_get_wwn = tcm_vhost_get_fabric_wwn,
1455 .tpg_get_tag = tcm_vhost_get_tag,
1456 .tpg_get_default_depth = tcm_vhost_get_default_depth,
1457 .tpg_get_pr_transport_id = tcm_vhost_get_pr_transport_id,
1458 .tpg_get_pr_transport_id_len = tcm_vhost_get_pr_transport_id_len,
1459 .tpg_parse_pr_out_transport_id = tcm_vhost_parse_pr_out_transport_id,
1460 .tpg_check_demo_mode = tcm_vhost_check_true,
1461 .tpg_check_demo_mode_cache = tcm_vhost_check_true,
1462 .tpg_check_demo_mode_write_protect = tcm_vhost_check_false,
1463 .tpg_check_prod_mode_write_protect = tcm_vhost_check_false,
1464 .tpg_alloc_fabric_acl = tcm_vhost_alloc_fabric_acl,
1465 .tpg_release_fabric_acl = tcm_vhost_release_fabric_acl,
1466 .tpg_get_inst_index = tcm_vhost_tpg_get_inst_index,
1467 .release_cmd = tcm_vhost_release_cmd,
1468 .shutdown_session = tcm_vhost_shutdown_session,
1469 .close_session = tcm_vhost_close_session,
1470 .sess_get_index = tcm_vhost_sess_get_index,
1471 .sess_get_initiator_sid = NULL,
1472 .write_pending = tcm_vhost_write_pending,
1473 .write_pending_status = tcm_vhost_write_pending_status,
1474 .set_default_node_attributes = tcm_vhost_set_default_node_attrs,
1475 .get_task_tag = tcm_vhost_get_task_tag,
1476 .get_cmd_state = tcm_vhost_get_cmd_state,
1477 .queue_data_in = tcm_vhost_queue_data_in,
1478 .queue_status = tcm_vhost_queue_status,
1479 .queue_tm_rsp = tcm_vhost_queue_tm_rsp,
1480 /*
1481 * Setup callers for generic logic in target_core_fabric_configfs.c
1482 */
1483 .fabric_make_wwn = tcm_vhost_make_tport,
1484 .fabric_drop_wwn = tcm_vhost_drop_tport,
1485 .fabric_make_tpg = tcm_vhost_make_tpg,
1486 .fabric_drop_tpg = tcm_vhost_drop_tpg,
1487 .fabric_post_link = tcm_vhost_port_link,
1488 .fabric_pre_unlink = tcm_vhost_port_unlink,
1489 .fabric_make_np = NULL,
1490 .fabric_drop_np = NULL,
1491 .fabric_make_nodeacl = tcm_vhost_make_nodeacl,
1492 .fabric_drop_nodeacl = tcm_vhost_drop_nodeacl,
1493};
1494
1495static int tcm_vhost_register_configfs(void)
1496{
1497 struct target_fabric_configfs *fabric;
1498 int ret;
1499
1500 pr_debug("TCM_VHOST fabric module %s on %s/%s"
1501 " on "UTS_RELEASE"\n", TCM_VHOST_VERSION, utsname()->sysname,
1502 utsname()->machine);
1503 /*
1504 * Register the top level struct config_item_type with TCM core
1505 */
1506 fabric = target_fabric_configfs_init(THIS_MODULE, "vhost");
1507 if (IS_ERR(fabric)) {
1508 pr_err("target_fabric_configfs_init() failed\n");
1509 return PTR_ERR(fabric);
1510 }
1511 /*
1512 * Setup fabric->tf_ops from our local tcm_vhost_ops
1513 */
1514 fabric->tf_ops = tcm_vhost_ops;
1515 /*
1516 * Setup default attribute lists for various fabric->tf_cit_tmpl
1517 */
1518 TF_CIT_TMPL(fabric)->tfc_wwn_cit.ct_attrs = tcm_vhost_wwn_attrs;
1519 TF_CIT_TMPL(fabric)->tfc_tpg_base_cit.ct_attrs = tcm_vhost_tpg_attrs;
1520 TF_CIT_TMPL(fabric)->tfc_tpg_attrib_cit.ct_attrs = NULL;
1521 TF_CIT_TMPL(fabric)->tfc_tpg_param_cit.ct_attrs = NULL;
1522 TF_CIT_TMPL(fabric)->tfc_tpg_np_base_cit.ct_attrs = NULL;
1523 TF_CIT_TMPL(fabric)->tfc_tpg_nacl_base_cit.ct_attrs = NULL;
1524 TF_CIT_TMPL(fabric)->tfc_tpg_nacl_attrib_cit.ct_attrs = NULL;
1525 TF_CIT_TMPL(fabric)->tfc_tpg_nacl_auth_cit.ct_attrs = NULL;
1526 TF_CIT_TMPL(fabric)->tfc_tpg_nacl_param_cit.ct_attrs = NULL;
1527 /*
1528 * Register the fabric for use within TCM
1529 */
1530 ret = target_fabric_configfs_register(fabric);
1531 if (ret < 0) {
1532 pr_err("target_fabric_configfs_register() failed"
1533 " for TCM_VHOST\n");
1534 return ret;
1535 }
1536 /*
1537 * Setup our local pointer to *fabric
1538 */
1539 tcm_vhost_fabric_configfs = fabric;
1540 pr_debug("TCM_VHOST[0] - Set fabric -> tcm_vhost_fabric_configfs\n");
1541 return 0;
1542};
1543
1544static void tcm_vhost_deregister_configfs(void)
1545{
1546 if (!tcm_vhost_fabric_configfs)
1547 return;
1548
1549 target_fabric_configfs_deregister(tcm_vhost_fabric_configfs);
1550 tcm_vhost_fabric_configfs = NULL;
1551 pr_debug("TCM_VHOST[0] - Cleared tcm_vhost_fabric_configfs\n");
1552};
1553
1554static int __init tcm_vhost_init(void)
1555{
1556 int ret = -ENOMEM;
1557 /*
1558 * Use our own dedicated workqueue for submitting I/O into
1559 * target core to avoid contention within system_wq.
1560 */
1561 tcm_vhost_workqueue = alloc_workqueue("tcm_vhost", 0, 0);
1562 if (!tcm_vhost_workqueue)
1563 goto out;
1564
1565 ret = vhost_scsi_register();
1566 if (ret < 0)
1567 goto out_destroy_workqueue;
1568
1569 ret = tcm_vhost_register_configfs();
1570 if (ret < 0)
1571 goto out_vhost_scsi_deregister;
1572
1573 return 0;
1574
1575out_vhost_scsi_deregister:
1576 vhost_scsi_deregister();
1577out_destroy_workqueue:
1578 destroy_workqueue(tcm_vhost_workqueue);
1579out:
1580 return ret;
1581};
1582
1583static void tcm_vhost_exit(void)
1584{
1585 tcm_vhost_deregister_configfs();
1586 vhost_scsi_deregister();
1587 destroy_workqueue(tcm_vhost_workqueue);
1588};
1589
1590MODULE_DESCRIPTION("TCM_VHOST series fabric driver");
1591MODULE_LICENSE("GPL");
1592module_init(tcm_vhost_init);
1593module_exit(tcm_vhost_exit);
diff --git a/drivers/vhost/tcm_vhost.h b/drivers/vhost/tcm_vhost.h
deleted file mode 100644
index 7e87c63ecbc..00000000000
--- a/drivers/vhost/tcm_vhost.h
+++ /dev/null
@@ -1,111 +0,0 @@
1#define TCM_VHOST_VERSION "v0.1"
2#define TCM_VHOST_NAMELEN 256
3#define TCM_VHOST_MAX_CDB_SIZE 32
4
5struct tcm_vhost_cmd {
6 /* Descriptor from vhost_get_vq_desc() for virt_queue segment */
7 int tvc_vq_desc;
8 /* virtio-scsi initiator task attribute */
9 int tvc_task_attr;
10 /* virtio-scsi initiator data direction */
11 enum dma_data_direction tvc_data_direction;
12 /* Expected data transfer length from virtio-scsi header */
13 u32 tvc_exp_data_len;
14 /* The Tag from include/linux/virtio_scsi.h:struct virtio_scsi_cmd_req */
15 u64 tvc_tag;
16 /* The number of scatterlists associated with this cmd */
17 u32 tvc_sgl_count;
18 /* Saved unpacked SCSI LUN for tcm_vhost_submission_work() */
19 u32 tvc_lun;
20 /* Pointer to the SGL formatted memory from virtio-scsi */
21 struct scatterlist *tvc_sgl;
22 /* Pointer to response */
23 struct virtio_scsi_cmd_resp __user *tvc_resp;
24 /* Pointer to vhost_scsi for our device */
25 struct vhost_scsi *tvc_vhost;
26 /* Pointer to vhost nexus memory */
27 struct tcm_vhost_nexus *tvc_nexus;
28 /* The TCM I/O descriptor that is accessed via container_of() */
29 struct se_cmd tvc_se_cmd;
30 /* work item used for cmwq dispatch to tcm_vhost_submission_work() */
31 struct work_struct work;
32 /* Copy of the incoming SCSI command descriptor block (CDB) */
33 unsigned char tvc_cdb[TCM_VHOST_MAX_CDB_SIZE];
34 /* Sense buffer that will be mapped into outgoing status */
35 unsigned char tvc_sense_buf[TRANSPORT_SENSE_BUFFER];
36 /* Completed commands list, serviced from vhost worker thread */
37 struct list_head tvc_completion_list;
38};
39
40struct tcm_vhost_nexus {
41 /* Pointer to TCM session for I_T Nexus */
42 struct se_session *tvn_se_sess;
43};
44
45struct tcm_vhost_nacl {
46 /* Binary World Wide unique Port Name for Vhost Initiator port */
47 u64 iport_wwpn;
48 /* ASCII formatted WWPN for Sas Initiator port */
49 char iport_name[TCM_VHOST_NAMELEN];
50 /* Returned by tcm_vhost_make_nodeacl() */
51 struct se_node_acl se_node_acl;
52};
53
54struct tcm_vhost_tpg {
55 /* Vhost port target portal group tag for TCM */
56 u16 tport_tpgt;
57 /* Used to track number of TPG Port/Lun Links wrt to explict I_T Nexus shutdown */
58 int tv_tpg_port_count;
59 /* Used for vhost_scsi device reference to tpg_nexus, protected by tv_tpg_mutex */
60 int tv_tpg_vhost_count;
61 /* list for tcm_vhost_list */
62 struct list_head tv_tpg_list;
63 /* Used to protect access for tpg_nexus */
64 struct mutex tv_tpg_mutex;
65 /* Pointer to the TCM VHost I_T Nexus for this TPG endpoint */
66 struct tcm_vhost_nexus *tpg_nexus;
67 /* Pointer back to tcm_vhost_tport */
68 struct tcm_vhost_tport *tport;
69 /* Returned by tcm_vhost_make_tpg() */
70 struct se_portal_group se_tpg;
71};
72
73struct tcm_vhost_tport {
74 /* SCSI protocol the tport is providing */
75 u8 tport_proto_id;
76 /* Binary World Wide unique Port Name for Vhost Target port */
77 u64 tport_wwpn;
78 /* ASCII formatted WWPN for Vhost Target port */
79 char tport_name[TCM_VHOST_NAMELEN];
80 /* Returned by tcm_vhost_make_tport() */
81 struct se_wwn tport_wwn;
82};
83
84/*
85 * As per request from MST, keep TCM_VHOST related ioctl defines out of
86 * linux/vhost.h (user-space) for now..
87 */
88
89#include <linux/vhost.h>
90
91/*
92 * Used by QEMU userspace to ensure a consistent vhost-scsi ABI.
93 *
94 * ABI Rev 0: July 2012 version starting point for v3.6-rc merge candidate +
95 * RFC-v2 vhost-scsi userspace. Add GET_ABI_VERSION ioctl usage
96 */
97
98#define VHOST_SCSI_ABI_VERSION 0
99
100struct vhost_scsi_target {
101 int abi_version;
102 char vhost_wwpn[TRANSPORT_IQN_LEN];
103 unsigned short vhost_tpgt;
104 unsigned short reserved;
105};
106
107/* VHOST_SCSI specific defines */
108#define VHOST_SCSI_SET_ENDPOINT _IOW(VHOST_VIRTIO, 0x40, struct vhost_scsi_target)
109#define VHOST_SCSI_CLEAR_ENDPOINT _IOW(VHOST_VIRTIO, 0x41, struct vhost_scsi_target)
110/* Changing this breaks userspace. */
111#define VHOST_SCSI_GET_ABI_VERSION _IOW(VHOST_VIRTIO, 0x42, int)
diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c
index 91d6f060aad..fc9a1d75281 100644
--- a/drivers/vhost/test.c
+++ b/drivers/vhost/test.c
@@ -155,7 +155,7 @@ static int vhost_test_release(struct inode *inode, struct file *f)
155 155
156 vhost_test_stop(n, &private); 156 vhost_test_stop(n, &private);
157 vhost_test_flush(n); 157 vhost_test_flush(n);
158 vhost_dev_cleanup(&n->dev, false); 158 vhost_dev_cleanup(&n->dev);
159 /* We do an extra flush before freeing memory, 159 /* We do an extra flush before freeing memory,
160 * since jobs can re-queue themselves. */ 160 * since jobs can re-queue themselves. */
161 vhost_test_flush(n); 161 vhost_test_flush(n);
@@ -261,14 +261,14 @@ static long vhost_test_ioctl(struct file *f, unsigned int ioctl,
261 return -EFAULT; 261 return -EFAULT;
262 return vhost_test_run(n, test); 262 return vhost_test_run(n, test);
263 case VHOST_GET_FEATURES: 263 case VHOST_GET_FEATURES:
264 features = VHOST_NET_FEATURES; 264 features = VHOST_FEATURES;
265 if (copy_to_user(featurep, &features, sizeof features)) 265 if (copy_to_user(featurep, &features, sizeof features))
266 return -EFAULT; 266 return -EFAULT;
267 return 0; 267 return 0;
268 case VHOST_SET_FEATURES: 268 case VHOST_SET_FEATURES:
269 if (copy_from_user(&features, featurep, sizeof features)) 269 if (copy_from_user(&features, featurep, sizeof features))
270 return -EFAULT; 270 return -EFAULT;
271 if (features & ~VHOST_NET_FEATURES) 271 if (features & ~VHOST_FEATURES)
272 return -EOPNOTSUPP; 272 return -EOPNOTSUPP;
273 return vhost_test_set_features(n, features); 273 return vhost_test_set_features(n, features);
274 case VHOST_RESET_OWNER: 274 case VHOST_RESET_OWNER:
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index 34389f75fe6..c14c42b95ab 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -26,6 +26,10 @@
26#include <linux/kthread.h> 26#include <linux/kthread.h>
27#include <linux/cgroup.h> 27#include <linux/cgroup.h>
28 28
29#include <linux/net.h>
30#include <linux/if_packet.h>
31#include <linux/if_arp.h>
32
29#include "vhost.h" 33#include "vhost.h"
30 34
31enum { 35enum {
@@ -60,7 +64,7 @@ static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync,
60 return 0; 64 return 0;
61} 65}
62 66
63void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn) 67static void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn)
64{ 68{
65 INIT_LIST_HEAD(&work->node); 69 INIT_LIST_HEAD(&work->node);
66 work->fn = fn; 70 work->fn = fn;
@@ -133,7 +137,8 @@ void vhost_poll_flush(struct vhost_poll *poll)
133 vhost_work_flush(poll->dev, &poll->work); 137 vhost_work_flush(poll->dev, &poll->work);
134} 138}
135 139
136void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work) 140static inline void vhost_work_queue(struct vhost_dev *dev,
141 struct vhost_work *work)
137{ 142{
138 unsigned long flags; 143 unsigned long flags;
139 144
@@ -186,9 +191,7 @@ static int vhost_worker(void *data)
186 struct vhost_dev *dev = data; 191 struct vhost_dev *dev = data;
187 struct vhost_work *work = NULL; 192 struct vhost_work *work = NULL;
188 unsigned uninitialized_var(seq); 193 unsigned uninitialized_var(seq);
189 mm_segment_t oldfs = get_fs();
190 194
191 set_fs(USER_DS);
192 use_mm(dev->mm); 195 use_mm(dev->mm);
193 196
194 for (;;) { 197 for (;;) {
@@ -219,14 +222,11 @@ static int vhost_worker(void *data)
219 if (work) { 222 if (work) {
220 __set_current_state(TASK_RUNNING); 223 __set_current_state(TASK_RUNNING);
221 work->fn(work); 224 work->fn(work);
222 if (need_resched())
223 schedule();
224 } else 225 } else
225 schedule(); 226 schedule();
226 227
227 } 228 }
228 unuse_mm(dev->mm); 229 unuse_mm(dev->mm);
229 set_fs(oldfs);
230 return 0; 230 return 0;
231} 231}
232 232
@@ -403,14 +403,39 @@ long vhost_dev_reset_owner(struct vhost_dev *dev)
403 if (!memory) 403 if (!memory)
404 return -ENOMEM; 404 return -ENOMEM;
405 405
406 vhost_dev_cleanup(dev, true); 406 vhost_dev_cleanup(dev);
407 407
408 memory->nregions = 0; 408 memory->nregions = 0;
409 RCU_INIT_POINTER(dev->memory, memory); 409 RCU_INIT_POINTER(dev->memory, memory);
410 return 0; 410 return 0;
411} 411}
412 412
413void vhost_dev_stop(struct vhost_dev *dev) 413/* In case of DMA done not in order in lower device driver for some reason.
414 * upend_idx is used to track end of used idx, done_idx is used to track head
415 * of used idx. Once lower device DMA done contiguously, we will signal KVM
416 * guest used idx.
417 */
418int vhost_zerocopy_signal_used(struct vhost_virtqueue *vq)
419{
420 int i;
421 int j = 0;
422
423 for (i = vq->done_idx; i != vq->upend_idx; i = (i + 1) % UIO_MAXIOV) {
424 if ((vq->heads[i].len == VHOST_DMA_DONE_LEN)) {
425 vq->heads[i].len = VHOST_DMA_CLEAR_LEN;
426 vhost_add_used_and_signal(vq->dev, vq,
427 vq->heads[i].id, 0);
428 ++j;
429 } else
430 break;
431 }
432 if (j)
433 vq->done_idx = i;
434 return j;
435}
436
437/* Caller should have device mutex */
438void vhost_dev_cleanup(struct vhost_dev *dev)
414{ 439{
415 int i; 440 int i;
416 441
@@ -419,15 +444,13 @@ void vhost_dev_stop(struct vhost_dev *dev)
419 vhost_poll_stop(&dev->vqs[i].poll); 444 vhost_poll_stop(&dev->vqs[i].poll);
420 vhost_poll_flush(&dev->vqs[i].poll); 445 vhost_poll_flush(&dev->vqs[i].poll);
421 } 446 }
422 } 447 /* Wait for all lower device DMAs done. */
423} 448 if (dev->vqs[i].ubufs)
449 vhost_ubuf_put_and_wait(dev->vqs[i].ubufs);
424 450
425/* Caller should have device mutex if and only if locked is set */ 451 /* Signal guest as appropriate. */
426void vhost_dev_cleanup(struct vhost_dev *dev, bool locked) 452 vhost_zerocopy_signal_used(&dev->vqs[i]);
427{
428 int i;
429 453
430 for (i = 0; i < dev->nvqs; ++i) {
431 if (dev->vqs[i].error_ctx) 454 if (dev->vqs[i].error_ctx)
432 eventfd_ctx_put(dev->vqs[i].error_ctx); 455 eventfd_ctx_put(dev->vqs[i].error_ctx);
433 if (dev->vqs[i].error) 456 if (dev->vqs[i].error)
@@ -449,8 +472,7 @@ void vhost_dev_cleanup(struct vhost_dev *dev, bool locked)
449 dev->log_file = NULL; 472 dev->log_file = NULL;
450 /* No one will access memory at this point */ 473 /* No one will access memory at this point */
451 kfree(rcu_dereference_protected(dev->memory, 474 kfree(rcu_dereference_protected(dev->memory,
452 locked == 475 lockdep_is_held(&dev->mutex)));
453 lockdep_is_held(&dev->mutex)));
454 RCU_INIT_POINTER(dev->memory, NULL); 476 RCU_INIT_POINTER(dev->memory, NULL);
455 WARN_ON(!list_empty(&dev->work_list)); 477 WARN_ON(!list_empty(&dev->work_list));
456 if (dev->worker) { 478 if (dev->worker) {
@@ -607,10 +629,10 @@ static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m)
607 return 0; 629 return 0;
608} 630}
609 631
610long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp) 632static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp)
611{ 633{
612 struct file *eventfp, *filep = NULL; 634 struct file *eventfp, *filep = NULL,
613 bool pollstart = false, pollstop = false; 635 *pollstart = NULL, *pollstop = NULL;
614 struct eventfd_ctx *ctx = NULL; 636 struct eventfd_ctx *ctx = NULL;
615 u32 __user *idxp = argp; 637 u32 __user *idxp = argp;
616 struct vhost_virtqueue *vq; 638 struct vhost_virtqueue *vq;
@@ -736,8 +758,8 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
736 break; 758 break;
737 } 759 }
738 if (eventfp != vq->kick) { 760 if (eventfp != vq->kick) {
739 pollstop = (filep = vq->kick) != NULL; 761 pollstop = filep = vq->kick;
740 pollstart = (vq->kick = eventfp) != NULL; 762 pollstart = vq->kick = eventfp;
741 } else 763 } else
742 filep = eventfp; 764 filep = eventfp;
743 break; 765 break;
@@ -802,8 +824,9 @@ long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp)
802} 824}
803 825
804/* Caller must have device mutex */ 826/* Caller must have device mutex */
805long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp) 827long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, unsigned long arg)
806{ 828{
829 void __user *argp = (void __user *)arg;
807 struct file *eventfp, *filep = NULL; 830 struct file *eventfp, *filep = NULL;
808 struct eventfd_ctx *ctx = NULL; 831 struct eventfd_ctx *ctx = NULL;
809 u64 p; 832 u64 p;
@@ -874,7 +897,7 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *argp)
874 fput(filep); 897 fput(filep);
875 break; 898 break;
876 default: 899 default:
877 r = -ENOIOCTLCMD; 900 r = vhost_set_vring(d, ioctl, argp);
878 break; 901 break;
879 } 902 }
880done: 903done:
@@ -914,9 +937,9 @@ static int set_bit_to_user(int nr, void __user *addr)
914 if (r < 0) 937 if (r < 0)
915 return r; 938 return r;
916 BUG_ON(r != 1); 939 BUG_ON(r != 1);
917 base = kmap_atomic(page); 940 base = kmap_atomic(page, KM_USER0);
918 set_bit(bit, base); 941 set_bit(bit, base);
919 kunmap_atomic(base); 942 kunmap_atomic(base, KM_USER0);
920 set_page_dirty_lock(page); 943 set_page_dirty_lock(page);
921 put_page(page); 944 put_page(page);
922 return 0; 945 return 0;
@@ -1048,7 +1071,7 @@ static int translate_desc(struct vhost_dev *dev, u64 addr, u32 len,
1048 } 1071 }
1049 _iov = iov + ret; 1072 _iov = iov + ret;
1050 size = reg->memory_size - addr + reg->guest_phys_addr; 1073 size = reg->memory_size - addr + reg->guest_phys_addr;
1051 _iov->iov_len = min((u64)len - s, size); 1074 _iov->iov_len = min((u64)len, size);
1052 _iov->iov_base = (void __user *)(unsigned long) 1075 _iov->iov_base = (void __user *)(unsigned long)
1053 (reg->userspace_addr + addr - reg->guest_phys_addr); 1076 (reg->userspace_addr + addr - reg->guest_phys_addr);
1054 s += size; 1077 s += size;
@@ -1571,3 +1594,14 @@ void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *ubufs)
1571 wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount)); 1594 wait_event(ubufs->wait, !atomic_read(&ubufs->kref.refcount));
1572 kfree(ubufs); 1595 kfree(ubufs);
1573} 1596}
1597
1598void vhost_zerocopy_callback(void *arg)
1599{
1600 struct ubuf_info *ubuf = arg;
1601 struct vhost_ubuf_ref *ubufs = ubuf->arg;
1602 struct vhost_virtqueue *vq = ubufs->vq;
1603
1604 /* set len = 1 to mark this desc buffers done DMA */
1605 vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN;
1606 kref_put(&ubufs->kref, vhost_zerocopy_done_signal);
1607}
diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h
index 2639c58b23a..a801e2821d0 100644
--- a/drivers/vhost/vhost.h
+++ b/drivers/vhost/vhost.h
@@ -7,11 +7,17 @@
7#include <linux/mutex.h> 7#include <linux/mutex.h>
8#include <linux/poll.h> 8#include <linux/poll.h>
9#include <linux/file.h> 9#include <linux/file.h>
10#include <linux/skbuff.h>
10#include <linux/uio.h> 11#include <linux/uio.h>
11#include <linux/virtio_config.h> 12#include <linux/virtio_config.h>
12#include <linux/virtio_ring.h> 13#include <linux/virtio_ring.h>
13#include <linux/atomic.h> 14#include <linux/atomic.h>
14 15
16/* This is for zerocopy, used buffer len is set to 1 when lower device DMA
17 * done */
18#define VHOST_DMA_DONE_LEN 1
19#define VHOST_DMA_CLEAR_LEN 0
20
15struct vhost_device; 21struct vhost_device;
16 22
17struct vhost_work; 23struct vhost_work;
@@ -37,9 +43,6 @@ struct vhost_poll {
37 struct vhost_dev *dev; 43 struct vhost_dev *dev;
38}; 44};
39 45
40void vhost_work_init(struct vhost_work *work, vhost_work_fn_t fn);
41void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work);
42
43void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn, 46void vhost_poll_init(struct vhost_poll *poll, vhost_work_fn_t fn,
44 unsigned long mask, struct vhost_dev *dev); 47 unsigned long mask, struct vhost_dev *dev);
45void vhost_poll_start(struct vhost_poll *poll, struct file *file); 48void vhost_poll_start(struct vhost_poll *poll, struct file *file);
@@ -64,8 +67,6 @@ struct vhost_ubuf_ref *vhost_ubuf_alloc(struct vhost_virtqueue *, bool zcopy);
64void vhost_ubuf_put(struct vhost_ubuf_ref *); 67void vhost_ubuf_put(struct vhost_ubuf_ref *);
65void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *); 68void vhost_ubuf_put_and_wait(struct vhost_ubuf_ref *);
66 69
67struct ubuf_info;
68
69/* The virtqueue structure describes a queue attached to a device. */ 70/* The virtqueue structure describes a queue attached to a device. */
70struct vhost_virtqueue { 71struct vhost_virtqueue {
71 struct vhost_dev *dev; 72 struct vhost_dev *dev;
@@ -162,10 +163,8 @@ struct vhost_dev {
162long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue *vqs, int nvqs); 163long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue *vqs, int nvqs);
163long vhost_dev_check_owner(struct vhost_dev *); 164long vhost_dev_check_owner(struct vhost_dev *);
164long vhost_dev_reset_owner(struct vhost_dev *); 165long vhost_dev_reset_owner(struct vhost_dev *);
165void vhost_dev_cleanup(struct vhost_dev *, bool locked); 166void vhost_dev_cleanup(struct vhost_dev *);
166void vhost_dev_stop(struct vhost_dev *); 167long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, unsigned long arg);
167long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, void __user *argp);
168long vhost_vring_ioctl(struct vhost_dev *d, int ioctl, void __user *argp);
169int vhost_vq_access_ok(struct vhost_virtqueue *vq); 168int vhost_vq_access_ok(struct vhost_virtqueue *vq);
170int vhost_log_access_ok(struct vhost_dev *); 169int vhost_log_access_ok(struct vhost_dev *);
171 170
@@ -189,6 +188,8 @@ bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *);
189 188
190int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, 189int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log,
191 unsigned int log_num, u64 len); 190 unsigned int log_num, u64 len);
191void vhost_zerocopy_callback(void *arg);
192int vhost_zerocopy_signal_used(struct vhost_virtqueue *vq);
192 193
193#define vq_err(vq, fmt, ...) do { \ 194#define vq_err(vq, fmt, ...) do { \
194 pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \ 195 pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \
@@ -200,8 +201,7 @@ enum {
200 VHOST_FEATURES = (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | 201 VHOST_FEATURES = (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) |
201 (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | 202 (1ULL << VIRTIO_RING_F_INDIRECT_DESC) |
202 (1ULL << VIRTIO_RING_F_EVENT_IDX) | 203 (1ULL << VIRTIO_RING_F_EVENT_IDX) |
203 (1ULL << VHOST_F_LOG_ALL), 204 (1ULL << VHOST_F_LOG_ALL) |
204 VHOST_NET_FEATURES = VHOST_FEATURES |
205 (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) | 205 (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) |
206 (1ULL << VIRTIO_NET_F_MRG_RXBUF), 206 (1ULL << VIRTIO_NET_F_MRG_RXBUF),
207}; 207};