author     Andy Grover <agrover@redhat.com>            2014-10-01 19:07:05 -0400
committer  Nicholas Bellinger <nab@linux-iscsi.org>    2014-10-03 14:15:20 -0400
commit     7c9e7a6fe11c8dc5b3b9d0e889dde73347247584 (patch)
tree       9c4c3a753228617e308226f47dc6d4fe83ddf15d
parent     ce87685128f3e0fced2aca9f73fc8cc67704ae11 (diff)
target: Add a user-passthrough backstore
Add a LIO storage engine that presents commands to userspace for execution.
This would allow more complex backstores to be implemented out-of-kernel,
and also make experimentation a-la FUSE (but at the SCSI level -- "SUSE"?)
possible.

It uses a mmap()able UIO device per LUN to share a command ring and data
area. The commands are raw SCSI CDBs and iovs for in/out data. The command
ring is also reused for returning scsi command status and optional sense
data.

This implementation is based on Shaohua Li's earlier version but heavily
modified. Differences include:

* Shared memory allocated by kernel, not locked-down user pages
* Single ring for command request and response
* Offsets instead of embedded pointers
* Generic SCSI CDB passthrough instead of per-cmd specialization in ring
  format.
* Uses UIO device instead of anon_file passed in mailbox.
* Optional in-kernel handling of some commands.

The main reason for these differences is to permit greater resiliency if
the user process dies or hangs.

Things not yet implemented (on purpose):

* Zero copy. The data area is flexible enough to allow page flipping or
  backend-allocated pages to be used by fabrics, but it's not clear these
  are performance wins. Can come later.
* Out-of-order command completion by userspace. Possible to add by just
  allowing userspace to change cmd_id in rsp cmd entries, but currently
  not supported.
* No locks between kernel cmd submission and completion routines. Sounds
  like it's possible, but this can come later.
* Sparse allocation of mmaped area. Current code vmallocs the whole thing.
  If the mapped area was larger and not fully mapped then the driver would
  have more freedom to change cmd and data area sizes based on demand.

Current code open issues:

* The use of idrs may be overkill -- we maybe can replace them with a
  simple counter to generate cmd_ids, and a hash table to get a cmd_id's
  associated pointer.
* Use of a free-running counter for cmd ring instead of explicit modulo
  math. This would require power-of-2 cmd ring size.

(Add kconfig depends NET - Randy)

Signed-off-by: Andy Grover <agrover@redhat.com>
Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
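As a rough sketch of the userspace side (assumptions: the LUN's UIO node is
/dev/uio0, the mapping size matches the kernel's command-plus-data ring size,
and do_cdb() stands in for a real backstore; error handling and memory
ordering are omitted), a handler built on the new
include/uapi/linux/target_core_user.h header might look like:

/*
 * Hypothetical ring consumer (illustration only, not part of this patch).
 * Assumes the LUN's UIO node is /dev/uio0 and that the mapping size matches
 * the kernel's CMDR_SIZE + DATA_SIZE, i.e. (16 + 257) * 4096 bytes here.
 * Error handling and memory barriers are omitted for brevity.
 */
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/mman.h>
#include <linux/target_core_user.h>

#define RING_SIZE ((16 + 257) * 4096)

/* Placeholder backstore: succeed every command (SAM_STAT_GOOD == 0). */
static uint8_t do_cdb(uint8_t *cdb, struct iovec *iov, unsigned int iov_cnt,
		      void *map_base)
{
	/* iov[i].iov_base holds an *offset* from map_base, not a pointer. */
	return 0;
}

int main(void)
{
	int fd = open("/dev/uio0", O_RDWR);
	void *map = mmap(NULL, RING_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED,
			 fd, 0);
	struct tcmu_mailbox *mb = map;
	uint32_t buf;

	for (;;) {
		/* Block until the kernel pokes us via uio_event_notify() */
		read(fd, &buf, sizeof(buf));

		while (mb->cmd_tail != mb->cmd_head) {
			struct tcmu_cmd_entry *ent =
				(void *)mb + mb->cmdr_off + mb->cmd_tail;

			if (tcmu_hdr_get_op(&ent->hdr) == TCMU_OP_CMD)
				ent->rsp.scsi_status = do_cdb(
					(uint8_t *)mb + ent->req.cdb_off,
					ent->req.iov, ent->req.iov_cnt, map);
			/* TCMU_OP_PAD entries are simply skipped */

			mb->cmd_tail = (mb->cmd_tail +
					tcmu_hdr_get_len(&ent->hdr)) %
					mb->cmdr_size;
		}

		/* Kick the kernel's irqcontrol hook to reap completions */
		buf = 1;
		write(fd, &buf, sizeof(buf));
	}
}

The final write() to the UIO fd drives the module's irqcontrol hook, which is
how the kernel learns that completed entries can be reaped from the ring.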
-rw-r--r--  drivers/target/Kconfig                       7
-rw-r--r--  drivers/target/Makefile                      1
-rw-r--r--  drivers/target/target_core_transport.c       4
-rw-r--r--  drivers/target/target_core_user.c         1163
-rw-r--r--  include/uapi/linux/Kbuild                    1
-rw-r--r--  include/uapi/linux/target_core_user.h      142
6 files changed, 1318 insertions, 0 deletions
diff --git a/drivers/target/Kconfig b/drivers/target/Kconfig
index dc2d84ac5a0e..81d44c477a5b 100644
--- a/drivers/target/Kconfig
+++ b/drivers/target/Kconfig
@@ -31,6 +31,13 @@ config TCM_PSCSI
31	  Say Y here to enable the TCM/pSCSI subsystem plugin for non-buffered
32	  passthrough access to Linux/SCSI device
33
34config TCM_USER
35 tristate "TCM/USER Subsystem Plugin for Linux"
36 depends on UIO && NET
37 help
38 Say Y here to enable the TCM/USER subsystem plugin for a userspace
39 process to handle requests
40
41source "drivers/target/loopback/Kconfig"
42source "drivers/target/tcm_fc/Kconfig"
43source "drivers/target/iscsi/Kconfig"
diff --git a/drivers/target/Makefile b/drivers/target/Makefile
index 85b012d2f89b..bbb4a7d638ef 100644
--- a/drivers/target/Makefile
+++ b/drivers/target/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_TARGET_CORE) += target_core_mod.o
22obj-$(CONFIG_TCM_IBLOCK) += target_core_iblock.o
23obj-$(CONFIG_TCM_FILEIO) += target_core_file.o
24obj-$(CONFIG_TCM_PSCSI) += target_core_pscsi.o
25obj-$(CONFIG_TCM_USER) += target_core_user.o
26
27# Fabric modules
28obj-$(CONFIG_LOOPBACK_TARGET) += loopback/
diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c
index 9700ea125268..9ea0d5f03f7a 100644
--- a/drivers/target/target_core_transport.c
+++ b/drivers/target/target_core_transport.c
@@ -232,6 +232,10 @@ void transport_subsystem_check_init(void)
232	if (ret != 0)
233		pr_err("Unable to load target_core_pscsi\n");
234
235 ret = request_module("target_core_user");
236 if (ret != 0)
237 pr_err("Unable to load target_core_user\n");
238
239	sub_api_initialized = 1;
240}
241
diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c
new file mode 100644
index 000000000000..6608ecf94570
--- /dev/null
+++ b/drivers/target/target_core_user.c
@@ -0,0 +1,1163 @@
1/*
2 * Copyright (C) 2013 Shaohua Li <shli@kernel.org>
3 * Copyright (C) 2014 Red Hat, Inc.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc.,
16 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
17 */
18
19#include <linux/spinlock.h>
20#include <linux/module.h>
21#include <linux/idr.h>
22#include <linux/timer.h>
23#include <linux/parser.h>
24#include <scsi/scsi.h>
25#include <scsi/scsi_host.h>
26#include <linux/uio_driver.h>
27#include <net/genetlink.h>
28#include <target/target_core_base.h>
29#include <target/target_core_fabric.h>
30#include <target/target_core_backend.h>
31#include <linux/target_core_user.h>
32
33/*
34 * Define a shared-memory interface for LIO to pass SCSI commands and
35 * data to userspace for processing. This allows backends that are
36 * too complex for in-kernel support to be implemented in userspace.
37 *
38 * It uses the UIO framework to do a lot of the device-creation and
39 * introspection work for us.
40 *
41 * See the .h file for how the ring is laid out. Note that while the
42 * command ring is defined, the particulars of the data area are
43 * not. Offset values in the command entry point to other locations
44 * internal to the mmap()ed area. There is separate space outside the
45 * command ring for data buffers. This leaves maximum flexibility for
46 * moving buffer allocations, or even page flipping or other
47 * allocation techniques, without altering the command ring layout.
48 *
49 * SECURITY:
50 * The user process must be assumed to be malicious. There's no way to
51 * prevent it breaking the command ring protocol if it wants, but in
52 * order to prevent other issues we must only ever read *data* from
53 * the shared memory area, not offsets or sizes. This applies to
54 * command ring entries as well as the mailbox. Extra code needed for
55 * this may have a 'UAM' comment.
56 */
57
58
59#define TCMU_TIME_OUT (30 * MSEC_PER_SEC)
60
61#define CMDR_SIZE (16 * 4096)
62#define DATA_SIZE (257 * 4096)
63
64#define TCMU_RING_SIZE (CMDR_SIZE + DATA_SIZE)
65
66static struct device *tcmu_root_device;
67
68struct tcmu_hba {
69 u32 host_id;
70};
71
72/* User wants all cmds or just some */
73enum passthru_level {
74 TCMU_PASS_ALL = 0,
75 TCMU_PASS_IO,
76 TCMU_PASS_INVALID,
77};
78
79#define TCMU_CONFIG_LEN 256
80
81struct tcmu_dev {
82 struct se_device se_dev;
83
84 char *name;
85 struct se_hba *hba;
86
87#define TCMU_DEV_BIT_OPEN 0
88#define TCMU_DEV_BIT_BROKEN 1
89 unsigned long flags;
90 enum passthru_level pass_level;
91
92 struct uio_info uio_info;
93
94 struct tcmu_mailbox *mb_addr;
95 size_t dev_size;
96 u32 cmdr_size;
97 u32 cmdr_last_cleaned;
98 /* Offset of data ring from start of mb */
99 size_t data_off;
100 size_t data_size;
101 /* Ring head + tail values. */
102 /* Must add data_off and mb_addr to get the address */
103 size_t data_head;
104 size_t data_tail;
105
106 wait_queue_head_t wait_cmdr;
107 /* TODO should this be a mutex? */
108 spinlock_t cmdr_lock;
109
110 struct idr commands;
111 spinlock_t commands_lock;
112
113 struct timer_list timeout;
114
115 char dev_config[TCMU_CONFIG_LEN];
116};
117
118#define TCMU_DEV(_se_dev) container_of(_se_dev, struct tcmu_dev, se_dev)
119
120#define CMDR_OFF sizeof(struct tcmu_mailbox)
121
122struct tcmu_cmd {
123 struct se_cmd *se_cmd;
124 struct tcmu_dev *tcmu_dev;
125
126 uint16_t cmd_id;
127
128 /* Can't use se_cmd->data_length when cleaning up expired cmds, because if
129 cmd has been completed then accessing se_cmd is off limits */
130 size_t data_length;
131
132 unsigned long deadline;
133
134#define TCMU_CMD_BIT_EXPIRED 0
135 unsigned long flags;
136};
137
138static struct kmem_cache *tcmu_cmd_cache;
139
140/* multicast group */
141enum tcmu_multicast_groups {
142 TCMU_MCGRP_CONFIG,
143};
144
145static const struct genl_multicast_group tcmu_mcgrps[] = {
146 [TCMU_MCGRP_CONFIG] = { .name = "config", },
147};
148
149/* Our generic netlink family */
150static struct genl_family tcmu_genl_family = {
151 .id = GENL_ID_GENERATE,
152 .hdrsize = 0,
153 .name = "TCM-USER",
154 .version = 1,
155 .maxattr = TCMU_ATTR_MAX,
156 .mcgrps = tcmu_mcgrps,
157 .n_mcgrps = ARRAY_SIZE(tcmu_mcgrps),
158};
159
160static struct tcmu_cmd *tcmu_alloc_cmd(struct se_cmd *se_cmd)
161{
162 struct se_device *se_dev = se_cmd->se_dev;
163 struct tcmu_dev *udev = TCMU_DEV(se_dev);
164 struct tcmu_cmd *tcmu_cmd;
165 int cmd_id;
166
167 tcmu_cmd = kmem_cache_zalloc(tcmu_cmd_cache, GFP_KERNEL);
168 if (!tcmu_cmd)
169 return NULL;
170
171 tcmu_cmd->se_cmd = se_cmd;
172 tcmu_cmd->tcmu_dev = udev;
173 tcmu_cmd->data_length = se_cmd->data_length;
174
175 tcmu_cmd->deadline = jiffies + msecs_to_jiffies(TCMU_TIME_OUT);
176
177 idr_preload(GFP_KERNEL);
178 spin_lock_irq(&udev->commands_lock);
179 cmd_id = idr_alloc(&udev->commands, tcmu_cmd, 0,
180 USHRT_MAX, GFP_NOWAIT);
181 spin_unlock_irq(&udev->commands_lock);
182 idr_preload_end();
183
184 if (cmd_id < 0) {
185 kmem_cache_free(tcmu_cmd_cache, tcmu_cmd);
186 return NULL;
187 }
188 tcmu_cmd->cmd_id = cmd_id;
189
190 return tcmu_cmd;
191}
192
193static inline void tcmu_flush_dcache_range(void *vaddr, size_t size)
194{
195 unsigned long offset = (unsigned long) vaddr & ~PAGE_MASK;
196
197 size = round_up(size+offset, PAGE_SIZE);
198 vaddr -= offset;
199
200 while (size) {
201 flush_dcache_page(virt_to_page(vaddr));
202 size -= PAGE_SIZE;
203 }
204}
205
206/*
207 * Some ring helper functions. We don't assume size is a power of 2 so
208 * we can't use circ_buf.h.
209 */
210static inline size_t spc_used(size_t head, size_t tail, size_t size)
211{
212 int diff = head - tail;
213
214 if (diff >= 0)
215 return diff;
216 else
217 return size + diff;
218}
219
220static inline size_t spc_free(size_t head, size_t tail, size_t size)
221{
222 /* Keep 1 byte unused or we can't tell full from empty */
223 return (size - spc_used(head, tail, size) - 1);
224}
225
226static inline size_t head_to_end(size_t head, size_t size)
227{
228 return size - head;
229}
230
231#define UPDATE_HEAD(head, used, size) smp_store_release(&head, ((head % size) + used) % size)
232
233/*
234 * We can't queue a command until we have space available on the cmd ring *and*
235 * space available on the data ring.
236 *
237 * Called with ring lock held.
238 */
239static bool is_ring_space_avail(struct tcmu_dev *udev, size_t cmd_needed, size_t data_needed)
240{
241 struct tcmu_mailbox *mb = udev->mb_addr;
242 size_t space;
243 u32 cmd_head;
244
245 tcmu_flush_dcache_range(mb, sizeof(*mb));
246
247 cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
248
249 space = spc_free(cmd_head, udev->cmdr_last_cleaned, udev->cmdr_size);
250 if (space < cmd_needed) {
251 pr_debug("no cmd space: %u %u %u\n", cmd_head,
252 udev->cmdr_last_cleaned, udev->cmdr_size);
253 return false;
254 }
255
256 space = spc_free(udev->data_head, udev->data_tail, udev->data_size);
257 if (space < data_needed) {
258 pr_debug("no data space: %zu %zu %zu\n", udev->data_head,
259 udev->data_tail, udev->data_size);
260 return false;
261 }
262
263 return true;
264}
265
266static int tcmu_queue_cmd_ring(struct tcmu_cmd *tcmu_cmd)
267{
268 struct tcmu_dev *udev = tcmu_cmd->tcmu_dev;
269 struct se_cmd *se_cmd = tcmu_cmd->se_cmd;
270 size_t base_command_size, command_size;
271 size_t cmdr_space_needed;
272 struct tcmu_mailbox *mb;
273 size_t pad_size;
274 struct tcmu_cmd_entry *entry;
275 int i;
276 struct scatterlist *sg;
277 struct iovec *iov;
278 int iov_cnt = 0;
279 uint32_t cmd_head;
280 uint64_t cdb_off;
281
282 if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags))
283 return -EINVAL;
284
285 /*
286 * Must be a certain minimum size for response sense info, but
287 * also may be larger if the iov array is large.
288 *
289 * iovs = sgl_nents+1, for end-of-ring case, plus another 1
290 * b/c size == offsetof one-past-element.
291 */
292 base_command_size = max(offsetof(struct tcmu_cmd_entry,
293 req.iov[se_cmd->t_data_nents + 2]),
294 sizeof(struct tcmu_cmd_entry));
295 command_size = base_command_size
296 + round_up(scsi_command_size(se_cmd->t_task_cdb), TCMU_OP_ALIGN_SIZE);
297
298 WARN_ON(command_size & (TCMU_OP_ALIGN_SIZE-1));
299
300 spin_lock_irq(&udev->cmdr_lock);
301
302 mb = udev->mb_addr;
303 cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
304 if ((command_size > (udev->cmdr_size / 2))
305 || tcmu_cmd->data_length > (udev->data_size - 1))
306 pr_warn("TCMU: Request of size %zu/%zu may be too big for %u/%zu "
307 "cmd/data ring buffers\n", command_size, tcmu_cmd->data_length,
308 udev->cmdr_size, udev->data_size);
309
310 /*
311 * Cmd end-of-ring space is too small so we need space for a NOP plus
312 * original cmd - cmds are internally contiguous.
313 */
314 if (head_to_end(cmd_head, udev->cmdr_size) >= command_size)
315 pad_size = 0;
316 else
317 pad_size = head_to_end(cmd_head, udev->cmdr_size);
318 cmdr_space_needed = command_size + pad_size;
319
320 while (!is_ring_space_avail(udev, cmdr_space_needed, tcmu_cmd->data_length)) {
321 int ret;
322 DEFINE_WAIT(__wait);
323
324 prepare_to_wait(&udev->wait_cmdr, &__wait, TASK_INTERRUPTIBLE);
325
326 pr_debug("sleeping for ring space\n");
327 spin_unlock_irq(&udev->cmdr_lock);
328 ret = schedule_timeout(msecs_to_jiffies(TCMU_TIME_OUT));
329 finish_wait(&udev->wait_cmdr, &__wait);
330 if (!ret) {
331 pr_warn("tcmu: command timed out\n");
332 return -ETIMEDOUT;
333 }
334
335 spin_lock_irq(&udev->cmdr_lock);
336
337 /* We dropped cmdr_lock, cmd_head is stale */
338 cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
339 }
340
341 if (pad_size) {
342 entry = (void *) mb + CMDR_OFF + cmd_head;
343 tcmu_flush_dcache_range(entry, sizeof(*entry));
344 tcmu_hdr_set_op(&entry->hdr, TCMU_OP_PAD);
345 tcmu_hdr_set_len(&entry->hdr, pad_size);
346
347 UPDATE_HEAD(mb->cmd_head, pad_size, udev->cmdr_size);
348
349 cmd_head = mb->cmd_head % udev->cmdr_size; /* UAM */
350 WARN_ON(cmd_head != 0);
351 }
352
353 entry = (void *) mb + CMDR_OFF + cmd_head;
354 tcmu_flush_dcache_range(entry, sizeof(*entry));
355 tcmu_hdr_set_op(&entry->hdr, TCMU_OP_CMD);
356 tcmu_hdr_set_len(&entry->hdr, command_size);
357 entry->cmd_id = tcmu_cmd->cmd_id;
358
359 /*
360 * Fix up iovecs, and handle if allocation in data ring wrapped.
361 */
362 iov = &entry->req.iov[0];
363 for_each_sg(se_cmd->t_data_sg, sg, se_cmd->t_data_nents, i) {
364 size_t copy_bytes = min((size_t)sg->length,
365 head_to_end(udev->data_head, udev->data_size));
366 void *from = kmap_atomic(sg_page(sg)) + sg->offset;
367 void *to = (void *) mb + udev->data_off + udev->data_head;
368
369 if (tcmu_cmd->se_cmd->data_direction == DMA_TO_DEVICE) {
370 memcpy(to, from, copy_bytes);
371 tcmu_flush_dcache_range(to, copy_bytes);
372 }
373
374 /* Even iov_base is relative to mb_addr */
375 iov->iov_len = copy_bytes;
376 iov->iov_base = (void *) udev->data_off + udev->data_head;
377 iov_cnt++;
378 iov++;
379
380 UPDATE_HEAD(udev->data_head, copy_bytes, udev->data_size);
381
382 /* Uh oh, we wrapped the buffer. Must split sg across 2 iovs. */
383 if (sg->length != copy_bytes) {
384 from += copy_bytes;
385 copy_bytes = sg->length - copy_bytes;
386
387 iov->iov_len = copy_bytes;
388 iov->iov_base = (void *) udev->data_off + udev->data_head;
389
390 if (se_cmd->data_direction == DMA_TO_DEVICE) {
391 to = (void *) mb + udev->data_off + udev->data_head;
392 memcpy(to, from, copy_bytes);
393 tcmu_flush_dcache_range(to, copy_bytes);
394 }
395
396 iov_cnt++;
397 iov++;
398
399 UPDATE_HEAD(udev->data_head, copy_bytes, udev->data_size);
400 }
401
402 kunmap_atomic(from);
403 }
404 entry->req.iov_cnt = iov_cnt;
405
406 /* All offsets relative to mb_addr, not start of entry! */
407 cdb_off = CMDR_OFF + cmd_head + base_command_size;
408 memcpy((void *) mb + cdb_off, se_cmd->t_task_cdb, scsi_command_size(se_cmd->t_task_cdb));
409 entry->req.cdb_off = cdb_off;
410 tcmu_flush_dcache_range(entry, sizeof(*entry));
411
412 UPDATE_HEAD(mb->cmd_head, command_size, udev->cmdr_size);
413 tcmu_flush_dcache_range(mb, sizeof(*mb));
414
415 spin_unlock_irq(&udev->cmdr_lock);
416
417 /* TODO: only if FLUSH and FUA? */
418 uio_event_notify(&udev->uio_info);
419
420 mod_timer(&udev->timeout,
421 round_jiffies_up(jiffies + msecs_to_jiffies(TCMU_TIME_OUT)));
422
423 return 0;
424}
425
426static int tcmu_queue_cmd(struct se_cmd *se_cmd)
427{
428 struct se_device *se_dev = se_cmd->se_dev;
429 struct tcmu_dev *udev = TCMU_DEV(se_dev);
430 struct tcmu_cmd *tcmu_cmd;
431 int ret;
432
433 tcmu_cmd = tcmu_alloc_cmd(se_cmd);
434 if (!tcmu_cmd)
435 return -ENOMEM;
436
437 ret = tcmu_queue_cmd_ring(tcmu_cmd);
438 if (ret < 0) {
439 pr_err("TCMU: Could not queue command\n");
440 spin_lock_irq(&udev->commands_lock);
441 idr_remove(&udev->commands, tcmu_cmd->cmd_id);
442 spin_unlock_irq(&udev->commands_lock);
443
444 kmem_cache_free(tcmu_cmd_cache, tcmu_cmd);
445 }
446
447 return ret;
448}
449
450static void tcmu_handle_completion(struct tcmu_cmd *cmd, struct tcmu_cmd_entry *entry)
451{
452 struct se_cmd *se_cmd = cmd->se_cmd;
453 struct tcmu_dev *udev = cmd->tcmu_dev;
454
455 if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) {
456 /* cmd has been completed already from timeout, just reclaim data
457 ring space */
458 UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size);
459 return;
460 }
461
462 if (entry->rsp.scsi_status == SAM_STAT_CHECK_CONDITION) {
463 memcpy(se_cmd->sense_buffer, entry->rsp.sense_buffer,
464 se_cmd->scsi_sense_length);
465
466 UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size);
467 }
468 else if (se_cmd->data_direction == DMA_FROM_DEVICE) {
469 struct scatterlist *sg;
470 int i;
471
472 /* It'd be easier to look at entry's iovec again, but UAM */
473 for_each_sg(se_cmd->t_data_sg, sg, se_cmd->t_data_nents, i) {
474 size_t copy_bytes;
475 void *to;
476 void *from;
477
478 copy_bytes = min((size_t)sg->length,
479 head_to_end(udev->data_tail, udev->data_size));
480
481 to = kmap_atomic(sg_page(sg)) + sg->offset;
482 WARN_ON(sg->length + sg->offset > PAGE_SIZE);
483 from = (void *) udev->mb_addr + udev->data_off + udev->data_tail;
484 tcmu_flush_dcache_range(from, copy_bytes);
485 memcpy(to, from, copy_bytes);
486
487 UPDATE_HEAD(udev->data_tail, copy_bytes, udev->data_size);
488
489 /* Uh oh, wrapped the data buffer for this sg's data */
490 if (sg->length != copy_bytes) {
491 from = (void *) udev->mb_addr + udev->data_off + udev->data_tail;
492 WARN_ON(udev->data_tail);
493 to += copy_bytes;
494 copy_bytes = sg->length - copy_bytes;
495 tcmu_flush_dcache_range(from, copy_bytes);
496 memcpy(to, from, copy_bytes);
497
498 UPDATE_HEAD(udev->data_tail, copy_bytes, udev->data_size);
499 }
500
501 kunmap_atomic(to);
502 }
503
504 } else if (se_cmd->data_direction == DMA_TO_DEVICE) {
505 UPDATE_HEAD(udev->data_tail, cmd->data_length, udev->data_size);
506 } else {
507 pr_warn("TCMU: data direction was %d!\n", se_cmd->data_direction);
508 }
509
510 target_complete_cmd(cmd->se_cmd, entry->rsp.scsi_status);
511 cmd->se_cmd = NULL;
512
513 kmem_cache_free(tcmu_cmd_cache, cmd);
514}
515
516static unsigned int tcmu_handle_completions(struct tcmu_dev *udev)
517{
518 struct tcmu_mailbox *mb;
519 LIST_HEAD(cpl_cmds);
520 unsigned long flags;
521 int handled = 0;
522
523 if (test_bit(TCMU_DEV_BIT_BROKEN, &udev->flags)) {
524 pr_err("ring broken, not handling completions\n");
525 return 0;
526 }
527
528 spin_lock_irqsave(&udev->cmdr_lock, flags);
529
530 mb = udev->mb_addr;
531 tcmu_flush_dcache_range(mb, sizeof(*mb));
532
533 while (udev->cmdr_last_cleaned != ACCESS_ONCE(mb->cmd_tail)) {
534
535 struct tcmu_cmd_entry *entry = (void *) mb + CMDR_OFF + udev->cmdr_last_cleaned;
536 struct tcmu_cmd *cmd;
537
538 tcmu_flush_dcache_range(entry, sizeof(*entry));
539
540 if (tcmu_hdr_get_op(&entry->hdr) == TCMU_OP_PAD) {
541 UPDATE_HEAD(udev->cmdr_last_cleaned, tcmu_hdr_get_len(&entry->hdr), udev->cmdr_size);
542 continue;
543 }
544 WARN_ON(tcmu_hdr_get_op(&entry->hdr) != TCMU_OP_CMD);
545
546 spin_lock(&udev->commands_lock);
547 cmd = idr_find(&udev->commands, entry->cmd_id);
548 if (cmd)
549 idr_remove(&udev->commands, cmd->cmd_id);
550 spin_unlock(&udev->commands_lock);
551
552 if (!cmd) {
553 pr_err("cmd_id not found, ring is broken\n");
554 set_bit(TCMU_DEV_BIT_BROKEN, &udev->flags);
555 break;
556 }
557
558 tcmu_handle_completion(cmd, entry);
559
560 UPDATE_HEAD(udev->cmdr_last_cleaned, tcmu_hdr_get_len(&entry->hdr), udev->cmdr_size);
561
562 handled++;
563 }
564
565 if (mb->cmd_tail == mb->cmd_head)
566 del_timer(&udev->timeout); /* no more pending cmds */
567
568 spin_unlock_irqrestore(&udev->cmdr_lock, flags);
569
570 wake_up(&udev->wait_cmdr);
571
572 return handled;
573}
574
575static int tcmu_check_expired_cmd(int id, void *p, void *data)
576{
577 struct tcmu_cmd *cmd = p;
578
579 if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags))
580 return 0;
581
582 if (!time_after(cmd->deadline, jiffies))
583 return 0;
584
585 set_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags);
586 target_complete_cmd(cmd->se_cmd, SAM_STAT_CHECK_CONDITION);
587 cmd->se_cmd = NULL;
588
589 kmem_cache_free(tcmu_cmd_cache, cmd);
590
591 return 0;
592}
593
594static void tcmu_device_timedout(unsigned long data)
595{
596 struct tcmu_dev *udev = (struct tcmu_dev *)data;
597 unsigned long flags;
598 int handled;
599
600 handled = tcmu_handle_completions(udev);
601
602 pr_warn("%d completions handled from timeout\n", handled);
603
604 spin_lock_irqsave(&udev->commands_lock, flags);
605 idr_for_each(&udev->commands, tcmu_check_expired_cmd, NULL);
606 spin_unlock_irqrestore(&udev->commands_lock, flags);
607
608 /*
609 * We don't need to wakeup threads on wait_cmdr since they have their
610 * own timeout.
611 */
612}
613
614static int tcmu_attach_hba(struct se_hba *hba, u32 host_id)
615{
616 struct tcmu_hba *tcmu_hba;
617
618 tcmu_hba = kzalloc(sizeof(struct tcmu_hba), GFP_KERNEL);
619 if (!tcmu_hba)
620 return -ENOMEM;
621
622 tcmu_hba->host_id = host_id;
623 hba->hba_ptr = tcmu_hba;
624
625 return 0;
626}
627
628static void tcmu_detach_hba(struct se_hba *hba)
629{
630 kfree(hba->hba_ptr);
631 hba->hba_ptr = NULL;
632}
633
634static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name)
635{
636 struct tcmu_dev *udev;
637
638 udev = kzalloc(sizeof(struct tcmu_dev), GFP_KERNEL);
639 if (!udev)
640 return NULL;
641
642 udev->name = kstrdup(name, GFP_KERNEL);
643 if (!udev->name) {
644 kfree(udev);
645 return NULL;
646 }
647
648 udev->hba = hba;
649
650 init_waitqueue_head(&udev->wait_cmdr);
651 spin_lock_init(&udev->cmdr_lock);
652
653 idr_init(&udev->commands);
654 spin_lock_init(&udev->commands_lock);
655
656 setup_timer(&udev->timeout, tcmu_device_timedout,
657 (unsigned long)udev);
658
659 udev->pass_level = TCMU_PASS_ALL;
660
661 return &udev->se_dev;
662}
663
664static int tcmu_irqcontrol(struct uio_info *info, s32 irq_on)
665{
666 struct tcmu_dev *tcmu_dev = container_of(info, struct tcmu_dev, uio_info);
667
668 tcmu_handle_completions(tcmu_dev);
669
670 return 0;
671}
672
673/*
674 * mmap code from uio.c. Copied here because we want to hook mmap()
675 * and this stuff must come along.
676 */
677static int tcmu_find_mem_index(struct vm_area_struct *vma)
678{
679 struct tcmu_dev *udev = vma->vm_private_data;
680 struct uio_info *info = &udev->uio_info;
681
682 if (vma->vm_pgoff < MAX_UIO_MAPS) {
683 if (info->mem[vma->vm_pgoff].size == 0)
684 return -1;
685 return (int)vma->vm_pgoff;
686 }
687 return -1;
688}
689
690static int tcmu_vma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
691{
692 struct tcmu_dev *udev = vma->vm_private_data;
693 struct uio_info *info = &udev->uio_info;
694 struct page *page;
695 unsigned long offset;
696 void *addr;
697
698 int mi = tcmu_find_mem_index(vma);
699 if (mi < 0)
700 return VM_FAULT_SIGBUS;
701
702 /*
703 * We need to subtract mi because userspace uses offset = N*PAGE_SIZE
704 * to use mem[N].
705 */
706 offset = (vmf->pgoff - mi) << PAGE_SHIFT;
707
708 addr = (void *)(unsigned long)info->mem[mi].addr + offset;
709 if (info->mem[mi].memtype == UIO_MEM_LOGICAL)
710 page = virt_to_page(addr);
711 else
712 page = vmalloc_to_page(addr);
713 get_page(page);
714 vmf->page = page;
715 return 0;
716}
717
718static const struct vm_operations_struct tcmu_vm_ops = {
719 .fault = tcmu_vma_fault,
720};
721
722static int tcmu_mmap(struct uio_info *info, struct vm_area_struct *vma)
723{
724 struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
725
726 vma->vm_flags |= VM_DONTEXPAND | VM_DONTDUMP;
727 vma->vm_ops = &tcmu_vm_ops;
728
729 vma->vm_private_data = udev;
730
731 /* Ensure the mmap is exactly the right size */
732 if (vma_pages(vma) != (TCMU_RING_SIZE >> PAGE_SHIFT))
733 return -EINVAL;
734
735 return 0;
736}
737
738static int tcmu_open(struct uio_info *info, struct inode *inode)
739{
740 struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
741
742 /* O_EXCL not supported for char devs, so fake it? */
743 if (test_and_set_bit(TCMU_DEV_BIT_OPEN, &udev->flags))
744 return -EBUSY;
745
746 pr_debug("open\n");
747
748 return 0;
749}
750
751static int tcmu_release(struct uio_info *info, struct inode *inode)
752{
753 struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
754
755 clear_bit(TCMU_DEV_BIT_OPEN, &udev->flags);
756
757 pr_debug("close\n");
758
759 return 0;
760}
761
762static int tcmu_netlink_event(enum tcmu_genl_cmd cmd, const char *name, int minor)
763{
764 struct sk_buff *skb;
765 void *msg_header;
766 int ret;
767
768 skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
769 if (!skb)
770 return -ENOMEM;
771
772 msg_header = genlmsg_put(skb, 0, 0, &tcmu_genl_family, 0, cmd);
773 if (!msg_header) {
774 nlmsg_free(skb);
775 return -ENOMEM;
776 }
777
778 ret = nla_put_string(skb, TCMU_ATTR_DEVICE, name);
779
780 ret = nla_put_u32(skb, TCMU_ATTR_MINOR, minor);
781
782 ret = genlmsg_end(skb, msg_header);
783 if (ret < 0) {
784 nlmsg_free(skb);
785 return ret;
786 }
787
788 ret = genlmsg_multicast(&tcmu_genl_family, skb, 0,
789 TCMU_MCGRP_CONFIG, GFP_KERNEL);
790
791 /* We don't care if no one is listening */
792 if (ret == -ESRCH)
793 ret = 0;
794
795 return ret;
796}
797
798static int tcmu_configure_device(struct se_device *dev)
799{
800 struct tcmu_dev *udev = TCMU_DEV(dev);
801 struct tcmu_hba *hba = udev->hba->hba_ptr;
802 struct uio_info *info;
803 struct tcmu_mailbox *mb;
804 size_t size;
805 size_t used;
806 int ret = 0;
807 char *str;
808
809 info = &udev->uio_info;
810
811 size = snprintf(NULL, 0, "tcm-user/%u/%s/%s", hba->host_id, udev->name,
812 udev->dev_config);
813 size += 1; /* for \0 */
814 str = kmalloc(size, GFP_KERNEL);
815 if (!str)
816 return -ENOMEM;
817
818 used = snprintf(str, size, "tcm-user/%u/%s", hba->host_id, udev->name);
819
820 if (udev->dev_config[0])
821 snprintf(str + used, size - used, "/%s", udev->dev_config);
822
823 info->name = str;
824
825 udev->mb_addr = vzalloc(TCMU_RING_SIZE);
826 if (!udev->mb_addr) {
827 ret = -ENOMEM;
828 goto err_vzalloc;
829 }
830
831 /* mailbox fits in first part of CMDR space */
832 udev->cmdr_size = CMDR_SIZE - CMDR_OFF;
833 udev->data_off = CMDR_SIZE;
834 udev->data_size = TCMU_RING_SIZE - CMDR_SIZE;
835
836 mb = udev->mb_addr;
837 mb->version = 1;
838 mb->cmdr_off = CMDR_OFF;
839 mb->cmdr_size = udev->cmdr_size;
840
841 WARN_ON(!PAGE_ALIGNED(udev->data_off));
842 WARN_ON(udev->data_size % PAGE_SIZE);
843
844 info->version = "1";
845
846 info->mem[0].name = "tcm-user command & data buffer";
847 info->mem[0].addr = (phys_addr_t) udev->mb_addr;
848 info->mem[0].size = TCMU_RING_SIZE;
849 info->mem[0].memtype = UIO_MEM_VIRTUAL;
850
851 info->irqcontrol = tcmu_irqcontrol;
852 info->irq = UIO_IRQ_CUSTOM;
853
854 info->mmap = tcmu_mmap;
855 info->open = tcmu_open;
856 info->release = tcmu_release;
857
858 ret = uio_register_device(tcmu_root_device, info);
859 if (ret)
860 goto err_register;
861
862 /* Other attributes can be configured in userspace */
863 dev->dev_attrib.hw_block_size = 512;
864 dev->dev_attrib.hw_max_sectors = 128;
865 dev->dev_attrib.hw_queue_depth = 128;
866
867 ret = tcmu_netlink_event(TCMU_CMD_ADDED_DEVICE, udev->uio_info.name,
868 udev->uio_info.uio_dev->minor);
869 if (ret)
870 goto err_netlink;
871
872 return 0;
873
874err_netlink:
875 uio_unregister_device(&udev->uio_info);
876err_register:
877 vfree(udev->mb_addr);
878err_vzalloc:
879 kfree(info->name);
880
881 return ret;
882}
883
884static int tcmu_check_pending_cmd(int id, void *p, void *data)
885{
886 struct tcmu_cmd *cmd = p;
887
888 if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags))
889 return 0;
890 return -EINVAL;
891}
892
893static void tcmu_free_device(struct se_device *dev)
894{
895 struct tcmu_dev *udev = TCMU_DEV(dev);
896 int i;
897
898 del_timer_sync(&udev->timeout);
899
900 vfree(udev->mb_addr);
901
902 /* Upper layer should drain all requests before calling this */
903 spin_lock_irq(&udev->commands_lock);
904 i = idr_for_each(&udev->commands, tcmu_check_pending_cmd, NULL);
905 idr_destroy(&udev->commands);
906 spin_unlock_irq(&udev->commands_lock);
907 WARN_ON(i);
908
909 /* Device was configured */
910 if (udev->uio_info.uio_dev) {
911 tcmu_netlink_event(TCMU_CMD_REMOVED_DEVICE, udev->uio_info.name,
912 udev->uio_info.uio_dev->minor);
913
914 uio_unregister_device(&udev->uio_info);
915 kfree(udev->uio_info.name);
916 kfree(udev->name);
917 }
918
919 kfree(udev);
920}
921
922enum {
923 Opt_dev_config, Opt_dev_size, Opt_err, Opt_pass_level,
924};
925
926static match_table_t tokens = {
927 {Opt_dev_config, "dev_config=%s"},
928 {Opt_dev_size, "dev_size=%u"},
929 {Opt_pass_level, "pass_level=%u"},
930 {Opt_err, NULL}
931};
932
933static ssize_t tcmu_set_configfs_dev_params(struct se_device *dev,
934 const char *page, ssize_t count)
935{
936 struct tcmu_dev *udev = TCMU_DEV(dev);
937 char *orig, *ptr, *opts, *arg_p;
938 substring_t args[MAX_OPT_ARGS];
939 int ret = 0, token;
940 int arg;
941
942 opts = kstrdup(page, GFP_KERNEL);
943 if (!opts)
944 return -ENOMEM;
945
946 orig = opts;
947
948 while ((ptr = strsep(&opts, ",\n")) != NULL) {
949 if (!*ptr)
950 continue;
951
952 token = match_token(ptr, tokens, args);
953 switch (token) {
954 case Opt_dev_config:
955 if (match_strlcpy(udev->dev_config, &args[0],
956 TCMU_CONFIG_LEN) == 0) {
957 ret = -EINVAL;
958 break;
959 }
960 pr_debug("TCMU: Referencing Path: %s\n", udev->dev_config);
961 break;
962 case Opt_dev_size:
963 arg_p = match_strdup(&args[0]);
964 if (!arg_p) {
965 ret = -ENOMEM;
966 break;
967 }
968 ret = kstrtoul(arg_p, 0, (unsigned long *) &udev->dev_size);
969 kfree(arg_p);
970 if (ret < 0)
971 pr_err("kstrtoul() failed for dev_size=\n");
972 break;
973 case Opt_pass_level:
974 match_int(args, &arg);
975 if (arg >= TCMU_PASS_INVALID) {
976 pr_warn("TCMU: Invalid pass_level: %d\n", arg);
977 break;
978 }
979
980 pr_debug("TCMU: Setting pass_level to %d\n", arg);
981 udev->pass_level = arg;
982 break;
983 default:
984 break;
985 }
986 }
987
988 kfree(orig);
989 return (!ret) ? count : ret;
990}
991
992static ssize_t tcmu_show_configfs_dev_params(struct se_device *dev, char *b)
993{
994 struct tcmu_dev *udev = TCMU_DEV(dev);
995 ssize_t bl = 0;
996
997 bl = sprintf(b + bl, "Config: %s ",
998 udev->dev_config[0] ? udev->dev_config : "NULL");
999 bl += sprintf(b + bl, "Size: %zu PassLevel: %u\n",
1000 udev->dev_size, udev->pass_level);
1001
1002 return bl;
1003}
1004
1005static sector_t tcmu_get_blocks(struct se_device *dev)
1006{
1007 struct tcmu_dev *udev = TCMU_DEV(dev);
1008
1009 return div_u64(udev->dev_size - dev->dev_attrib.block_size,
1010 dev->dev_attrib.block_size);
1011}
1012
1013static sense_reason_t
1014tcmu_execute_rw(struct se_cmd *se_cmd, struct scatterlist *sgl, u32 sgl_nents,
1015 enum dma_data_direction data_direction)
1016{
1017 int ret;
1018
1019 ret = tcmu_queue_cmd(se_cmd);
1020
1021 if (ret != 0)
1022 return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
1023 else
1024 return TCM_NO_SENSE;
1025}
1026
1027static sense_reason_t
1028tcmu_pass_op(struct se_cmd *se_cmd)
1029{
1030 int ret = tcmu_queue_cmd(se_cmd);
1031
1032 if (ret != 0)
1033 return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
1034 else
1035 return TCM_NO_SENSE;
1036}
1037
1038static struct sbc_ops tcmu_sbc_ops = {
1039 .execute_rw = tcmu_execute_rw,
1040 .execute_sync_cache = tcmu_pass_op,
1041 .execute_write_same = tcmu_pass_op,
1042 .execute_write_same_unmap = tcmu_pass_op,
1043 .execute_unmap = tcmu_pass_op,
1044};
1045
1046static sense_reason_t
1047tcmu_parse_cdb(struct se_cmd *cmd)
1048{
1049 unsigned char *cdb = cmd->t_task_cdb;
1050 struct tcmu_dev *udev = TCMU_DEV(cmd->se_dev);
1051 sense_reason_t ret;
1052
1053 switch (udev->pass_level) {
1054 case TCMU_PASS_ALL:
1055 /* We're just like pscsi, then */
1056 /*
1057 * For REPORT LUNS we always need to emulate the response, for everything
1058 * else, pass it up.
1059 */
1060 switch (cdb[0]) {
1061 case REPORT_LUNS:
1062 cmd->execute_cmd = spc_emulate_report_luns;
1063 break;
1064 case READ_6:
1065 case READ_10:
1066 case READ_12:
1067 case READ_16:
1068 case WRITE_6:
1069 case WRITE_10:
1070 case WRITE_12:
1071 case WRITE_16:
1072 case WRITE_VERIFY:
1073 cmd->se_cmd_flags |= SCF_SCSI_DATA_CDB;
1074 /* FALLTHROUGH */
1075 default:
1076 cmd->execute_cmd = tcmu_pass_op;
1077 }
1078 ret = TCM_NO_SENSE;
1079 break;
1080 case TCMU_PASS_IO:
1081 ret = sbc_parse_cdb(cmd, &tcmu_sbc_ops);
1082 break;
1083 default:
1084 pr_err("Unknown tcm-user pass level %d\n", udev->pass_level);
1085 ret = TCM_CHECK_CONDITION_ABORT_CMD;
1086 }
1087
1088 return ret;
1089}
1090
1091static struct se_subsystem_api tcmu_template = {
1092 .name = "user",
1093 .inquiry_prod = "USER",
1094 .inquiry_rev = TCMU_VERSION,
1095 .owner = THIS_MODULE,
1096 .transport_type = TRANSPORT_PLUGIN_VHBA_PDEV,
1097 .attach_hba = tcmu_attach_hba,
1098 .detach_hba = tcmu_detach_hba,
1099 .alloc_device = tcmu_alloc_device,
1100 .configure_device = tcmu_configure_device,
1101 .free_device = tcmu_free_device,
1102 .parse_cdb = tcmu_parse_cdb,
1103 .set_configfs_dev_params = tcmu_set_configfs_dev_params,
1104 .show_configfs_dev_params = tcmu_show_configfs_dev_params,
1105 .get_device_type = sbc_get_device_type,
1106 .get_blocks = tcmu_get_blocks,
1107};
1108
1109static int __init tcmu_module_init(void)
1110{
1111 int ret;
1112
1113 BUILD_BUG_ON((sizeof(struct tcmu_cmd_entry) % TCMU_OP_ALIGN_SIZE) != 0);
1114
1115 tcmu_cmd_cache = kmem_cache_create("tcmu_cmd_cache",
1116 sizeof(struct tcmu_cmd),
1117 __alignof__(struct tcmu_cmd),
1118 0, NULL);
1119 if (!tcmu_cmd_cache)
1120 return -ENOMEM;
1121
1122 tcmu_root_device = root_device_register("tcm_user");
1123 if (IS_ERR(tcmu_root_device)) {
1124 ret = PTR_ERR(tcmu_root_device);
1125 goto out_free_cache;
1126 }
1127
1128 ret = genl_register_family(&tcmu_genl_family);
1129 if (ret < 0) {
1130 goto out_unreg_device;
1131 }
1132
1133 ret = transport_subsystem_register(&tcmu_template);
1134 if (ret)
1135 goto out_unreg_genl;
1136
1137 return 0;
1138
1139out_unreg_genl:
1140 genl_unregister_family(&tcmu_genl_family);
1141out_unreg_device:
1142 root_device_unregister(tcmu_root_device);
1143out_free_cache:
1144 kmem_cache_destroy(tcmu_cmd_cache);
1145
1146 return ret;
1147}
1148
1149static void __exit tcmu_module_exit(void)
1150{
1151 transport_subsystem_release(&tcmu_template);
1152 genl_unregister_family(&tcmu_genl_family);
1153 root_device_unregister(tcmu_root_device);
1154 kmem_cache_destroy(tcmu_cmd_cache);
1155}
1156
1157MODULE_DESCRIPTION("TCM USER subsystem plugin");
1158MODULE_AUTHOR("Shaohua Li <shli@kernel.org>");
1159MODULE_AUTHOR("Andy Grover <agrover@redhat.com>");
1160MODULE_LICENSE("GPL");
1161
1162module_init(tcmu_module_init);
1163module_exit(tcmu_module_exit);
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index be88166349a1..6ebd0d1faf2e 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -371,6 +371,7 @@ header-y += swab.h
371header-y += synclink.h
372header-y += sysctl.h
373header-y += sysinfo.h
374header-y += target_core_user.h
375header-y += taskstats.h
376header-y += tcp.h
377header-y += tcp_metrics.h
diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h
new file mode 100644
index 000000000000..7dcfbe6771b1
--- /dev/null
+++ b/include/uapi/linux/target_core_user.h
@@ -0,0 +1,142 @@
1#ifndef __TARGET_CORE_USER_H
2#define __TARGET_CORE_USER_H
3
4/* This header will be used by applications too */
5
6#include <linux/types.h>
7#include <linux/uio.h>
8
9#ifndef __packed
10#define __packed __attribute__((packed))
11#endif
12
13#define TCMU_VERSION "1.0"
14
15/*
16 * Ring Design
17 * -----------
18 *
19 * The mmaped area is divided into three parts:
20 * 1) The mailbox (struct tcmu_mailbox, below)
21 * 2) The command ring
22 * 3) Everything beyond the command ring (data)
23 *
24 * The mailbox tells userspace the offset of the command ring from the
25 * start of the shared memory region, and how big the command ring is.
26 *
27 * The kernel passes SCSI commands to userspace by putting a struct
28 * tcmu_cmd_entry in the ring, updating mailbox->cmd_head, and poking
29 * userspace via uio's interrupt mechanism.
30 *
31 * tcmu_cmd_entry contains a header. If the header type is PAD,
32 * userspace should skip hdr->length bytes (mod cmdr_size) to find the
33 * next cmd_entry.
34 *
35 * Otherwise, the entry will contain offsets into the mmaped area that
36 * contain the cdb and data buffers -- the latter accessible via the
37 * iov array. iov addresses are also offsets into the shared area.
38 *
39 * When userspace has completed handling the command, set
40 * entry->rsp.scsi_status, fill in rsp.sense_buffer if appropriate,
41 * and also set mailbox->cmd_tail equal to the old cmd_tail plus
42 * hdr->length, mod cmdr_size. If cmd_tail doesn't equal cmd_head, it
43 * should process the next packet the same way, and so on.
44 */
45
46#define TCMU_MAILBOX_VERSION 1
47#define ALIGN_SIZE 64 /* Should be enough for most CPUs */
48
49struct tcmu_mailbox {
50 __u16 version;
51 __u16 flags;
52 __u32 cmdr_off;
53 __u32 cmdr_size;
54
55 __u32 cmd_head;
56
57 /* Updated by user. On its own cacheline */
58 __u32 cmd_tail __attribute__((__aligned__(ALIGN_SIZE)));
59
60} __packed;
61
62enum tcmu_opcode {
63 TCMU_OP_PAD = 0,
64 TCMU_OP_CMD,
65};
66
67/*
68 * Only a few opcodes, and length is 8-byte aligned, so use low bits for opcode.
69 */
70struct tcmu_cmd_entry_hdr {
71 __u32 len_op;
72} __packed;
73
74#define TCMU_OP_MASK 0x7
75
76static inline enum tcmu_opcode tcmu_hdr_get_op(struct tcmu_cmd_entry_hdr *hdr)
77{
78 return hdr->len_op & TCMU_OP_MASK;
79}
80
81static inline void tcmu_hdr_set_op(struct tcmu_cmd_entry_hdr *hdr, enum tcmu_opcode op)
82{
83 hdr->len_op &= ~TCMU_OP_MASK;
84 hdr->len_op |= (op & TCMU_OP_MASK);
85}
86
87static inline __u32 tcmu_hdr_get_len(struct tcmu_cmd_entry_hdr *hdr)
88{
89 return hdr->len_op & ~TCMU_OP_MASK;
90}
91
92static inline void tcmu_hdr_set_len(struct tcmu_cmd_entry_hdr *hdr, __u32 len)
93{
94 hdr->len_op &= TCMU_OP_MASK;
95 hdr->len_op |= len;
96}
97
98/* Currently the same as SCSI_SENSE_BUFFERSIZE */
99#define TCMU_SENSE_BUFFERSIZE 96
100
101struct tcmu_cmd_entry {
102 struct tcmu_cmd_entry_hdr hdr;
103
104 uint16_t cmd_id;
105 uint16_t __pad1;
106
107 union {
108 struct {
109 uint64_t cdb_off;
110 uint64_t iov_cnt;
111 struct iovec iov[0];
112 } req;
113 struct {
114 uint8_t scsi_status;
115 uint8_t __pad1;
116 uint16_t __pad2;
117 uint32_t __pad3;
118 char sense_buffer[TCMU_SENSE_BUFFERSIZE];
119 } rsp;
120 };
121
122} __packed;
123
124#define TCMU_OP_ALIGN_SIZE sizeof(uint64_t)
125
126enum tcmu_genl_cmd {
127 TCMU_CMD_UNSPEC,
128 TCMU_CMD_ADDED_DEVICE,
129 TCMU_CMD_REMOVED_DEVICE,
130 __TCMU_CMD_MAX,
131};
132#define TCMU_CMD_MAX (__TCMU_CMD_MAX - 1)
133
134enum tcmu_genl_attr {
135 TCMU_ATTR_UNSPEC,
136 TCMU_ATTR_DEVICE,
137 TCMU_ATTR_MINOR,
138 __TCMU_ATTR_MAX,
139};
140#define TCMU_ATTR_MAX (__TCMU_ATTR_MAX - 1)
141
142#endif
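New and removed devices are announced over the generic netlink family the
module registers ("TCM-USER", multicast group "config"): tcmu_netlink_event()
sends TCMU_CMD_ADDED_DEVICE / TCMU_CMD_REMOVED_DEVICE carrying
TCMU_ATTR_DEVICE (the uio_info name) and TCMU_ATTR_MINOR (the UIO minor). A
minimal listener sketch, assuming libnl-3's genl API rather than anything this
patch provides, might be:

/* Hypothetical discovery listener (illustration only, not part of this
 * patch), assuming libnl-3. It joins the "config" multicast group of the
 * "TCM-USER" family and prints add/remove events with the UIO minor. */
#include <stdio.h>
#include <netlink/netlink.h>
#include <netlink/genl/genl.h>
#include <netlink/genl/ctrl.h>
#include <linux/target_core_user.h>

static int handle_event(struct nl_msg *msg, void *arg)
{
	struct genlmsghdr *gnlh = genlmsg_hdr(nlmsg_hdr(msg));
	struct nlattr *attrs[TCMU_ATTR_MAX + 1];

	if (genlmsg_parse(nlmsg_hdr(msg), 0, attrs, TCMU_ATTR_MAX, NULL) < 0)
		return NL_SKIP;

	if (attrs[TCMU_ATTR_DEVICE] && attrs[TCMU_ATTR_MINOR])
		printf("%s: %s (/dev/uio%u)\n",
		       gnlh->cmd == TCMU_CMD_ADDED_DEVICE ? "added" : "removed",
		       nla_get_string(attrs[TCMU_ATTR_DEVICE]),
		       nla_get_u32(attrs[TCMU_ATTR_MINOR]));
	return NL_OK;
}

int main(void)
{
	struct nl_sock *sock = nl_socket_alloc();

	nl_socket_disable_seq_check(sock);
	nl_socket_modify_cb(sock, NL_CB_VALID, NL_CB_CUSTOM, handle_event, NULL);
	genl_connect(sock);
	nl_socket_add_membership(sock,
			genl_ctrl_resolve_grp(sock, "TCM-USER", "config"));

	for (;;)
		nl_recvmsgs_default(sock);
}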