aboutsummaryrefslogtreecommitdiffstats
path: root/include/uapi
diff options
context:
space:
mode:
authorAndy Grover <agrover@redhat.com>2014-10-01 19:07:05 -0400
committerNicholas Bellinger <nab@linux-iscsi.org>2014-10-03 14:15:20 -0400
commit7c9e7a6fe11c8dc5b3b9d0e889dde73347247584 (patch)
tree9c4c3a753228617e308226f47dc6d4fe83ddf15d /include/uapi
parentce87685128f3e0fced2aca9f73fc8cc67704ae11 (diff)
target: Add a user-passthrough backstore
Add a LIO storage engine that presents commands to userspace for execution. This would allow more complex backstores to be implemented out-of-kernel, and also make experimentation a-la FUSE (but at the SCSI level -- "SUSE"?) possible. It uses a mmap()able UIO device per LUN to share a command ring and data area. The commands are raw SCSI CDBs and iovs for in/out data. The command ring is also reused for returning scsi command status and optional sense data. This implementation is based on Shaohua Li's earlier version but heavily modified. Differences include: * Shared memory allocated by kernel, not locked-down user pages * Single ring for command request and response * Offsets instead of embedded pointers * Generic SCSI CDB passthrough instead of per-cmd specialization in ring format. * Uses UIO device instead of anon_file passed in mailbox. * Optional in-kernel handling of some commands. The main reason for these differences is to permit greater resiliency if the user process dies or hangs. Things not yet implemented (on purpose): * Zero copy. The data area is flexible enough to allow page flipping or backend-allocated pages to be used by fabrics, but it's not clear these are performance wins. Can come later. * Out-of-order command completion by userspace. Possible to add by just allowing userspace to change cmd_id in rsp cmd entries, but currently not supported. * No locks between kernel cmd submission and completion routines. Sounds like it's possible, but this can come later. * Sparse allocation of mmaped area. Current code vmallocs the whole thing. If the mapped area was larger and not fully mapped then the driver would have more freedom to change cmd and data area sizes based on demand. Current code open issues: * The use of idrs may be overkill -- we maybe can replace them with a simple counter to generate cmd_ids, and a hash table to get a cmd_id's associated pointer. * Use of a free-running counter for cmd ring instead of explicit modulo math. 
This would require power-of-2 cmd ring size. (Add kconfig depends NET - Randy) Signed-off-by: Andy Grover <agrover@redhat.com> Signed-off-by: Nicholas Bellinger <nab@linux-iscsi.org>
Diffstat (limited to 'include/uapi')
-rw-r--r--include/uapi/linux/Kbuild1
-rw-r--r--include/uapi/linux/target_core_user.h142
2 files changed, 143 insertions, 0 deletions
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index be88166349a1..6ebd0d1faf2e 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -371,6 +371,7 @@ header-y += swab.h
371header-y += synclink.h 371header-y += synclink.h
372header-y += sysctl.h 372header-y += sysctl.h
373header-y += sysinfo.h 373header-y += sysinfo.h
374header-y += target_core_user.h
374header-y += taskstats.h 375header-y += taskstats.h
375header-y += tcp.h 376header-y += tcp.h
376header-y += tcp_metrics.h 377header-y += tcp_metrics.h
diff --git a/include/uapi/linux/target_core_user.h b/include/uapi/linux/target_core_user.h
new file mode 100644
index 000000000000..7dcfbe6771b1
--- /dev/null
+++ b/include/uapi/linux/target_core_user.h
@@ -0,0 +1,142 @@
1#ifndef __TARGET_CORE_USER_H
2#define __TARGET_CORE_USER_H
3
4/* This header will be used by application too */
5
6#include <linux/types.h>
7#include <linux/uio.h>
8
9#ifndef __packed
10#define __packed __attribute__((packed))
11#endif
12
13#define TCMU_VERSION "1.0"
14
15/*
16 * Ring Design
17 * -----------
18 *
19 * The mmaped area is divided into three parts:
20 * 1) The mailbox (struct tcmu_mailbox, below)
21 * 2) The command ring
22 * 3) Everything beyond the command ring (data)
23 *
24 * The mailbox tells userspace the offset of the command ring from the
25 * start of the shared memory region, and how big the command ring is.
26 *
27 * The kernel passes SCSI commands to userspace by putting a struct
28 * tcmu_cmd_entry in the ring, updating mailbox->cmd_head, and poking
29 * userspace via uio's interrupt mechanism.
30 *
31 * tcmu_cmd_entry contains a header. If the header type is PAD,
32 * userspace should skip hdr->length bytes (mod cmdr_size) to find the
33 * next cmd_entry.
34 *
35 * Otherwise, the entry will contain offsets into the mmaped area that
36 * contain the cdb and data buffers -- the latter accessible via the
37 * iov array. iov addresses are also offsets into the shared area.
38 *
39 * When userspace has finished handling the command, it should set
40 * entry->rsp.scsi_status, fill in rsp.sense_buffer if appropriate,
41 * and also set mailbox->cmd_tail equal to the old cmd_tail plus
42 * hdr->length, mod cmdr_size. If cmd_tail doesn't equal cmd_head, it
43 * should process the next packet the same way, and so on.
44 */
45
/* Version number of the mailbox layout described below. */
#define TCMU_MAILBOX_VERSION 1
/* Alignment (bytes) that keeps the user-written tail on its own cacheline. */
#define ALIGN_SIZE 64 /* Should be enough for most CPUs */

/*
 * Mailbox: the first of the three parts of the mmaped area.
 *
 * It tells userspace where the command ring lives and how big it is, and
 * carries the ring's head and tail offsets. The kernel advances cmd_head
 * after queueing an entry; userspace advances cmd_tail after completing
 * one (both modulo cmdr_size).
 */
struct tcmu_mailbox {
	__u16 version;		/* presumably TCMU_MAILBOX_VERSION -- confirm */
	__u16 flags;
	__u32 cmdr_off;		/* command ring offset from start of shared region */
	__u32 cmdr_size;	/* size of the command ring */

	__u32 cmd_head;		/* updated by the kernel */

	/* Updated by user. On its own cacheline */
	__u32 cmd_tail __attribute__((__aligned__(ALIGN_SIZE)));

} __packed;
61
/* Entry types; stored in the low bits of tcmu_cmd_entry_hdr.len_op. */
enum tcmu_opcode {
	TCMU_OP_PAD = 0,	/* padding: skip the entry's length to reach the next one */
	TCMU_OP_CMD,		/* entry carries a command */
};

/*
 * Only a few opcodes, and length is 8-byte aligned, so use low bits for opcode.
 *
 * len_op therefore packs two fields into one 32-bit word:
 *   bits covered by TCMU_OP_MASK -> opcode (enum tcmu_opcode)
 *   all remaining bits           -> entry length in bytes
 */
struct tcmu_cmd_entry_hdr {
	__u32 len_op;
} __packed;

/* Bits of len_op that hold the opcode. */
#define TCMU_OP_MASK 0x7

/*
 * Return the opcode stored in @hdr.
 *
 * The explicit cast keeps the header usable from C++ applications, where
 * an integer does not implicitly convert to an enum.
 */
static inline enum tcmu_opcode tcmu_hdr_get_op(const struct tcmu_cmd_entry_hdr *hdr)
{
	return (enum tcmu_opcode)(hdr->len_op & TCMU_OP_MASK);
}

/* Store @op in @hdr, leaving the length bits untouched. */
static inline void tcmu_hdr_set_op(struct tcmu_cmd_entry_hdr *hdr, enum tcmu_opcode op)
{
	hdr->len_op &= ~TCMU_OP_MASK;
	hdr->len_op |= (op & TCMU_OP_MASK);
}

/* Return the entry length stored in @hdr (opcode bits cleared). */
static inline __u32 tcmu_hdr_get_len(const struct tcmu_cmd_entry_hdr *hdr)
{
	return hdr->len_op & ~TCMU_OP_MASK;
}

/*
 * Store @len in @hdr, leaving the opcode bits untouched.
 *
 * @len is expected to be a multiple of 8. Any bits of @len that overlap
 * TCMU_OP_MASK are dropped so a misaligned length cannot silently rewrite
 * the opcode.
 */
static inline void tcmu_hdr_set_len(struct tcmu_cmd_entry_hdr *hdr, __u32 len)
{
	hdr->len_op &= TCMU_OP_MASK;
	hdr->len_op |= (len & ~TCMU_OP_MASK);
}
97
98/* Currently the same as SCSI_SENSE_BUFFERSIZE */
99#define TCMU_SENSE_BUFFERSIZE 96
100
101struct tcmu_cmd_entry {
102 struct tcmu_cmd_entry_hdr hdr;
103
104 uint16_t cmd_id;
105 uint16_t __pad1;
106
107 union {
108 struct {
109 uint64_t cdb_off;
110 uint64_t iov_cnt;
111 struct iovec iov[0];
112 } req;
113 struct {
114 uint8_t scsi_status;
115 uint8_t __pad1;
116 uint16_t __pad2;
117 uint32_t __pad3;
118 char sense_buffer[TCMU_SENSE_BUFFERSIZE];
119 } rsp;
120 };
121
122} __packed;
123
124#define TCMU_OP_ALIGN_SIZE sizeof(uint64_t)
125
/*
 * Command identifiers (presumably for the generic netlink family, going
 * by the "genl" in the name -- confirm against the kernel side).
 */
enum tcmu_genl_cmd {
	TCMU_CMD_UNSPEC = 0,
	TCMU_CMD_ADDED_DEVICE = 1,
	TCMU_CMD_REMOVED_DEVICE = 2,
	__TCMU_CMD_MAX,		/* sentinel: one past the last command */
};
/* Highest command value defined above. */
#define TCMU_CMD_MAX (__TCMU_CMD_MAX - 1)
133
/*
 * Attribute identifiers (presumably for the generic netlink messages,
 * going by the "genl" in the name -- confirm against the kernel side).
 */
enum tcmu_genl_attr {
	TCMU_ATTR_UNSPEC = 0,
	TCMU_ATTR_DEVICE = 1,
	TCMU_ATTR_MINOR = 2,
	__TCMU_ATTR_MAX,	/* sentinel: one past the last attribute */
};
/* Highest attribute value defined above. */
#define TCMU_ATTR_MAX (__TCMU_ATTR_MAX - 1)
141
142#endif