author    Christoph Hellwig <hch@lst.de>	2016-06-21 12:04:21 -0400
committer Jens Axboe <axboe@fb.com>	2016-07-05 13:30:36 -0400
commit    3a85a5de29ea779634ddfd768059e06196687aba (patch)
tree      b2c055a70538069b89089b17730e3a0ce0fa2452
parent    a07b4970f464f13640e28e16dad6cfa33647cc99 (diff)
nvme-loop: add a NVMe loopback host driver
This patch adds nvme-loop, which allows access to local devices exported as NVMe over Fabrics namespaces. This module can be useful for easy evaluation, testing, and feature experimentation.

To create an nvme-loop device you need to configure the NVMe target to export a loop port (see the nvmetcli documentation for that) and then connect to it using

    nvme connect-all -t loop

which requires the very latest nvme-cli version with Fabrics support.

Signed-off-by: Jay Freyensee <james.p.freyensee@intel.com>
Signed-off-by: Ming Lin <ming.l@ssi.samsung.com>
Signed-off-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Steve Wise <swise@opengridcomputing.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
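[Editor's note] As a convenience, here is a minimal sketch of the setup described above, driving the nvmet configfs interface directly rather than through nvmetcli; the subsystem NQN "testnqn", the port number and the backing device /dev/nvme0n1 are placeholders, not values mandated by this patch:

    # target side: requires CONFIG_NVME_TARGET_LOOP (added by this patch)
    modprobe nvme-loop
    cd /sys/kernel/config/nvmet
    mkdir subsystems/testnqn
    echo 1 > subsystems/testnqn/attr_allow_any_host
    mkdir subsystems/testnqn/namespaces/1
    echo -n /dev/nvme0n1 > subsystems/testnqn/namespaces/1/device_path
    echo 1 > subsystems/testnqn/namespaces/1/enable
    mkdir ports/1
    echo loop > ports/1/addr_trtype
    ln -s /sys/kernel/config/nvmet/subsystems/testnqn ports/1/subsystems/testnqn

    # host side: needs a Fabrics-aware nvme-cli
    nvme connect-all -t loop

nvmetcli performs essentially these configfs steps for you.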
-rw-r--r--	drivers/nvme/target/Kconfig	 10
-rw-r--r--	drivers/nvme/target/Makefile	  2
-rw-r--r--	drivers/nvme/target/loop.c	754
3 files changed, 766 insertions(+), 0 deletions(-)
diff --git a/drivers/nvme/target/Kconfig b/drivers/nvme/target/Kconfig
index acf0c070e50d..b77ce4350695 100644
--- a/drivers/nvme/target/Kconfig
+++ b/drivers/nvme/target/Kconfig
@@ -13,4 +13,14 @@ config NVME_TARGET
 	  To configure the NVMe target you probably want to use the nvmetcli
 	  tool from http://git.infradead.org/users/hch/nvmetcli.git.
 
+config NVME_TARGET_LOOP
+	tristate "NVMe loopback device support"
+	depends on BLK_DEV_NVME
+	select NVME_TARGET
+	select NVME_FABRICS
+	select SG_POOL
+	help
+	  This enables the NVMe loopback device support, which can be useful
+	  to test NVMe host and target side features.
+
 	  If unsure, say N.
diff --git a/drivers/nvme/target/Makefile b/drivers/nvme/target/Makefile
index b4600b6f5724..e49ba60756d4 100644
--- a/drivers/nvme/target/Makefile
+++ b/drivers/nvme/target/Makefile
@@ -1,5 +1,7 @@
 
 obj-$(CONFIG_NVME_TARGET)		+= nvmet.o
+obj-$(CONFIG_NVME_TARGET_LOOP)		+= nvme-loop.o
 
 nvmet-y		+= core.o configfs.o admin-cmd.o io-cmd.o fabrics-cmd.o \
 			discovery.o
+nvme-loop-y	+= loop.o
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
new file mode 100644
index 000000000000..94e782987cc9
--- /dev/null
+++ b/drivers/nvme/target/loop.c
@@ -0,0 +1,754 @@
1/*
2 * NVMe over Fabrics loopback device.
3 * Copyright (c) 2015-2016 HGST, a Western Digital Company.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 */
14#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15#include <linux/scatterlist.h>
16#include <linux/delay.h>
17#include <linux/blk-mq.h>
18#include <linux/nvme.h>
19#include <linux/module.h>
20#include <linux/parser.h>
21#include <linux/t10-pi.h>
22#include "nvmet.h"
23#include "../host/nvme.h"
24#include "../host/fabrics.h"
25
26#define NVME_LOOP_AQ_DEPTH 256
27
28#define NVME_LOOP_MAX_SEGMENTS 256
29
30/*
31 * We handle AEN commands ourselves and don't even let the
32 * block layer know about them.
33 */
34#define NVME_LOOP_NR_AEN_COMMANDS 1
35#define NVME_LOOP_AQ_BLKMQ_DEPTH \
36 (NVME_LOOP_AQ_DEPTH - NVME_LOOP_NR_AEN_COMMANDS)
37
38struct nvme_loop_iod {
39 struct nvme_command cmd;
40 struct nvme_completion rsp;
41 struct nvmet_req req;
42 struct nvme_loop_queue *queue;
43 struct work_struct work;
44 struct sg_table sg_table;
45 struct scatterlist first_sgl[];
46};
47
48struct nvme_loop_ctrl {
49 spinlock_t lock;
50 struct nvme_loop_queue *queues;
51 u32 queue_count;
52
53 struct blk_mq_tag_set admin_tag_set;
54
55 struct list_head list;
56 u64 cap;
57 struct blk_mq_tag_set tag_set;
58 struct nvme_loop_iod async_event_iod;
59 struct nvme_ctrl ctrl;
60
61 struct nvmet_ctrl *target_ctrl;
62 struct work_struct delete_work;
63 struct work_struct reset_work;
64};
65
66static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl)
67{
68 return container_of(ctrl, struct nvme_loop_ctrl, ctrl);
69}
70
71struct nvme_loop_queue {
72 struct nvmet_cq nvme_cq;
73 struct nvmet_sq nvme_sq;
74 struct nvme_loop_ctrl *ctrl;
75};
76
77static struct nvmet_port *nvmet_loop_port;
78
79static LIST_HEAD(nvme_loop_ctrl_list);
80static DEFINE_MUTEX(nvme_loop_ctrl_mutex);
81
82static void nvme_loop_queue_response(struct nvmet_req *nvme_req);
83static void nvme_loop_delete_ctrl(struct nvmet_ctrl *ctrl);
84
85static struct nvmet_fabrics_ops nvme_loop_ops;
86
87static inline int nvme_loop_queue_idx(struct nvme_loop_queue *queue)
88{
89 return queue - queue->ctrl->queues;
90}
91
92static void nvme_loop_complete_rq(struct request *req)
93{
94 struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
95 int error = 0;
96
97 nvme_cleanup_cmd(req);
98 sg_free_table_chained(&iod->sg_table, true);
99
100 if (unlikely(req->errors)) {
101 if (nvme_req_needs_retry(req, req->errors)) {
102 nvme_requeue_req(req);
103 return;
104 }
105
106 if (req->cmd_type == REQ_TYPE_DRV_PRIV)
107 error = req->errors;
108 else
109 error = nvme_error_status(req->errors);
110 }
111
112 blk_mq_end_request(req, error);
113}
114
115static void nvme_loop_queue_response(struct nvmet_req *nvme_req)
116{
117 struct nvme_loop_iod *iod =
118 container_of(nvme_req, struct nvme_loop_iod, req);
119 struct nvme_completion *cqe = &iod->rsp;
120
121 /*
122 * AEN requests are special as they don't time out and can
123 * survive any kind of queue freeze and often don't respond to
124 * aborts. We don't even bother to allocate a struct request
125 * for them but rather special case them here.
126 */
127 if (unlikely(nvme_loop_queue_idx(iod->queue) == 0 &&
128 cqe->command_id >= NVME_LOOP_AQ_BLKMQ_DEPTH)) {
129 nvme_complete_async_event(&iod->queue->ctrl->ctrl, cqe);
130 } else {
131 struct request *req = blk_mq_rq_from_pdu(iod);
132
133 if (req->cmd_type == REQ_TYPE_DRV_PRIV && req->special)
134 memcpy(req->special, cqe, sizeof(*cqe));
135 blk_mq_complete_request(req, le16_to_cpu(cqe->status) >> 1);
136 }
137}
138
139static void nvme_loop_execute_work(struct work_struct *work)
140{
141 struct nvme_loop_iod *iod =
142 container_of(work, struct nvme_loop_iod, work);
143
144 iod->req.execute(&iod->req);
145}
146
147static enum blk_eh_timer_return
148nvme_loop_timeout(struct request *rq, bool reserved)
149{
150 struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(rq);
151
152 /* queue error recovery */
153 schedule_work(&iod->queue->ctrl->reset_work);
154
155 /* fail with DNR on admin cmd timeout */
156 rq->errors = NVME_SC_ABORT_REQ | NVME_SC_DNR;
157
158 return BLK_EH_HANDLED;
159}
160
161static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
162 const struct blk_mq_queue_data *bd)
163{
164 struct nvme_ns *ns = hctx->queue->queuedata;
165 struct nvme_loop_queue *queue = hctx->driver_data;
166 struct request *req = bd->rq;
167 struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
168 int ret;
169
170 ret = nvme_setup_cmd(ns, req, &iod->cmd);
171 if (ret)
172 return ret;
173
174 iod->cmd.common.flags |= NVME_CMD_SGL_METABUF;
175 iod->req.port = nvmet_loop_port;
176 if (!nvmet_req_init(&iod->req, &queue->nvme_cq,
177 &queue->nvme_sq, &nvme_loop_ops)) {
178 nvme_cleanup_cmd(req);
179 blk_mq_start_request(req);
180 nvme_loop_queue_response(&iod->req);
181 return 0;
182 }
183
184 if (blk_rq_bytes(req)) {
185 iod->sg_table.sgl = iod->first_sgl;
186 ret = sg_alloc_table_chained(&iod->sg_table,
187 req->nr_phys_segments, iod->sg_table.sgl);
188 if (ret)
189 return BLK_MQ_RQ_QUEUE_BUSY;
190
191 iod->req.sg = iod->sg_table.sgl;
192 iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl);
193 BUG_ON(iod->req.sg_cnt > req->nr_phys_segments);
194 }
195
196 iod->cmd.common.command_id = req->tag;
197 blk_mq_start_request(req);
198
199 schedule_work(&iod->work);
200 return 0;
201}
202
203static void nvme_loop_submit_async_event(struct nvme_ctrl *arg, int aer_idx)
204{
205 struct nvme_loop_ctrl *ctrl = to_loop_ctrl(arg);
206 struct nvme_loop_queue *queue = &ctrl->queues[0];
207 struct nvme_loop_iod *iod = &ctrl->async_event_iod;
208
209 memset(&iod->cmd, 0, sizeof(iod->cmd));
210 iod->cmd.common.opcode = nvme_admin_async_event;
211 iod->cmd.common.command_id = NVME_LOOP_AQ_BLKMQ_DEPTH;
212 iod->cmd.common.flags |= NVME_CMD_SGL_METABUF;
213
214 if (!nvmet_req_init(&iod->req, &queue->nvme_cq, &queue->nvme_sq,
215 &nvme_loop_ops)) {
216 dev_err(ctrl->ctrl.device, "failed async event work\n");
217 return;
218 }
219
220 schedule_work(&iod->work);
221}
222
223static int nvme_loop_init_iod(struct nvme_loop_ctrl *ctrl,
224 struct nvme_loop_iod *iod, unsigned int queue_idx)
225{
226 BUG_ON(queue_idx >= ctrl->queue_count);
227
228 iod->req.cmd = &iod->cmd;
229 iod->req.rsp = &iod->rsp;
230 iod->queue = &ctrl->queues[queue_idx];
231 INIT_WORK(&iod->work, nvme_loop_execute_work);
232 return 0;
233}
234
235static int nvme_loop_init_request(void *data, struct request *req,
236 unsigned int hctx_idx, unsigned int rq_idx,
237 unsigned int numa_node)
238{
239 return nvme_loop_init_iod(data, blk_mq_rq_to_pdu(req), hctx_idx + 1);
240}
241
242static int nvme_loop_init_admin_request(void *data, struct request *req,
243 unsigned int hctx_idx, unsigned int rq_idx,
244 unsigned int numa_node)
245{
246 return nvme_loop_init_iod(data, blk_mq_rq_to_pdu(req), 0);
247}
248
249static int nvme_loop_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
250 unsigned int hctx_idx)
251{
252 struct nvme_loop_ctrl *ctrl = data;
253 struct nvme_loop_queue *queue = &ctrl->queues[hctx_idx + 1];
254
255 BUG_ON(hctx_idx >= ctrl->queue_count);
256
257 hctx->driver_data = queue;
258 return 0;
259}
260
261static int nvme_loop_init_admin_hctx(struct blk_mq_hw_ctx *hctx, void *data,
262 unsigned int hctx_idx)
263{
264 struct nvme_loop_ctrl *ctrl = data;
265 struct nvme_loop_queue *queue = &ctrl->queues[0];
266
267 BUG_ON(hctx_idx != 0);
268
269 hctx->driver_data = queue;
270 return 0;
271}
272
273static struct blk_mq_ops nvme_loop_mq_ops = {
274 .queue_rq = nvme_loop_queue_rq,
275 .complete = nvme_loop_complete_rq,
276 .map_queue = blk_mq_map_queue,
277 .init_request = nvme_loop_init_request,
278 .init_hctx = nvme_loop_init_hctx,
279 .timeout = nvme_loop_timeout,
280};
281
282static struct blk_mq_ops nvme_loop_admin_mq_ops = {
283 .queue_rq = nvme_loop_queue_rq,
284 .complete = nvme_loop_complete_rq,
285 .map_queue = blk_mq_map_queue,
286 .init_request = nvme_loop_init_admin_request,
287 .init_hctx = nvme_loop_init_admin_hctx,
288 .timeout = nvme_loop_timeout,
289};
290
291static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl)
292{
293 blk_cleanup_queue(ctrl->ctrl.admin_q);
294 blk_mq_free_tag_set(&ctrl->admin_tag_set);
295 nvmet_sq_destroy(&ctrl->queues[0].nvme_sq);
296}
297
298static void nvme_loop_free_ctrl(struct nvme_ctrl *nctrl)
299{
300 struct nvme_loop_ctrl *ctrl = to_loop_ctrl(nctrl);
301
302 if (list_empty(&ctrl->list))
303 goto free_ctrl;
304
305 mutex_lock(&nvme_loop_ctrl_mutex);
306 list_del(&ctrl->list);
307 mutex_unlock(&nvme_loop_ctrl_mutex);
308
309 if (nctrl->tagset) {
310 blk_cleanup_queue(ctrl->ctrl.connect_q);
311 blk_mq_free_tag_set(&ctrl->tag_set);
312 }
313 kfree(ctrl->queues);
314 nvmf_free_options(nctrl->opts);
315free_ctrl:
316 kfree(ctrl);
317}
318
319static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
320{
321 int error;
322
323 memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
324 ctrl->admin_tag_set.ops = &nvme_loop_admin_mq_ops;
325 ctrl->admin_tag_set.queue_depth = NVME_LOOP_AQ_BLKMQ_DEPTH;
326 ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */
327 ctrl->admin_tag_set.numa_node = NUMA_NO_NODE;
328 ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
329 SG_CHUNK_SIZE * sizeof(struct scatterlist);
330 ctrl->admin_tag_set.driver_data = ctrl;
331 ctrl->admin_tag_set.nr_hw_queues = 1;
332 ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT;
333
334 ctrl->queues[0].ctrl = ctrl;
335 error = nvmet_sq_init(&ctrl->queues[0].nvme_sq);
336 if (error)
337 return error;
338 ctrl->queue_count = 1;
339
340 error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set);
341 if (error)
342 goto out_free_sq;
343
344 ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set);
345 if (IS_ERR(ctrl->ctrl.admin_q)) {
346 error = PTR_ERR(ctrl->ctrl.admin_q);
347 goto out_free_tagset;
348 }
349
350 error = nvmf_connect_admin_queue(&ctrl->ctrl);
351 if (error)
352 goto out_cleanup_queue;
353
354 error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->cap);
355 if (error) {
356 dev_err(ctrl->ctrl.device,
357 "prop_get NVME_REG_CAP failed\n");
358 goto out_cleanup_queue;
359 }
360
361 ctrl->ctrl.sqsize =
362 min_t(int, NVME_CAP_MQES(ctrl->cap) + 1, ctrl->ctrl.sqsize);
363
364 error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->cap);
365 if (error)
366 goto out_cleanup_queue;
367
368 ctrl->ctrl.max_hw_sectors =
369 (NVME_LOOP_MAX_SEGMENTS - 1) << (PAGE_SHIFT - 9);
370
371 error = nvme_init_identify(&ctrl->ctrl);
372 if (error)
373 goto out_cleanup_queue;
374
375 nvme_start_keep_alive(&ctrl->ctrl);
376
377 return 0;
378
379out_cleanup_queue:
380 blk_cleanup_queue(ctrl->ctrl.admin_q);
381out_free_tagset:
382 blk_mq_free_tag_set(&ctrl->admin_tag_set);
383out_free_sq:
384 nvmet_sq_destroy(&ctrl->queues[0].nvme_sq);
385 return error;
386}
387
388static void nvme_loop_shutdown_ctrl(struct nvme_loop_ctrl *ctrl)
389{
390 int i;
391
392 nvme_stop_keep_alive(&ctrl->ctrl);
393
394 if (ctrl->queue_count > 1) {
395 nvme_stop_queues(&ctrl->ctrl);
396 blk_mq_tagset_busy_iter(&ctrl->tag_set,
397 nvme_cancel_request, &ctrl->ctrl);
398
399 for (i = 1; i < ctrl->queue_count; i++)
400 nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
401 }
402
403 if (ctrl->ctrl.state == NVME_CTRL_LIVE)
404 nvme_shutdown_ctrl(&ctrl->ctrl);
405
406 blk_mq_stop_hw_queues(ctrl->ctrl.admin_q);
407 blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
408 nvme_cancel_request, &ctrl->ctrl);
409 nvme_loop_destroy_admin_queue(ctrl);
410}
411
412static void nvme_loop_del_ctrl_work(struct work_struct *work)
413{
414 struct nvme_loop_ctrl *ctrl = container_of(work,
415 struct nvme_loop_ctrl, delete_work);
416
417 nvme_remove_namespaces(&ctrl->ctrl);
418 nvme_loop_shutdown_ctrl(ctrl);
419 nvme_uninit_ctrl(&ctrl->ctrl);
420 nvme_put_ctrl(&ctrl->ctrl);
421}
422
423static int __nvme_loop_del_ctrl(struct nvme_loop_ctrl *ctrl)
424{
425 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING))
426 return -EBUSY;
427
428 if (!schedule_work(&ctrl->delete_work))
429 return -EBUSY;
430
431 return 0;
432}
433
434static int nvme_loop_del_ctrl(struct nvme_ctrl *nctrl)
435{
436 struct nvme_loop_ctrl *ctrl = to_loop_ctrl(nctrl);
437 int ret;
438
439 ret = __nvme_loop_del_ctrl(ctrl);
440 if (ret)
441 return ret;
442
443 flush_work(&ctrl->delete_work);
444
445 return 0;
446}
447
448static void nvme_loop_delete_ctrl(struct nvmet_ctrl *nctrl)
449{
450 struct nvme_loop_ctrl *ctrl;
451
452 mutex_lock(&nvme_loop_ctrl_mutex);
453 list_for_each_entry(ctrl, &nvme_loop_ctrl_list, list) {
454 if (ctrl->ctrl.cntlid == nctrl->cntlid)
455 __nvme_loop_del_ctrl(ctrl);
456 }
457 mutex_unlock(&nvme_loop_ctrl_mutex);
458}
459
460static void nvme_loop_reset_ctrl_work(struct work_struct *work)
461{
462 struct nvme_loop_ctrl *ctrl = container_of(work,
463 struct nvme_loop_ctrl, reset_work);
464 bool changed;
465 int i, ret;
466
467 nvme_loop_shutdown_ctrl(ctrl);
468
469 ret = nvme_loop_configure_admin_queue(ctrl);
470 if (ret)
471 goto out_disable;
472
473 for (i = 1; i <= ctrl->ctrl.opts->nr_io_queues; i++) {
474 ctrl->queues[i].ctrl = ctrl;
475 ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq);
476 if (ret)
477 goto out_free_queues;
478
479 ctrl->queue_count++;
480 }
481
482 for (i = 1; i <= ctrl->ctrl.opts->nr_io_queues; i++) {
483 ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
484 if (ret)
485 goto out_free_queues;
486 }
487
488 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
489 WARN_ON_ONCE(!changed);
490
491 nvme_queue_scan(&ctrl->ctrl);
492 nvme_queue_async_events(&ctrl->ctrl);
493
494 nvme_start_queues(&ctrl->ctrl);
495
496 return;
497
498out_free_queues:
499 for (i = 1; i < ctrl->queue_count; i++)
500 nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
501 nvme_loop_destroy_admin_queue(ctrl);
502out_disable:
503 dev_warn(ctrl->ctrl.device, "Removing after reset failure\n");
504 nvme_remove_namespaces(&ctrl->ctrl);
505 nvme_uninit_ctrl(&ctrl->ctrl);
506 nvme_put_ctrl(&ctrl->ctrl);
507}
508
509static int nvme_loop_reset_ctrl(struct nvme_ctrl *nctrl)
510{
511 struct nvme_loop_ctrl *ctrl = to_loop_ctrl(nctrl);
512
513 if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING))
514 return -EBUSY;
515
516 if (!schedule_work(&ctrl->reset_work))
517 return -EBUSY;
518
519 flush_work(&ctrl->reset_work);
520
521 return 0;
522}
523
524static const struct nvme_ctrl_ops nvme_loop_ctrl_ops = {
525 .name = "loop",
526 .module = THIS_MODULE,
527 .is_fabrics = true,
528 .reg_read32 = nvmf_reg_read32,
529 .reg_read64 = nvmf_reg_read64,
530 .reg_write32 = nvmf_reg_write32,
531 .reset_ctrl = nvme_loop_reset_ctrl,
532 .free_ctrl = nvme_loop_free_ctrl,
533 .submit_async_event = nvme_loop_submit_async_event,
534 .delete_ctrl = nvme_loop_del_ctrl,
535 .get_subsysnqn = nvmf_get_subsysnqn,
536};
537
538static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
539{
540 struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
541 int ret, i;
542
543 ret = nvme_set_queue_count(&ctrl->ctrl, &opts->nr_io_queues);
544 if (ret || !opts->nr_io_queues)
545 return ret;
546
547 dev_info(ctrl->ctrl.device, "creating %d I/O queues.\n",
548 opts->nr_io_queues);
549
550 for (i = 1; i <= opts->nr_io_queues; i++) {
551 ctrl->queues[i].ctrl = ctrl;
552 ret = nvmet_sq_init(&ctrl->queues[i].nvme_sq);
553 if (ret)
554 goto out_destroy_queues;
555
556 ctrl->queue_count++;
557 }
558
559 memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
560 ctrl->tag_set.ops = &nvme_loop_mq_ops;
561 ctrl->tag_set.queue_depth = ctrl->ctrl.sqsize;
562 ctrl->tag_set.reserved_tags = 1; /* fabric connect */
563 ctrl->tag_set.numa_node = NUMA_NO_NODE;
564 ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
565 ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
566 SG_CHUNK_SIZE * sizeof(struct scatterlist);
567 ctrl->tag_set.driver_data = ctrl;
568 ctrl->tag_set.nr_hw_queues = ctrl->queue_count - 1;
569 ctrl->tag_set.timeout = NVME_IO_TIMEOUT;
570 ctrl->ctrl.tagset = &ctrl->tag_set;
571
572 ret = blk_mq_alloc_tag_set(&ctrl->tag_set);
573 if (ret)
574 goto out_destroy_queues;
575
576 ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set);
577 if (IS_ERR(ctrl->ctrl.connect_q)) {
578 ret = PTR_ERR(ctrl->ctrl.connect_q);
579 goto out_free_tagset;
580 }
581
582 for (i = 1; i <= opts->nr_io_queues; i++) {
583 ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
584 if (ret)
585 goto out_cleanup_connect_q;
586 }
587
588 return 0;
589
590out_cleanup_connect_q:
591 blk_cleanup_queue(ctrl->ctrl.connect_q);
592out_free_tagset:
593 blk_mq_free_tag_set(&ctrl->tag_set);
594out_destroy_queues:
595 for (i = 1; i < ctrl->queue_count; i++)
596 nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
597 return ret;
598}
599
600static struct nvme_ctrl *nvme_loop_create_ctrl(struct device *dev,
601 struct nvmf_ctrl_options *opts)
602{
603 struct nvme_loop_ctrl *ctrl;
604 bool changed;
605 int ret;
606
607 ctrl = kzalloc(sizeof(*ctrl), GFP_KERNEL);
608 if (!ctrl)
609 return ERR_PTR(-ENOMEM);
610 ctrl->ctrl.opts = opts;
611 INIT_LIST_HEAD(&ctrl->list);
612
613 INIT_WORK(&ctrl->delete_work, nvme_loop_del_ctrl_work);
614 INIT_WORK(&ctrl->reset_work, nvme_loop_reset_ctrl_work);
615
616 ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_loop_ctrl_ops,
617 0 /* no quirks, we're perfect! */);
618 if (ret)
619 goto out_put_ctrl;
620
621 spin_lock_init(&ctrl->lock);
622
623 ret = -ENOMEM;
624
625 ctrl->ctrl.sqsize = opts->queue_size;
626 ctrl->ctrl.kato = opts->kato;
627
628 ctrl->queues = kcalloc(opts->nr_io_queues + 1, sizeof(*ctrl->queues),
629 GFP_KERNEL);
630 if (!ctrl->queues)
631 goto out_uninit_ctrl;
632
633 ret = nvme_loop_configure_admin_queue(ctrl);
634 if (ret)
635 goto out_free_queues;
636
637 if (opts->queue_size > ctrl->ctrl.maxcmd) {
638 /* warn if maxcmd is lower than queue_size */
639 dev_warn(ctrl->ctrl.device,
640 "queue_size %zu > ctrl maxcmd %u, clamping down\n",
641 opts->queue_size, ctrl->ctrl.maxcmd);
642 opts->queue_size = ctrl->ctrl.maxcmd;
643 }
644
645 if (opts->nr_io_queues) {
646 ret = nvme_loop_create_io_queues(ctrl);
647 if (ret)
648 goto out_remove_admin_queue;
649 }
650
651 nvme_loop_init_iod(ctrl, &ctrl->async_event_iod, 0);
652
653 dev_info(ctrl->ctrl.device,
654 "new ctrl: \"%s\"\n", ctrl->ctrl.opts->subsysnqn);
655
656 kref_get(&ctrl->ctrl.kref);
657
658 changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE);
659 WARN_ON_ONCE(!changed);
660
661 mutex_lock(&nvme_loop_ctrl_mutex);
662 list_add_tail(&ctrl->list, &nvme_loop_ctrl_list);
663 mutex_unlock(&nvme_loop_ctrl_mutex);
664
665 if (opts->nr_io_queues) {
666 nvme_queue_scan(&ctrl->ctrl);
667 nvme_queue_async_events(&ctrl->ctrl);
668 }
669
670 return &ctrl->ctrl;
671
672out_remove_admin_queue:
673 nvme_loop_destroy_admin_queue(ctrl);
674out_free_queues:
675 kfree(ctrl->queues);
676out_uninit_ctrl:
677 nvme_uninit_ctrl(&ctrl->ctrl);
678out_put_ctrl:
679 nvme_put_ctrl(&ctrl->ctrl);
680 if (ret > 0)
681 ret = -EIO;
682 return ERR_PTR(ret);
683}
684
685static int nvme_loop_add_port(struct nvmet_port *port)
686{
687 /*
688 * XXX: disallow adding more than one port so
689 * there are no connection rejections when a
690 * subsystem is assigned to a port for which
691 * loop doesn't have a pointer.
692 * This scenario would be possible if we allowed
693 * more than one port to be added and a subsystem
694 * was assigned to a port other than nvmet_loop_port.
695 */
696
697 if (nvmet_loop_port)
698 return -EPERM;
699
700 nvmet_loop_port = port;
701 return 0;
702}
703
704static void nvme_loop_remove_port(struct nvmet_port *port)
705{
706 if (port == nvmet_loop_port)
707 nvmet_loop_port = NULL;
708}
709
710static struct nvmet_fabrics_ops nvme_loop_ops = {
711 .owner = THIS_MODULE,
712 .type = NVMF_TRTYPE_LOOP,
713 .add_port = nvme_loop_add_port,
714 .remove_port = nvme_loop_remove_port,
715 .queue_response = nvme_loop_queue_response,
716 .delete_ctrl = nvme_loop_delete_ctrl,
717};
718
719static struct nvmf_transport_ops nvme_loop_transport = {
720 .name = "loop",
721 .create_ctrl = nvme_loop_create_ctrl,
722};
723
724static int __init nvme_loop_init_module(void)
725{
726 int ret;
727
728 ret = nvmet_register_transport(&nvme_loop_ops);
729 if (ret)
730 return ret;
731 nvmf_register_transport(&nvme_loop_transport);
732 return 0;
733}
734
735static void __exit nvme_loop_cleanup_module(void)
736{
737 struct nvme_loop_ctrl *ctrl, *next;
738
739 nvmf_unregister_transport(&nvme_loop_transport);
740 nvmet_unregister_transport(&nvme_loop_ops);
741
742 mutex_lock(&nvme_loop_ctrl_mutex);
743 list_for_each_entry_safe(ctrl, next, &nvme_loop_ctrl_list, list)
744 __nvme_loop_del_ctrl(ctrl);
745 mutex_unlock(&nvme_loop_ctrl_mutex);
746
747 flush_scheduled_work();
748}
749
750module_init(nvme_loop_init_module);
751module_exit(nvme_loop_cleanup_module);
752
753MODULE_LICENSE("GPL v2");
754MODULE_ALIAS("nvmet-transport-254"); /* 254 == NVMF_TRTYPE_LOOP */