author		Linus Torvalds <torvalds@linux-foundation.org>	2018-06-16 16:37:55 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-06-16 16:37:55 -0400
commit		265c5596da61a5467b6b3bfbd9ba637f867d3927 (patch)
tree		92eac309ddb4bc97afef3c2586db1fa32d280073 /drivers
parent		5e7b9212a4a887f42221376445df52cd5991d100 (diff)
parent		d6c73964f1e2a07f75057fb32ae46f6599036f93 (diff)
Merge tag 'for-linus-20180616' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
 "A collection of fixes that should go into -rc1. This contains:

   - bsg_open vs bsg_unregister race fix (Anatoliy)

   - NVMe pull request from Christoph, with fixes for regressions in
     this window, FC connect/reconnect path code unification, and a
     trace point addition.

   - timeout fix (Christoph)

   - remove a few unused functions (Christoph)

   - blk-mq tag_set reinit fix (Roman)"

* tag 'for-linus-20180616' of git://git.kernel.dk/linux-block:
  bsg: fix race of bsg_open and bsg_unregister
  block: remov blk_queue_invalidate_tags
  nvme-fabrics: fix and refine state checks in __nvmf_check_ready
  nvme-fabrics: handle the admin-only case properly in nvmf_check_ready
  nvme-fabrics: refactor queue ready check
  blk-mq: remove blk_mq_tagset_iter
  nvme: remove nvme_reinit_tagset
  nvme-fc: fix nulling of queue data on reconnect
  nvme-fc: remove reinit_request routine
  blk-mq: don't time out requests again that are in the timeout handler
  nvme-fc: change controllers first connect to use reconnect path
  nvme: don't rely on the changed namespace list log
  nvmet: free smart-log buffer after use
  nvme-rdma: fix error flow during mapping request data
  nvme: add bio remapping tracepoint
  nvme: fix NULL pointer dereference in nvme_init_subsystem
  blk-mq: reinit q->tag_set_list entry only after grace period
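
The common thread in the NVMe fabrics patches above is the split of the old
nvmf_check_if_ready() helper into an inline ready check plus an explicit
failure helper. A minimal caller-side sketch (illustrative only: the transport
name, queue flag, and surrounding layout are placeholders; the real
conversions are in the fc.c, rdma.c, and loop.c hunks below):

	/*
	 * Sketch of the converted ->queue_rq() pattern. NVME_XXX_Q_LIVE and
	 * nvme_xxx_queue_rq_sketch() are hypothetical stand-ins for each
	 * transport's equivalents.
	 */
	static blk_status_t nvme_xxx_queue_rq_sketch(struct nvme_ctrl *ctrl,
			struct request *rq, unsigned long *queue_flags)
	{
		bool queue_ready = test_bit(NVME_XXX_Q_LIVE, queue_flags);

		/* Inline fast path; slow path inspects ctrl->state. */
		if (!nvmf_check_ready(ctrl, rq, queue_ready))
			return nvmf_fail_nonready_command(rq);

		/* ...nvme_setup_cmd() and normal submission follow... */
		return BLK_STS_OK;
	}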
Diffstat (limited to 'drivers')
-rw-r--r--	drivers/nvme/host/core.c	48
-rw-r--r--	drivers/nvme/host/fabrics.c	86
-rw-r--r--	drivers/nvme/host/fabrics.h	14
-rw-r--r--	drivers/nvme/host/fc.c	144
-rw-r--r--	drivers/nvme/host/multipath.c	4
-rw-r--r--	drivers/nvme/host/nvme.h	2
-rw-r--r--	drivers/nvme/host/rdma.c	38
-rw-r--r--	drivers/nvme/target/admin-cmd.c	4
-rw-r--r--	drivers/nvme/target/loop.c	7
9 files changed, 154 insertions(+), 193 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index effb1309682e..21710a7460c8 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2208,7 +2208,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
	 * Verify that the subsystem actually supports multiple
	 * controllers, else bail out.
	 */
-	if (!ctrl->opts->discovery_nqn &&
+	if (!(ctrl->opts && ctrl->opts->discovery_nqn) &&
	    nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) {
		dev_err(ctrl->device,
			"ignoring ctrl due to duplicate subnqn (%s).\n",
@@ -3197,40 +3197,28 @@ static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn)
	nvme_remove_invalid_namespaces(ctrl, nn);
 }
 
-static bool nvme_scan_changed_ns_log(struct nvme_ctrl *ctrl)
+static void nvme_clear_changed_ns_log(struct nvme_ctrl *ctrl)
 {
	size_t log_size = NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32);
	__le32 *log;
-	int error, i;
-	bool ret = false;
+	int error;
 
	log = kzalloc(log_size, GFP_KERNEL);
	if (!log)
-		return false;
+		return;
 
+	/*
+	 * We need to read the log to clear the AEN, but we don't want to rely
+	 * on it for the changed namespace information as userspace could have
+	 * raced with us in reading the log page, which could cause us to miss
+	 * updates.
+	 */
	error = nvme_get_log(ctrl, NVME_LOG_CHANGED_NS, log, log_size);
-	if (error) {
+	if (error)
		dev_warn(ctrl->device,
			"reading changed ns log failed: %d\n", error);
-		goto out_free_log;
-	}
-
-	if (log[0] == cpu_to_le32(0xffffffff))
-		goto out_free_log;
-
-	for (i = 0; i < NVME_MAX_CHANGED_NAMESPACES; i++) {
-		u32 nsid = le32_to_cpu(log[i]);
-
-		if (nsid == 0)
-			break;
-		dev_info(ctrl->device, "rescanning namespace %d.\n", nsid);
-		nvme_validate_ns(ctrl, nsid);
-	}
-	ret = true;
 
-out_free_log:
	kfree(log);
-	return ret;
 }
 
 static void nvme_scan_work(struct work_struct *work)
@@ -3246,9 +3234,8 @@ static void nvme_scan_work(struct work_struct *work)
	WARN_ON_ONCE(!ctrl->tagset);
 
	if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) {
-		if (nvme_scan_changed_ns_log(ctrl))
-			goto out_sort_namespaces;
		dev_info(ctrl->device, "rescanning namespaces.\n");
+		nvme_clear_changed_ns_log(ctrl);
	}
 
	if (nvme_identify_ctrl(ctrl, &id))
@@ -3263,7 +3250,6 @@ static void nvme_scan_work(struct work_struct *work)
	nvme_scan_ns_sequential(ctrl, nn);
 out_free_id:
	kfree(id);
-out_sort_namespaces:
	down_write(&ctrl->namespaces_rwsem);
	list_sort(NULL, &ctrl->namespaces, ns_cmp);
	up_write(&ctrl->namespaces_rwsem);
@@ -3641,16 +3627,6 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_start_queues);
 
-int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set)
-{
-	if (!ctrl->ops->reinit_request)
-		return 0;
-
-	return blk_mq_tagset_iter(set, set->driver_data,
-			ctrl->ops->reinit_request);
-}
-EXPORT_SYMBOL_GPL(nvme_reinit_tagset);
-
 int __init nvme_core_init(void)
 {
	int result = -ENOMEM;
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index fa32c1216409..903eb4545e26 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -536,67 +536,55 @@ static struct nvmf_transport_ops *nvmf_lookup_transport(
	return NULL;
 }
 
-blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl, struct request *rq,
-		bool queue_live, bool is_connected)
+/*
+ * For something we're not in a state to send to the device the default action
+ * is to busy it and retry it after the controller state is recovered. However,
+ * anything marked for failfast or nvme multipath is immediately failed.
+ *
+ * Note: commands used to initialize the controller will be marked for failfast.
+ * Note: nvme cli/ioctl commands are marked for failfast.
+ */
+blk_status_t nvmf_fail_nonready_command(struct request *rq)
 {
-	struct nvme_command *cmd = nvme_req(rq)->cmd;
+	if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
+		return BLK_STS_RESOURCE;
+	nvme_req(rq)->status = NVME_SC_ABORT_REQ;
+	return BLK_STS_IOERR;
+}
+EXPORT_SYMBOL_GPL(nvmf_fail_nonready_command);
 
-	if (likely(ctrl->state == NVME_CTRL_LIVE && is_connected))
-		return BLK_STS_OK;
+bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
+		bool queue_live)
+{
+	struct nvme_request *req = nvme_req(rq);
+
+	/*
+	 * If we are in some state of setup or teardown only allow
+	 * internally generated commands.
+	 */
+	if (!blk_rq_is_passthrough(rq) || (req->flags & NVME_REQ_USERCMD))
+		return false;
 
+	/*
+	 * Only allow commands on a live queue, except for the connect command,
+	 * which is require to set the queue live in the appropinquate states.
+	 */
	switch (ctrl->state) {
	case NVME_CTRL_NEW:
	case NVME_CTRL_CONNECTING:
-	case NVME_CTRL_DELETING:
-		/*
-		 * This is the case of starting a new or deleting an association
-		 * but connectivity was lost before it was fully created or torn
-		 * down. We need to error the commands used to initialize the
-		 * controller so the reconnect can go into a retry attempt. The
-		 * commands should all be marked REQ_FAILFAST_DRIVER, which will
-		 * hit the reject path below. Anything else will be queued while
-		 * the state settles.
-		 */
-		if (!is_connected)
-			break;
-
-		/*
-		 * If queue is live, allow only commands that are internally
-		 * generated pass through. These are commands on the admin
-		 * queue to initialize the controller. This will reject any
-		 * ioctl admin cmds received while initializing.
-		 */
-		if (queue_live && !(nvme_req(rq)->flags & NVME_REQ_USERCMD))
-			return BLK_STS_OK;
-
-		/*
-		 * If the queue is not live, allow only a connect command. This
-		 * will reject any ioctl admin cmd as well as initialization
-		 * commands if the controller reverted the queue to non-live.
-		 */
-		if (!queue_live && blk_rq_is_passthrough(rq) &&
-		    cmd->common.opcode == nvme_fabrics_command &&
-		    cmd->fabrics.fctype == nvme_fabrics_type_connect)
-			return BLK_STS_OK;
+		if (req->cmd->common.opcode == nvme_fabrics_command &&
+		    req->cmd->fabrics.fctype == nvme_fabrics_type_connect)
+			return true;
		break;
	default:
		break;
+	case NVME_CTRL_DEAD:
+		return false;
	}
 
-	/*
-	 * Any other new io is something we're not in a state to send to the
-	 * device. Default action is to busy it and retry it after the
-	 * controller state is recovered. However, anything marked for failfast
-	 * or nvme multipath is immediately failed. Note: commands used to
-	 * initialize the controller will be marked for failfast.
-	 * Note: nvme cli/ioctl commands are marked for failfast.
-	 */
-	if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
-		return BLK_STS_RESOURCE;
-	nvme_req(rq)->status = NVME_SC_ABORT_REQ;
-	return BLK_STS_IOERR;
+	return queue_live;
 }
-EXPORT_SYMBOL_GPL(nvmf_check_if_ready);
+EXPORT_SYMBOL_GPL(__nvmf_check_ready);
 
 static const match_table_t opt_tokens = {
	{ NVMF_OPT_TRANSPORT,		"transport=%s"		},
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 7491a0bbf711..e1818a27aa2d 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -162,7 +162,17 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
 void nvmf_free_options(struct nvmf_ctrl_options *opts);
 int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
 bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
-blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl,
-	struct request *rq, bool queue_live, bool is_connected);
+blk_status_t nvmf_fail_nonready_command(struct request *rq);
+bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
+		bool queue_live);
+
+static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
+		bool queue_live)
+{
+	if (likely(ctrl->state == NVME_CTRL_LIVE ||
+		   ctrl->state == NVME_CTRL_ADMIN_ONLY))
+		return true;
+	return __nvmf_check_ready(ctrl, rq, queue_live);
+}
 
 #endif /* _NVME_FABRICS_H */
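
The inline nvmf_check_ready() above keeps the common case (controller LIVE or
ADMIN_ONLY) to a simple state compare, so ->queue_rq() only takes the
out-of-line call during setup, teardown, or recovery. A condensed restatement
of that slow-path decision, assuming the command predicates are precomputed
(names other than the nvme_ctrl states are hypothetical; the patched function
is in the fabrics.c hunk above):

	/* Condensed sketch of the __nvmf_check_ready() decision. */
	static bool nvmf_slow_path_sketch(enum nvme_ctrl_state state,
			bool internal_cmd, bool is_connect, bool queue_live)
	{
		if (!internal_cmd)
			return false;	/* setup/teardown: internal commands only */
		if (state == NVME_CTRL_DEAD)
			return false;	/* nothing may be sent */
		if ((state == NVME_CTRL_NEW || state == NVME_CTRL_CONNECTING) &&
		    is_connect)
			return true;	/* connect may run on a not-yet-live queue */
		return queue_live;	/* everything else needs a live queue */
	}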
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 0bad65803271..b528a2f5826c 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -142,6 +142,7 @@ struct nvme_fc_ctrl {
	struct nvme_fc_rport	*rport;
	u32			cnum;
 
+	bool			ioq_live;
	bool			assoc_active;
	u64			association_id;
 
@@ -1470,21 +1471,6 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
 
 static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
 
-static int
-nvme_fc_reinit_request(void *data, struct request *rq)
-{
-	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
-	struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
-
-	memset(cmdiu, 0, sizeof(*cmdiu));
-	cmdiu->scsi_id = NVME_CMD_SCSI_ID;
-	cmdiu->fc_id = NVME_CMD_FC_ID;
-	cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32));
-	memset(&op->rsp_iu, 0, sizeof(op->rsp_iu));
-
-	return 0;
-}
-
 static void
 __nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl,
		struct nvme_fc_fcp_op *op)
@@ -1893,6 +1879,7 @@ nvme_fc_free_queue(struct nvme_fc_queue *queue)
	 */
 
	queue->connection_id = 0;
+	atomic_set(&queue->csn, 1);
 }
 
 static void
@@ -2279,14 +2266,13 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
	struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
	struct nvme_command *sqe = &cmdiu->sqe;
	enum nvmefc_fcp_datadir	io_dir;
+	bool queue_ready = test_bit(NVME_FC_Q_LIVE, &queue->flags);
	u32 data_len;
	blk_status_t ret;
 
-	ret = nvmf_check_if_ready(&queue->ctrl->ctrl, rq,
-		test_bit(NVME_FC_Q_LIVE, &queue->flags),
-		ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE);
-	if (unlikely(ret))
-		return ret;
+	if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE ||
+	    !nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
+		return nvmf_fail_nonready_command(rq);
 
	ret = nvme_setup_cmd(ns, rq, sqe);
	if (ret)
@@ -2463,6 +2449,8 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
	if (ret)
		goto out_delete_hw_queues;
 
+	ctrl->ioq_live = true;
+
	return 0;
 
 out_delete_hw_queues:
@@ -2480,7 +2468,7 @@ out_free_tag_set:
 }
 
 static int
-nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
+nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
 {
	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
	unsigned int nr_io_queues;
@@ -2500,12 +2488,6 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
	if (ctrl->ctrl.queue_count == 1)
		return 0;
 
-	nvme_fc_init_io_queues(ctrl);
-
-	ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
-	if (ret)
-		goto out_free_io_queues;
-
	ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
	if (ret)
		goto out_free_io_queues;
@@ -2603,8 +2585,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
	 * Create the admin queue
	 */
 
-	nvme_fc_init_queue(ctrl, 0);
-
	ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0,
				NVME_AQ_DEPTH);
	if (ret)
@@ -2615,8 +2595,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
	if (ret)
		goto out_delete_hw_queue;
 
-	if (ctrl->ctrl.state != NVME_CTRL_NEW)
-		blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
+	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 
	ret = nvmf_connect_admin_queue(&ctrl->ctrl);
	if (ret)
@@ -2689,10 +2668,10 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
	 */
 
	if (ctrl->ctrl.queue_count > 1) {
-		if (ctrl->ctrl.state == NVME_CTRL_NEW)
+		if (!ctrl->ioq_live)
			ret = nvme_fc_create_io_queues(ctrl);
		else
-			ret = nvme_fc_reinit_io_queues(ctrl);
+			ret = nvme_fc_recreate_io_queues(ctrl);
		if (ret)
			goto out_term_aen_ops;
	}
@@ -2776,8 +2755,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
	 * use blk_mq_tagset_busy_itr() and the transport routine to
	 * terminate the exchanges.
	 */
-	if (ctrl->ctrl.state != NVME_CTRL_NEW)
-		blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
				nvme_fc_terminate_exchange, &ctrl->ctrl);
 
@@ -2917,7 +2895,6 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
	.submit_async_event	= nvme_fc_submit_async_event,
	.delete_ctrl		= nvme_fc_delete_ctrl,
	.get_address		= nvmf_get_address,
-	.reinit_request		= nvme_fc_reinit_request,
 };
 
 static void
@@ -2934,7 +2911,7 @@ nvme_fc_connect_ctrl_work(struct work_struct *work)
		nvme_fc_reconnect_or_delete(ctrl, ret);
	else
		dev_info(ctrl->ctrl.device,
-			"NVME-FC{%d}: controller reconnect complete\n",
+			"NVME-FC{%d}: controller connect complete\n",
			ctrl->cnum);
 }
 
@@ -2982,7 +2959,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 {
	struct nvme_fc_ctrl *ctrl;
	unsigned long flags;
-	int ret, idx, retry;
+	int ret, idx;
 
	if (!(rport->remoteport.port_role &
	    (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) {
@@ -3009,11 +2986,13 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
	}
 
	ctrl->ctrl.opts = opts;
+	ctrl->ctrl.nr_reconnects = 0;
	INIT_LIST_HEAD(&ctrl->ctrl_list);
	ctrl->lport = lport;
	ctrl->rport = rport;
	ctrl->dev = lport->dev;
	ctrl->cnum = idx;
+	ctrl->ioq_live = false;
	ctrl->assoc_active = false;
	init_waitqueue_head(&ctrl->ioabort_wait);
 
@@ -3032,6 +3011,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 
	ctrl->ctrl.sqsize = opts->queue_size - 1;
	ctrl->ctrl.kato = opts->kato;
+	ctrl->ctrl.cntlid = 0xffff;
 
	ret = -ENOMEM;
	ctrl->queues = kcalloc(ctrl->ctrl.queue_count,
@@ -3039,6 +3019,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
	if (!ctrl->queues)
		goto out_free_ida;
 
+	nvme_fc_init_queue(ctrl, 0);
+
	memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
	ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops;
	ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
@@ -3081,62 +3063,24 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
	list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list);
	spin_unlock_irqrestore(&rport->lock, flags);
 
-	/*
-	 * It's possible that transactions used to create the association
-	 * may fail. Examples: CreateAssociation LS or CreateIOConnection
-	 * LS gets dropped/corrupted/fails; or a frame gets dropped or a
-	 * command times out for one of the actions to init the controller
-	 * (Connect, Get/Set_Property, Set_Features, etc). Many of these
-	 * transport errors (frame drop, LS failure) inherently must kill
-	 * the association. The transport is coded so that any command used
-	 * to create the association (prior to a LIVE state transition
-	 * while NEW or CONNECTING) will fail if it completes in error or
-	 * times out.
-	 *
-	 * As such: as the connect request was mostly likely due to a
-	 * udev event that discovered the remote port, meaning there is
-	 * not an admin or script there to restart if the connect
-	 * request fails, retry the initial connection creation up to
-	 * three times before giving up and declaring failure.
-	 */
-	for (retry = 0; retry < 3; retry++) {
-		ret = nvme_fc_create_association(ctrl);
-		if (!ret)
-			break;
-	}
-
-	if (ret) {
-		nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
-		cancel_work_sync(&ctrl->ctrl.reset_work);
-		cancel_delayed_work_sync(&ctrl->connect_work);
-
-		/* couldn't schedule retry - fail out */
+	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING) ||
+	    !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
		dev_err(ctrl->ctrl.device,
-			"NVME-FC{%d}: Connect retry failed\n", ctrl->cnum);
-
-		ctrl->ctrl.opts = NULL;
+			"NVME-FC{%d}: failed to init ctrl state\n", ctrl->cnum);
+		goto fail_ctrl;
+	}
 
-		/* initiate nvme ctrl ref counting teardown */
-		nvme_uninit_ctrl(&ctrl->ctrl);
+	nvme_get_ctrl(&ctrl->ctrl);
 
-		/* Remove core ctrl ref. */
-		nvme_put_ctrl(&ctrl->ctrl);
-
-		/* as we're past the point where we transition to the ref
-		 * counting teardown path, if we return a bad pointer here,
-		 * the calling routine, thinking it's prior to the
-		 * transition, will do an rport put. Since the teardown
-		 * path also does a rport put, we do an extra get here to
-		 * so proper order/teardown happens.
-		 */
-		nvme_fc_rport_get(rport);
-
-		if (ret > 0)
-			ret = -EIO;
-		return ERR_PTR(ret);
+	if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) {
+		nvme_put_ctrl(&ctrl->ctrl);
+		dev_err(ctrl->ctrl.device,
+			"NVME-FC{%d}: failed to schedule initial connect\n",
+			ctrl->cnum);
+		goto fail_ctrl;
	}
 
-	nvme_get_ctrl(&ctrl->ctrl);
+	flush_delayed_work(&ctrl->connect_work);
 
	dev_info(ctrl->ctrl.device,
		"NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
@@ -3144,6 +3088,30 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 
	return &ctrl->ctrl;
 
+fail_ctrl:
+	nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
+	cancel_work_sync(&ctrl->ctrl.reset_work);
+	cancel_delayed_work_sync(&ctrl->connect_work);
+
+	ctrl->ctrl.opts = NULL;
+
+	/* initiate nvme ctrl ref counting teardown */
+	nvme_uninit_ctrl(&ctrl->ctrl);
+
+	/* Remove core ctrl ref. */
+	nvme_put_ctrl(&ctrl->ctrl);
+
+	/* as we're past the point where we transition to the ref
+	 * counting teardown path, if we return a bad pointer here,
+	 * the calling routine, thinking it's prior to the
+	 * transition, will do an rport put. Since the teardown
+	 * path also does a rport put, we do an extra get here to
+	 * so proper order/teardown happens.
+	 */
+	nvme_fc_rport_get(rport);
+
+	return ERR_PTR(-EIO);
+
 out_cleanup_admin_q:
	blk_cleanup_queue(ctrl->ctrl.admin_q);
 out_free_admin_tag_set:
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index d7b664ae5923..1ffd3e8b13a1 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -12,6 +12,7 @@
  */
 
 #include <linux/moduleparam.h>
+#include <trace/events/block.h>
 #include "nvme.h"
 
 static bool multipath = true;
@@ -111,6 +112,9 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
	if (likely(ns)) {
		bio->bi_disk = ns->disk;
		bio->bi_opf |= REQ_NVME_MPATH;
+		trace_block_bio_remap(bio->bi_disk->queue, bio,
+				      disk_devt(ns->head->disk),
+				      bio->bi_iter.bi_sector);
		ret = direct_make_request(bio);
	} else if (!list_empty_careful(&head->list)) {
		dev_warn_ratelimited(dev, "no path available - requeuing I/O\n");
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 34df07d44f80..231807cbc849 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -321,7 +321,6 @@ struct nvme_ctrl_ops {
	void (*submit_async_event)(struct nvme_ctrl *ctrl);
	void (*delete_ctrl)(struct nvme_ctrl *ctrl);
	int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
-	int (*reinit_request)(void *data, struct request *rq);
	void (*stop_ctrl)(struct nvme_ctrl *ctrl);
 };
 
@@ -416,7 +415,6 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl);
 void nvme_wait_freeze(struct nvme_ctrl *ctrl);
 void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
 void nvme_start_freeze(struct nvme_ctrl *ctrl);
-int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set);
 
 #define NVME_QID_ANY -1
 struct request *nvme_alloc_request(struct request_queue *q,
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 2aba03876d84..c9424da0d23e 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1189,21 +1189,38 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
	count = ib_dma_map_sg(ibdev, req->sg_table.sgl, req->nents,
		    rq_data_dir(rq) == WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
	if (unlikely(count <= 0)) {
-		sg_free_table_chained(&req->sg_table, true);
-		return -EIO;
+		ret = -EIO;
+		goto out_free_table;
	}
 
	if (count == 1) {
		if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
		    blk_rq_payload_bytes(rq) <=
-				nvme_rdma_inline_data_size(queue))
-			return nvme_rdma_map_sg_inline(queue, req, c);
+				nvme_rdma_inline_data_size(queue)) {
+			ret = nvme_rdma_map_sg_inline(queue, req, c);
+			goto out;
+		}
 
-		if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)
-			return nvme_rdma_map_sg_single(queue, req, c);
+		if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
+			ret = nvme_rdma_map_sg_single(queue, req, c);
+			goto out;
+		}
	}
 
-	return nvme_rdma_map_sg_fr(queue, req, c, count);
+	ret = nvme_rdma_map_sg_fr(queue, req, c, count);
+out:
+	if (unlikely(ret))
+		goto out_unmap_sg;
+
+	return 0;
+
+out_unmap_sg:
+	ib_dma_unmap_sg(ibdev, req->sg_table.sgl,
+			req->nents, rq_data_dir(rq) ==
+			WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+out_free_table:
+	sg_free_table_chained(&req->sg_table, true);
+	return ret;
 }
 
 static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
@@ -1613,15 +1630,14 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
	struct nvme_rdma_qe *sqe = &req->sqe;
	struct nvme_command *c = sqe->data;
	struct ib_device *dev;
+	bool queue_ready = test_bit(NVME_RDMA_Q_LIVE, &queue->flags);
	blk_status_t ret;
	int err;
 
	WARN_ON_ONCE(rq->tag < 0);
 
-	ret = nvmf_check_if_ready(&queue->ctrl->ctrl, rq,
-		test_bit(NVME_RDMA_Q_LIVE, &queue->flags), true);
-	if (unlikely(ret))
-		return ret;
+	if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
+		return nvmf_fail_nonready_command(rq);
 
	dev = queue->device->dev;
	ib_dma_sync_single_for_cpu(dev, sqe->dma,
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 962532842769..38803576d5e1 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -119,9 +119,11 @@ static void nvmet_execute_get_log_page_smart(struct nvmet_req *req)
	else
		status = nvmet_get_smart_log_nsid(req, log);
	if (status)
-		goto out;
+		goto out_free_log;
 
	status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log));
+out_free_log:
+	kfree(log);
 out:
	nvmet_req_complete(req, status);
 }
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 1304ec3a7ede..d8d91f04bd7e 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -158,12 +158,11 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
	struct nvme_loop_queue *queue = hctx->driver_data;
	struct request *req = bd->rq;
	struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
+	bool queue_ready = test_bit(NVME_LOOP_Q_LIVE, &queue->flags);
	blk_status_t ret;
 
-	ret = nvmf_check_if_ready(&queue->ctrl->ctrl, req,
-		test_bit(NVME_LOOP_Q_LIVE, &queue->flags), true);
-	if (unlikely(ret))
-		return ret;
+	if (!nvmf_check_ready(&queue->ctrl->ctrl, req, queue_ready))
+		return nvmf_fail_nonready_command(req);
 
	ret = nvme_setup_cmd(ns, req, &iod->cmd);
	if (ret)