author     Linus Torvalds <torvalds@linux-foundation.org>  2018-06-16 16:37:55 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2018-06-16 16:37:55 -0400
commit     265c5596da61a5467b6b3bfbd9ba637f867d3927 (patch)
tree       92eac309ddb4bc97afef3c2586db1fa32d280073
parent     5e7b9212a4a887f42221376445df52cd5991d100 (diff)
parent     d6c73964f1e2a07f75057fb32ae46f6599036f93 (diff)
Merge tag 'for-linus-20180616' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
 "A collection of fixes that should go into -rc1. This contains:

   - bsg_open vs bsg_unregister race fix (Anatoliy)

   - NVMe pull request from Christoph, with fixes for regressions in
     this window, FC connect/reconnect path code unification, and a
     trace point addition.

   - timeout fix (Christoph)

   - remove a few unused functions (Christoph)

   - blk-mq tag_set reinit fix (Roman)"

* tag 'for-linus-20180616' of git://git.kernel.dk/linux-block:
  bsg: fix race of bsg_open and bsg_unregister
  block: remov blk_queue_invalidate_tags
  nvme-fabrics: fix and refine state checks in __nvmf_check_ready
  nvme-fabrics: handle the admin-only case properly in nvmf_check_ready
  nvme-fabrics: refactor queue ready check
  blk-mq: remove blk_mq_tagset_iter
  nvme: remove nvme_reinit_tagset
  nvme-fc: fix nulling of queue data on reconnect
  nvme-fc: remove reinit_request routine
  blk-mq: don't time out requests again that are in the timeout handler
  nvme-fc: change controllers first connect to use reconnect path
  nvme: don't rely on the changed namespace list log
  nvmet: free smart-log buffer after use
  nvme-rdma: fix error flow during mapping request data
  nvme: add bio remapping tracepoint
  nvme: fix NULL pointer dereference in nvme_init_subsystem
  blk-mq: reinit q->tag_set_list entry only after grace period
-rw-r--r--  Documentation/block/biodoc.txt  |  15
-rw-r--r--  block/blk-mq-tag.c              |  29
-rw-r--r--  block/blk-mq.c                  |   8
-rw-r--r--  block/blk-tag.c                 |  22
-rw-r--r--  block/bsg.c                     |  22
-rw-r--r--  drivers/nvme/host/core.c        |  48
-rw-r--r--  drivers/nvme/host/fabrics.c     |  86
-rw-r--r--  drivers/nvme/host/fabrics.h     |  14
-rw-r--r--  drivers/nvme/host/fc.c          | 144
-rw-r--r--  drivers/nvme/host/multipath.c   |   4
-rw-r--r--  drivers/nvme/host/nvme.h        |   2
-rw-r--r--  drivers/nvme/host/rdma.c        |  38
-rw-r--r--  drivers/nvme/target/admin-cmd.c |   4
-rw-r--r--  drivers/nvme/target/loop.c      |   7
-rw-r--r--  include/linux/blk-mq.h          |   2
-rw-r--r--  include/linux/blkdev.h          |   4
16 files changed, 174 insertions, 275 deletions
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 86927029a52d..207eca58efaa 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -752,18 +752,6 @@ completion of the request to the block layer. This means ending tag
 operations before calling end_that_request_last()! For an example of a user
 of these helpers, see the IDE tagged command queueing support.
 
-Certain hardware conditions may dictate a need to invalidate the block tag
-queue. For instance, on IDE any tagged request error needs to clear both
-the hardware and software block queue and enable the driver to sanely restart
-all the outstanding requests. There's a third helper to do that:
-
-	blk_queue_invalidate_tags(struct request_queue *q)
-
-	Clear the internal block tag queue and re-add all the pending requests
-	to the request queue. The driver will receive them again on the
-	next request_fn run, just like it did the first time it encountered
-	them.
-
 3.2.5.2 Tag info
 
 Some block functions exist to query current tag status or to go from a
@@ -805,8 +793,7 @@ Internally, block manages tags in the blk_queue_tag structure:
 Most of the above is simple and straight forward, however busy_list may need
 a bit of explaining. Normally we don't care too much about request ordering,
 but in the event of any barrier requests in the tag queue we need to ensure
-that requests are restarted in the order they were queue. This may happen
-if the driver needs to use blk_queue_invalidate_tags().
+that requests are restarted in the order they were queue.
 
 3.3 I/O Submission
 
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 70356a2a11ab..09b2ee6694fb 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -311,35 +311,6 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset,
 }
 EXPORT_SYMBOL(blk_mq_tagset_busy_iter);
 
-int blk_mq_tagset_iter(struct blk_mq_tag_set *set, void *data,
-		int (fn)(void *, struct request *))
-{
-	int i, j, ret = 0;
-
-	if (WARN_ON_ONCE(!fn))
-		goto out;
-
-	for (i = 0; i < set->nr_hw_queues; i++) {
-		struct blk_mq_tags *tags = set->tags[i];
-
-		if (!tags)
-			continue;
-
-		for (j = 0; j < tags->nr_tags; j++) {
-			if (!tags->static_rqs[j])
-				continue;
-
-			ret = fn(data, tags->static_rqs[j]);
-			if (ret)
-				goto out;
-		}
-	}
-
-out:
-	return ret;
-}
-EXPORT_SYMBOL_GPL(blk_mq_tagset_iter);
-
 void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn,
 		void *priv)
 {
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e9da5e6a8526..70c65bb6c013 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -671,6 +671,7 @@ static void __blk_mq_requeue_request(struct request *rq)
 
 	if (blk_mq_request_started(rq)) {
 		WRITE_ONCE(rq->state, MQ_RQ_IDLE);
+		rq->rq_flags &= ~RQF_TIMED_OUT;
 		if (q->dma_drain_size && blk_rq_bytes(rq))
 			rq->nr_phys_segments--;
 	}
@@ -770,6 +771,7 @@ EXPORT_SYMBOL(blk_mq_tag_to_rq);
 
 static void blk_mq_rq_timed_out(struct request *req, bool reserved)
 {
+	req->rq_flags |= RQF_TIMED_OUT;
 	if (req->q->mq_ops->timeout) {
 		enum blk_eh_timer_return ret;
 
@@ -779,6 +781,7 @@ static void blk_mq_rq_timed_out(struct request *req, bool reserved)
 		WARN_ON_ONCE(ret != BLK_EH_RESET_TIMER);
 	}
 
+	req->rq_flags &= ~RQF_TIMED_OUT;
 	blk_add_timer(req);
 }
 
@@ -788,6 +791,8 @@ static bool blk_mq_req_expired(struct request *rq, unsigned long *next)
 
 	if (blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT)
 		return false;
+	if (rq->rq_flags & RQF_TIMED_OUT)
+		return false;
 
 	deadline = blk_rq_deadline(rq);
 	if (time_after_eq(jiffies, deadline))
@@ -2349,7 +2354,6 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
 
 	mutex_lock(&set->tag_list_lock);
 	list_del_rcu(&q->tag_set_list);
-	INIT_LIST_HEAD(&q->tag_set_list);
 	if (list_is_singular(&set->tag_list)) {
 		/* just transitioned to unshared */
 		set->flags &= ~BLK_MQ_F_TAG_SHARED;
@@ -2357,8 +2361,8 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q)
 		blk_mq_update_tag_set_depth(set, false);
 	}
 	mutex_unlock(&set->tag_list_lock);
-
 	synchronize_rcu();
+	INIT_LIST_HEAD(&q->tag_set_list);
 }
 
 static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set,
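The tag_set_list hunk above ("blk-mq: reinit q->tag_set_list entry only after grace period") moves INIT_LIST_HEAD() past synchronize_rcu(): list_del_rcu() leaves the node's forward pointer intact so readers already walking set->tag_list under RCU can finish, and re-initializing the node before the grace period ends would rewrite that pointer underneath them. A minimal sketch of the same ordering rule, written against generic kernel list/RCU primitives rather than the blk-mq structures themselves:

	#include <linux/list.h>
	#include <linux/rcupdate.h>
	#include <linux/spinlock.h>

	struct member {
		struct list_head node;	/* linked into an RCU-protected list */
	};

	static LIST_HEAD(shared_list);
	static DEFINE_SPINLOCK(shared_lock);

	/* Readers walk the list under rcu_read_lock() only. */
	static void reader(void)
	{
		struct member *m;

		rcu_read_lock();
		list_for_each_entry_rcu(m, &shared_list, node) {
			/* ... use m ... */
		}
		rcu_read_unlock();
	}

	/*
	 * Writer-side removal: the node stays reachable to in-flight readers
	 * until a grace period elapses, so it must not be re-initialized
	 * (which rewrites its next/prev pointers) before synchronize_rcu()
	 * returns -- the same ordering the blk-mq hunk restores.
	 */
	static void remove_member(struct member *m)
	{
		spin_lock(&shared_lock);
		list_del_rcu(&m->node);
		spin_unlock(&shared_lock);

		synchronize_rcu();		/* wait out current readers */
		INIT_LIST_HEAD(&m->node);	/* now safe to reuse the node */
	}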
diff --git a/block/blk-tag.c b/block/blk-tag.c
index 24b20d86bcbc..fbc153aef166 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -188,7 +188,6 @@ int blk_queue_init_tags(struct request_queue *q, int depth,
 	 */
 	q->queue_tags = tags;
 	queue_flag_set_unlocked(QUEUE_FLAG_QUEUED, q);
-	INIT_LIST_HEAD(&q->tag_busy_list);
 	return 0;
 }
 EXPORT_SYMBOL(blk_queue_init_tags);
@@ -374,27 +373,6 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq)
 	rq->tag = tag;
 	bqt->tag_index[tag] = rq;
 	blk_start_request(rq);
-	list_add(&rq->queuelist, &q->tag_busy_list);
 	return 0;
 }
 EXPORT_SYMBOL(blk_queue_start_tag);
-
-/**
- * blk_queue_invalidate_tags - invalidate all pending tags
- * @q:  the request queue for the device
- *
- * Description:
- *   Hardware conditions may dictate a need to stop all pending requests.
- *   In this case, we will safely clear the block side of the tag queue and
- *   readd all requests to the request queue in the right order.
- **/
-void blk_queue_invalidate_tags(struct request_queue *q)
-{
-	struct list_head *tmp, *n;
-
-	lockdep_assert_held(q->queue_lock);
-
-	list_for_each_safe(tmp, n, &q->tag_busy_list)
-		blk_requeue_request(q, list_entry_rq(tmp));
-}
-EXPORT_SYMBOL(blk_queue_invalidate_tags);
diff --git a/block/bsg.c b/block/bsg.c
index 132e657e2d91..66602c489956 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -693,6 +693,8 @@ static struct bsg_device *bsg_add_device(struct inode *inode,
 	struct bsg_device *bd;
 	unsigned char buf[32];
 
+	lockdep_assert_held(&bsg_mutex);
+
 	if (!blk_get_queue(rq))
 		return ERR_PTR(-ENXIO);
 
@@ -707,14 +709,12 @@ static struct bsg_device *bsg_add_device(struct inode *inode,
 	bsg_set_block(bd, file);
 
 	atomic_set(&bd->ref_count, 1);
-	mutex_lock(&bsg_mutex);
 	hlist_add_head(&bd->dev_list, bsg_dev_idx_hash(iminor(inode)));
 
 	strncpy(bd->name, dev_name(rq->bsg_dev.class_dev), sizeof(bd->name) - 1);
 	bsg_dbg(bd, "bound to <%s>, max queue %d\n",
 		format_dev_t(buf, inode->i_rdev), bd->max_queue);
 
-	mutex_unlock(&bsg_mutex);
 	return bd;
 }
 
@@ -722,7 +722,7 @@ static struct bsg_device *__bsg_get_device(int minor, struct request_queue *q)
 {
 	struct bsg_device *bd;
 
-	mutex_lock(&bsg_mutex);
+	lockdep_assert_held(&bsg_mutex);
 
 	hlist_for_each_entry(bd, bsg_dev_idx_hash(minor), dev_list) {
 		if (bd->queue == q) {
@@ -732,7 +732,6 @@ static struct bsg_device *__bsg_get_device(int minor, struct request_queue *q)
 	}
 	bd = NULL;
 found:
-	mutex_unlock(&bsg_mutex);
 	return bd;
 }
 
@@ -746,17 +745,18 @@ static struct bsg_device *bsg_get_device(struct inode *inode, struct file *file)
 	 */
 	mutex_lock(&bsg_mutex);
 	bcd = idr_find(&bsg_minor_idr, iminor(inode));
-	mutex_unlock(&bsg_mutex);
 
-	if (!bcd)
-		return ERR_PTR(-ENODEV);
+	if (!bcd) {
+		bd = ERR_PTR(-ENODEV);
+		goto out_unlock;
+	}
 
 	bd = __bsg_get_device(iminor(inode), bcd->queue);
-	if (bd)
-		return bd;
-
-	bd = bsg_add_device(inode, bcd->queue, file);
+	if (!bd)
+		bd = bsg_add_device(inode, bcd->queue, file);
 
+out_unlock:
+	mutex_unlock(&bsg_mutex);
 	return bd;
 }
 
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index effb1309682e..21710a7460c8 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2208,7 +2208,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id)
 	 * Verify that the subsystem actually supports multiple
 	 * controllers, else bail out.
 	 */
-	if (!ctrl->opts->discovery_nqn &&
+	if (!(ctrl->opts && ctrl->opts->discovery_nqn) &&
 	    nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) {
 		dev_err(ctrl->device,
 			"ignoring ctrl due to duplicate subnqn (%s).\n",
@@ -3197,40 +3197,28 @@ static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn)
 	nvme_remove_invalid_namespaces(ctrl, nn);
 }
 
-static bool nvme_scan_changed_ns_log(struct nvme_ctrl *ctrl)
+static void nvme_clear_changed_ns_log(struct nvme_ctrl *ctrl)
 {
 	size_t log_size = NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32);
 	__le32 *log;
-	int error, i;
-	bool ret = false;
+	int error;
 
 	log = kzalloc(log_size, GFP_KERNEL);
 	if (!log)
-		return false;
+		return;
 
+	/*
+	 * We need to read the log to clear the AEN, but we don't want to rely
+	 * on it for the changed namespace information as userspace could have
+	 * raced with us in reading the log page, which could cause us to miss
+	 * updates.
+	 */
 	error = nvme_get_log(ctrl, NVME_LOG_CHANGED_NS, log, log_size);
-	if (error) {
+	if (error)
 		dev_warn(ctrl->device,
 			"reading changed ns log failed: %d\n", error);
-		goto out_free_log;
-	}
-
-	if (log[0] == cpu_to_le32(0xffffffff))
-		goto out_free_log;
-
-	for (i = 0; i < NVME_MAX_CHANGED_NAMESPACES; i++) {
-		u32 nsid = le32_to_cpu(log[i]);
 
-		if (nsid == 0)
-			break;
-		dev_info(ctrl->device, "rescanning namespace %d.\n", nsid);
-		nvme_validate_ns(ctrl, nsid);
-	}
-	ret = true;
-
-out_free_log:
 	kfree(log);
-	return ret;
 }
 
 static void nvme_scan_work(struct work_struct *work)
@@ -3246,9 +3234,8 @@ static void nvme_scan_work(struct work_struct *work)
 	WARN_ON_ONCE(!ctrl->tagset);
 
 	if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) {
-		if (nvme_scan_changed_ns_log(ctrl))
-			goto out_sort_namespaces;
 		dev_info(ctrl->device, "rescanning namespaces.\n");
+		nvme_clear_changed_ns_log(ctrl);
 	}
 
 	if (nvme_identify_ctrl(ctrl, &id))
@@ -3263,7 +3250,6 @@ static void nvme_scan_work(struct work_struct *work)
 	nvme_scan_ns_sequential(ctrl, nn);
 out_free_id:
 	kfree(id);
-out_sort_namespaces:
 	down_write(&ctrl->namespaces_rwsem);
 	list_sort(NULL, &ctrl->namespaces, ns_cmp);
 	up_write(&ctrl->namespaces_rwsem);
@@ -3641,16 +3627,6 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
 }
 EXPORT_SYMBOL_GPL(nvme_start_queues);
 
-int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set)
-{
-	if (!ctrl->ops->reinit_request)
-		return 0;
-
-	return blk_mq_tagset_iter(set, set->driver_data,
-			ctrl->ops->reinit_request);
-}
-EXPORT_SYMBOL_GPL(nvme_reinit_tagset);
-
 int __init nvme_core_init(void)
 {
 	int result = -ENOMEM;
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index fa32c1216409..903eb4545e26 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -536,67 +536,55 @@ static struct nvmf_transport_ops *nvmf_lookup_transport(
 	return NULL;
 }
 
-blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl, struct request *rq,
-		bool queue_live, bool is_connected)
+/*
+ * For something we're not in a state to send to the device the default action
+ * is to busy it and retry it after the controller state is recovered. However,
+ * anything marked for failfast or nvme multipath is immediately failed.
+ *
+ * Note: commands used to initialize the controller will be marked for failfast.
+ * Note: nvme cli/ioctl commands are marked for failfast.
+ */
+blk_status_t nvmf_fail_nonready_command(struct request *rq)
 {
-	struct nvme_command *cmd = nvme_req(rq)->cmd;
+	if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
+		return BLK_STS_RESOURCE;
+	nvme_req(rq)->status = NVME_SC_ABORT_REQ;
+	return BLK_STS_IOERR;
+}
+EXPORT_SYMBOL_GPL(nvmf_fail_nonready_command);
 
-	if (likely(ctrl->state == NVME_CTRL_LIVE && is_connected))
-		return BLK_STS_OK;
+bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
+		bool queue_live)
+{
+	struct nvme_request *req = nvme_req(rq);
+
+	/*
+	 * If we are in some state of setup or teardown only allow
+	 * internally generated commands.
+	 */
+	if (!blk_rq_is_passthrough(rq) || (req->flags & NVME_REQ_USERCMD))
+		return false;
 
+	/*
+	 * Only allow commands on a live queue, except for the connect command,
+	 * which is require to set the queue live in the appropinquate states.
+	 */
 	switch (ctrl->state) {
 	case NVME_CTRL_NEW:
 	case NVME_CTRL_CONNECTING:
-	case NVME_CTRL_DELETING:
-		/*
-		 * This is the case of starting a new or deleting an association
-		 * but connectivity was lost before it was fully created or torn
-		 * down. We need to error the commands used to initialize the
-		 * controller so the reconnect can go into a retry attempt. The
-		 * commands should all be marked REQ_FAILFAST_DRIVER, which will
-		 * hit the reject path below. Anything else will be queued while
-		 * the state settles.
-		 */
-		if (!is_connected)
-			break;
-
-		/*
-		 * If queue is live, allow only commands that are internally
-		 * generated pass through. These are commands on the admin
-		 * queue to initialize the controller. This will reject any
-		 * ioctl admin cmds received while initializing.
-		 */
-		if (queue_live && !(nvme_req(rq)->flags & NVME_REQ_USERCMD))
-			return BLK_STS_OK;
-
-		/*
-		 * If the queue is not live, allow only a connect command. This
-		 * will reject any ioctl admin cmd as well as initialization
-		 * commands if the controller reverted the queue to non-live.
-		 */
-		if (!queue_live && blk_rq_is_passthrough(rq) &&
-		     cmd->common.opcode == nvme_fabrics_command &&
-		     cmd->fabrics.fctype == nvme_fabrics_type_connect)
-			return BLK_STS_OK;
+		if (req->cmd->common.opcode == nvme_fabrics_command &&
+		    req->cmd->fabrics.fctype == nvme_fabrics_type_connect)
+			return true;
 		break;
 	default:
 		break;
+	case NVME_CTRL_DEAD:
+		return false;
 	}
 
-	/*
-	 * Any other new io is something we're not in a state to send to the
-	 * device. Default action is to busy it and retry it after the
-	 * controller state is recovered. However, anything marked for failfast
-	 * or nvme multipath is immediately failed. Note: commands used to
-	 * initialize the controller will be marked for failfast.
-	 * Note: nvme cli/ioctl commands are marked for failfast.
-	 */
-	if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
-		return BLK_STS_RESOURCE;
-	nvme_req(rq)->status = NVME_SC_ABORT_REQ;
-	return BLK_STS_IOERR;
+	return queue_live;
 }
-EXPORT_SYMBOL_GPL(nvmf_check_if_ready);
+EXPORT_SYMBOL_GPL(__nvmf_check_ready);
 
 static const match_table_t opt_tokens = {
 	{ NVMF_OPT_TRANSPORT,	"transport=%s" },
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 7491a0bbf711..e1818a27aa2d 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -162,7 +162,17 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops);
 void nvmf_free_options(struct nvmf_ctrl_options *opts);
 int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
 bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
-blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl,
-	struct request *rq, bool queue_live, bool is_connected);
+blk_status_t nvmf_fail_nonready_command(struct request *rq);
+bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
+		bool queue_live);
+
+static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
+		bool queue_live)
+{
+	if (likely(ctrl->state == NVME_CTRL_LIVE ||
+		   ctrl->state == NVME_CTRL_ADMIN_ONLY))
+		return true;
+	return __nvmf_check_ready(ctrl, rq, queue_live);
+}
 
 #endif /* _NVME_FABRICS_H */
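With the check split this way, every fabrics transport's ->queue_rq() takes the same shape -- the inline nvmf_check_ready() fast path for LIVE/ADMIN_ONLY controllers, __nvmf_check_ready() for everything else, and nvmf_fail_nonready_command() to busy or fail a request that cannot be sent -- as the fc, rdma and loop hunks below show. A condensed sketch of that call pattern; the queue type, the EXAMPLE_Q_LIVE flag and the surrounding driver glue are placeholders, not part of the patch:

	#include <linux/blk-mq.h>
	#include "fabrics.h"

	#define EXAMPLE_Q_LIVE	0	/* placeholder queue-state bit */

	/* Placeholder per-queue state a real transport would define. */
	struct example_queue {
		struct nvme_ctrl	*ctrl;
		unsigned long		flags;
	};

	static blk_status_t example_queue_rq(struct blk_mq_hw_ctx *hctx,
			const struct blk_mq_queue_data *bd)
	{
		struct example_queue *queue = hctx->driver_data;
		struct request *rq = bd->rq;
		bool queue_ready = test_bit(EXAMPLE_Q_LIVE, &queue->flags);

		/*
		 * LIVE/ADMIN_ONLY controllers pass the inlined fast path;
		 * anything else is vetted by __nvmf_check_ready(), and
		 * non-ready requests are busied or failed in one place.
		 */
		if (!nvmf_check_ready(queue->ctrl, rq, queue_ready))
			return nvmf_fail_nonready_command(rq);

		/* ... nvme_setup_cmd() and transport-specific submission ... */
		return BLK_STS_OK;
	}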
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 0bad65803271..b528a2f5826c 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -142,6 +142,7 @@ struct nvme_fc_ctrl {
 	struct nvme_fc_rport	*rport;
 	u32			cnum;
 
+	bool			ioq_live;
 	bool			assoc_active;
 	u64			association_id;
 
@@ -1470,21 +1471,6 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl)
 
 static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg);
 
-static int
-nvme_fc_reinit_request(void *data, struct request *rq)
-{
-	struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq);
-	struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
-
-	memset(cmdiu, 0, sizeof(*cmdiu));
-	cmdiu->scsi_id = NVME_CMD_SCSI_ID;
-	cmdiu->fc_id = NVME_CMD_FC_ID;
-	cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32));
-	memset(&op->rsp_iu, 0, sizeof(op->rsp_iu));
-
-	return 0;
-}
-
 static void
 __nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl,
 		struct nvme_fc_fcp_op *op)
@@ -1893,6 +1879,7 @@ nvme_fc_free_queue(struct nvme_fc_queue *queue)
 	 */
 
 	queue->connection_id = 0;
+	atomic_set(&queue->csn, 1);
 }
 
 static void
@@ -2279,14 +2266,13 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu;
 	struct nvme_command *sqe = &cmdiu->sqe;
 	enum nvmefc_fcp_datadir	io_dir;
+	bool queue_ready = test_bit(NVME_FC_Q_LIVE, &queue->flags);
 	u32 data_len;
 	blk_status_t ret;
 
-	ret = nvmf_check_if_ready(&queue->ctrl->ctrl, rq,
-		test_bit(NVME_FC_Q_LIVE, &queue->flags),
-		ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE);
-	if (unlikely(ret))
-		return ret;
+	if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE ||
+	    !nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
+		return nvmf_fail_nonready_command(rq);
 
 	ret = nvme_setup_cmd(ns, rq, sqe);
 	if (ret)
@@ -2463,6 +2449,8 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
 	if (ret)
 		goto out_delete_hw_queues;
 
+	ctrl->ioq_live = true;
+
 	return 0;
 
 out_delete_hw_queues:
@@ -2480,7 +2468,7 @@ out_free_tag_set:
 }
 
 static int
-nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
+nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl)
 {
 	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
 	unsigned int nr_io_queues;
@@ -2500,12 +2488,6 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl)
 	if (ctrl->ctrl.queue_count == 1)
 		return 0;
 
-	nvme_fc_init_io_queues(ctrl);
-
-	ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
-	if (ret)
-		goto out_free_io_queues;
-
 	ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1);
 	if (ret)
 		goto out_free_io_queues;
@@ -2603,8 +2585,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	 * Create the admin queue
 	 */
 
-	nvme_fc_init_queue(ctrl, 0);
-
 	ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0,
 				NVME_AQ_DEPTH);
 	if (ret)
@@ -2615,8 +2595,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	if (ret)
 		goto out_delete_hw_queue;
 
-	if (ctrl->ctrl.state != NVME_CTRL_NEW)
-		blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
+	blk_mq_unquiesce_queue(ctrl->ctrl.admin_q);
 
 	ret = nvmf_connect_admin_queue(&ctrl->ctrl);
 	if (ret)
@@ -2689,10 +2668,10 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	 */
 
 	if (ctrl->ctrl.queue_count > 1) {
-		if (ctrl->ctrl.state == NVME_CTRL_NEW)
+		if (!ctrl->ioq_live)
 			ret = nvme_fc_create_io_queues(ctrl);
 		else
-			ret = nvme_fc_reinit_io_queues(ctrl);
+			ret = nvme_fc_recreate_io_queues(ctrl);
 		if (ret)
 			goto out_term_aen_ops;
 	}
@@ -2776,8 +2755,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl)
 	 * use blk_mq_tagset_busy_itr() and the transport routine to
 	 * terminate the exchanges.
 	 */
-	if (ctrl->ctrl.state != NVME_CTRL_NEW)
-		blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
+	blk_mq_quiesce_queue(ctrl->ctrl.admin_q);
 	blk_mq_tagset_busy_iter(&ctrl->admin_tag_set,
 				nvme_fc_terminate_exchange, &ctrl->ctrl);
 
@@ -2917,7 +2895,6 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
 	.submit_async_event	= nvme_fc_submit_async_event,
 	.delete_ctrl		= nvme_fc_delete_ctrl,
 	.get_address		= nvmf_get_address,
-	.reinit_request		= nvme_fc_reinit_request,
 };
 
 static void
@@ -2934,7 +2911,7 @@ nvme_fc_connect_ctrl_work(struct work_struct *work)
 		nvme_fc_reconnect_or_delete(ctrl, ret);
 	else
 		dev_info(ctrl->ctrl.device,
-			"NVME-FC{%d}: controller reconnect complete\n",
+			"NVME-FC{%d}: controller connect complete\n",
 			ctrl->cnum);
 }
 
@@ -2982,7 +2959,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 {
 	struct nvme_fc_ctrl *ctrl;
 	unsigned long flags;
-	int ret, idx, retry;
+	int ret, idx;
 
 	if (!(rport->remoteport.port_role &
 	    (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) {
@@ -3009,11 +2986,13 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	}
 
 	ctrl->ctrl.opts = opts;
+	ctrl->ctrl.nr_reconnects = 0;
 	INIT_LIST_HEAD(&ctrl->ctrl_list);
 	ctrl->lport = lport;
 	ctrl->rport = rport;
 	ctrl->dev = lport->dev;
 	ctrl->cnum = idx;
+	ctrl->ioq_live = false;
 	ctrl->assoc_active = false;
 	init_waitqueue_head(&ctrl->ioabort_wait);
 
@@ -3032,6 +3011,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 
 	ctrl->ctrl.sqsize = opts->queue_size - 1;
 	ctrl->ctrl.kato = opts->kato;
+	ctrl->ctrl.cntlid = 0xffff;
 
 	ret = -ENOMEM;
 	ctrl->queues = kcalloc(ctrl->ctrl.queue_count,
@@ -3039,6 +3019,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	if (!ctrl->queues)
 		goto out_free_ida;
 
+	nvme_fc_init_queue(ctrl, 0);
+
 	memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
 	ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops;
 	ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
@@ -3081,62 +3063,24 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 	list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list);
 	spin_unlock_irqrestore(&rport->lock, flags);
 
-	/*
-	 * It's possible that transactions used to create the association
-	 * may fail. Examples: CreateAssociation LS or CreateIOConnection
-	 * LS gets dropped/corrupted/fails; or a frame gets dropped or a
-	 * command times out for one of the actions to init the controller
-	 * (Connect, Get/Set_Property, Set_Features, etc). Many of these
-	 * transport errors (frame drop, LS failure) inherently must kill
-	 * the association. The transport is coded so that any command used
-	 * to create the association (prior to a LIVE state transition
-	 * while NEW or CONNECTING) will fail if it completes in error or
-	 * times out.
-	 *
-	 * As such: as the connect request was mostly likely due to a
-	 * udev event that discovered the remote port, meaning there is
-	 * not an admin or script there to restart if the connect
-	 * request fails, retry the initial connection creation up to
-	 * three times before giving up and declaring failure.
-	 */
-	for (retry = 0; retry < 3; retry++) {
-		ret = nvme_fc_create_association(ctrl);
-		if (!ret)
-			break;
-	}
-
-	if (ret) {
-		nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
-		cancel_work_sync(&ctrl->ctrl.reset_work);
-		cancel_delayed_work_sync(&ctrl->connect_work);
-
-		/* couldn't schedule retry - fail out */
+	if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING) ||
+	    !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) {
 		dev_err(ctrl->ctrl.device,
-			"NVME-FC{%d}: Connect retry failed\n", ctrl->cnum);
-
-		ctrl->ctrl.opts = NULL;
+			"NVME-FC{%d}: failed to init ctrl state\n", ctrl->cnum);
+		goto fail_ctrl;
+	}
 
-		/* initiate nvme ctrl ref counting teardown */
-		nvme_uninit_ctrl(&ctrl->ctrl);
+	nvme_get_ctrl(&ctrl->ctrl);
 
-		/* Remove core ctrl ref. */
+	if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) {
 		nvme_put_ctrl(&ctrl->ctrl);
-
-		/* as we're past the point where we transition to the ref
-		 * counting teardown path, if we return a bad pointer here,
-		 * the calling routine, thinking it's prior to the
-		 * transition, will do an rport put. Since the teardown
-		 * path also does a rport put, we do an extra get here to
-		 * so proper order/teardown happens.
-		 */
-		nvme_fc_rport_get(rport);
-
-		if (ret > 0)
-			ret = -EIO;
-		return ERR_PTR(ret);
+		dev_err(ctrl->ctrl.device,
+			"NVME-FC{%d}: failed to schedule initial connect\n",
+			ctrl->cnum);
+		goto fail_ctrl;
 	}
 
-	nvme_get_ctrl(&ctrl->ctrl);
+	flush_delayed_work(&ctrl->connect_work);
 
 	dev_info(ctrl->ctrl.device,
 		"NVME-FC{%d}: new ctrl: NQN \"%s\"\n",
@@ -3144,6 +3088,30 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
 
 	return &ctrl->ctrl;
 
+fail_ctrl:
+	nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING);
+	cancel_work_sync(&ctrl->ctrl.reset_work);
+	cancel_delayed_work_sync(&ctrl->connect_work);
+
+	ctrl->ctrl.opts = NULL;
+
+	/* initiate nvme ctrl ref counting teardown */
+	nvme_uninit_ctrl(&ctrl->ctrl);
+
+	/* Remove core ctrl ref. */
+	nvme_put_ctrl(&ctrl->ctrl);
+
+	/* as we're past the point where we transition to the ref
+	 * counting teardown path, if we return a bad pointer here,
+	 * the calling routine, thinking it's prior to the
+	 * transition, will do an rport put. Since the teardown
+	 * path also does a rport put, we do an extra get here to
+	 * so proper order/teardown happens.
+	 */
+	nvme_fc_rport_get(rport);
+
+	return ERR_PTR(-EIO);
+
 out_cleanup_admin_q:
 	blk_cleanup_queue(ctrl->ctrl.admin_q);
 out_free_admin_tag_set:
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index d7b664ae5923..1ffd3e8b13a1 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -12,6 +12,7 @@
  */
 
 #include <linux/moduleparam.h>
+#include <trace/events/block.h>
 #include "nvme.h"
 
 static bool multipath = true;
@@ -111,6 +112,9 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
 	if (likely(ns)) {
 		bio->bi_disk = ns->disk;
 		bio->bi_opf |= REQ_NVME_MPATH;
+		trace_block_bio_remap(bio->bi_disk->queue, bio,
+				      disk_devt(ns->head->disk),
+				      bio->bi_iter.bi_sector);
 		ret = direct_make_request(bio);
 	} else if (!list_empty_careful(&head->list)) {
 		dev_warn_ratelimited(dev, "no path available - requeuing I/O\n");
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 34df07d44f80..231807cbc849 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -321,7 +321,6 @@ struct nvme_ctrl_ops {
 	void (*submit_async_event)(struct nvme_ctrl *ctrl);
 	void (*delete_ctrl)(struct nvme_ctrl *ctrl);
 	int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
-	int (*reinit_request)(void *data, struct request *rq);
 	void (*stop_ctrl)(struct nvme_ctrl *ctrl);
 };
 
@@ -416,7 +415,6 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl);
 void nvme_wait_freeze(struct nvme_ctrl *ctrl);
 void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout);
 void nvme_start_freeze(struct nvme_ctrl *ctrl);
-int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set);
 
 #define NVME_QID_ANY -1
 struct request *nvme_alloc_request(struct request_queue *q,
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 2aba03876d84..c9424da0d23e 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1189,21 +1189,38 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 	count = ib_dma_map_sg(ibdev, req->sg_table.sgl, req->nents,
 		    rq_data_dir(rq) == WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
 	if (unlikely(count <= 0)) {
-		sg_free_table_chained(&req->sg_table, true);
-		return -EIO;
+		ret = -EIO;
+		goto out_free_table;
 	}
 
 	if (count == 1) {
 		if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) &&
 		    blk_rq_payload_bytes(rq) <=
-				nvme_rdma_inline_data_size(queue))
-			return nvme_rdma_map_sg_inline(queue, req, c);
+				nvme_rdma_inline_data_size(queue)) {
+			ret = nvme_rdma_map_sg_inline(queue, req, c);
+			goto out;
+		}
 
-		if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY)
-			return nvme_rdma_map_sg_single(queue, req, c);
+		if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) {
+			ret = nvme_rdma_map_sg_single(queue, req, c);
+			goto out;
+		}
 	}
 
-	return nvme_rdma_map_sg_fr(queue, req, c, count);
+	ret = nvme_rdma_map_sg_fr(queue, req, c, count);
+out:
+	if (unlikely(ret))
+		goto out_unmap_sg;
+
+	return 0;
+
+out_unmap_sg:
+	ib_dma_unmap_sg(ibdev, req->sg_table.sgl,
+			req->nents, rq_data_dir(rq) ==
+			WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
+out_free_table:
+	sg_free_table_chained(&req->sg_table, true);
+	return ret;
 }
 
 static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
@@ -1613,15 +1630,14 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_rdma_qe *sqe = &req->sqe;
 	struct nvme_command *c = sqe->data;
 	struct ib_device *dev;
+	bool queue_ready = test_bit(NVME_RDMA_Q_LIVE, &queue->flags);
 	blk_status_t ret;
 	int err;
 
 	WARN_ON_ONCE(rq->tag < 0);
 
-	ret = nvmf_check_if_ready(&queue->ctrl->ctrl, rq,
-		test_bit(NVME_RDMA_Q_LIVE, &queue->flags), true);
-	if (unlikely(ret))
-		return ret;
+	if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
+		return nvmf_fail_nonready_command(rq);
 
 	dev = queue->device->dev;
 	ib_dma_sync_single_for_cpu(dev, sqe->dma,
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 962532842769..38803576d5e1 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -119,9 +119,11 @@ static void nvmet_execute_get_log_page_smart(struct nvmet_req *req)
 	else
 		status = nvmet_get_smart_log_nsid(req, log);
 	if (status)
-		goto out;
+		goto out_free_log;
 
 	status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log));
+out_free_log:
+	kfree(log);
 out:
 	nvmet_req_complete(req, status);
 }
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 1304ec3a7ede..d8d91f04bd7e 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -158,12 +158,11 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_loop_queue *queue = hctx->driver_data;
 	struct request *req = bd->rq;
 	struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
+	bool queue_ready = test_bit(NVME_LOOP_Q_LIVE, &queue->flags);
 	blk_status_t ret;
 
-	ret = nvmf_check_if_ready(&queue->ctrl->ctrl, req,
-		test_bit(NVME_LOOP_Q_LIVE, &queue->flags), true);
-	if (unlikely(ret))
-		return ret;
+	if (!nvmf_check_ready(&queue->ctrl->ctrl, req, queue_ready))
+		return nvmf_fail_nonready_command(req);
 
 	ret = nvme_setup_cmd(ns, req, &iod->cmd);
 	if (ret)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index fb355173f3c7..e3147eb74222 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -281,8 +281,6 @@ void blk_freeze_queue_start(struct request_queue *q);
 void blk_mq_freeze_queue_wait(struct request_queue *q);
 int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
 				     unsigned long timeout);
-int blk_mq_tagset_iter(struct blk_mq_tag_set *set, void *data,
-		int (reinit_request)(void *, struct request *));
 
 int blk_mq_map_queues(struct blk_mq_tag_set *set);
 void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index bca3a92eb55f..9154570edf29 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -127,6 +127,8 @@ typedef __u32 __bitwise req_flags_t;
 #define RQF_ZONE_WRITE_LOCKED	((__force req_flags_t)(1 << 19))
 /* already slept for hybrid poll */
 #define RQF_MQ_POLL_SLEPT	((__force req_flags_t)(1 << 20))
+/* ->timeout has been called, don't expire again */
+#define RQF_TIMED_OUT		((__force req_flags_t)(1 << 21))
 
 /* flags that prevent us from merging requests: */
 #define RQF_NOMERGE_FLAGS \
@@ -560,7 +562,6 @@ struct request_queue {
 	unsigned int		dma_alignment;
 
 	struct blk_queue_tag	*queue_tags;
-	struct list_head	tag_busy_list;
 
 	unsigned int		nr_sorted;
 	unsigned int		in_flight[2];
@@ -1373,7 +1374,6 @@ extern void blk_queue_end_tag(struct request_queue *, struct request *);
 extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *, int);
 extern void blk_queue_free_tags(struct request_queue *);
 extern int blk_queue_resize_tags(struct request_queue *, int);
-extern void blk_queue_invalidate_tags(struct request_queue *);
 extern struct blk_queue_tag *blk_init_tags(int, int);
 extern void blk_free_tags(struct blk_queue_tag *);
 
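RQF_TIMED_OUT ties the blk-mq hunks together: blk_mq_rq_timed_out() sets the flag before calling ->timeout(), clears it again when the timer is re-armed or the request is requeued, and blk_mq_req_expired() skips any request that still carries it, so a handler that returns BLK_EH_RESET_TIMER is not re-entered for the same expiry. A hedged sketch of a driver-side ->timeout callback that relies on that behaviour; the example_* names are placeholders, not part of any real driver:

	#include <linux/blk-mq.h>

	/* Placeholder hooks a real driver would implement. */
	static bool example_abort_in_progress(struct request *rq);
	static void example_complete_with_error(struct request *rq);

	static enum blk_eh_timer_return example_timeout(struct request *rq,
							bool reserved)
	{
		/*
		 * Returning BLK_EH_RESET_TIMER makes the core clear
		 * RQF_TIMED_OUT and re-arm the timer via blk_add_timer();
		 * until the new deadline passes, this handler is not
		 * invoked again for the same request.
		 */
		if (example_abort_in_progress(rq))
			return BLK_EH_RESET_TIMER;

		example_complete_with_error(rq);
		return BLK_EH_DONE;
	}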