author     Linus Torvalds <torvalds@linux-foundation.org>   2018-06-16 16:37:55 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2018-06-16 16:37:55 -0400
commit     265c5596da61a5467b6b3bfbd9ba637f867d3927 (patch)
tree       92eac309ddb4bc97afef3c2586db1fa32d280073
parent     5e7b9212a4a887f42221376445df52cd5991d100 (diff)
parent     d6c73964f1e2a07f75057fb32ae46f6599036f93 (diff)
Merge tag 'for-linus-20180616' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
"A collection of fixes that should go into -rc1. This contains:
- bsg_open vs bsg_unregister race fix (Anatoliy)
- NVMe pull request from Christoph, with fixes for regressions in
this window, FC connect/reconnect path code unification, and a
trace point addition.
- timeout fix (Christoph)
- remove a few unused functions (Christoph)
- blk-mq tag_set reinit fix (Roman)"
* tag 'for-linus-20180616' of git://git.kernel.dk/linux-block:
bsg: fix race of bsg_open and bsg_unregister
block: remov blk_queue_invalidate_tags
nvme-fabrics: fix and refine state checks in __nvmf_check_ready
nvme-fabrics: handle the admin-only case properly in nvmf_check_ready
nvme-fabrics: refactor queue ready check
blk-mq: remove blk_mq_tagset_iter
nvme: remove nvme_reinit_tagset
nvme-fc: fix nulling of queue data on reconnect
nvme-fc: remove reinit_request routine
blk-mq: don't time out requests again that are in the timeout handler
nvme-fc: change controllers first connect to use reconnect path
nvme: don't rely on the changed namespace list log
nvmet: free smart-log buffer after use
nvme-rdma: fix error flow during mapping request data
nvme: add bio remapping tracepoint
nvme: fix NULL pointer dereference in nvme_init_subsystem
blk-mq: reinit q->tag_set_list entry only after grace period
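Editor's note on the "refactor queue ready check" series: the old nvmf_check_if_ready() is split into a fast inline nvmf_check_ready() plus the out-of-line __nvmf_check_ready() and nvmf_fail_nonready_command() helpers (see the fabrics.c/fabrics.h hunks below). A minimal sketch of how a fabrics transport's ->queue_rq() now uses them, modeled on the loop/rdma/fc hunks in this diff; the example_* names and EXAMPLE_Q_LIVE flag are placeholders, not real kernel symbols:

static blk_status_t example_queue_rq(struct blk_mq_hw_ctx *hctx,
		const struct blk_mq_queue_data *bd)
{
	struct example_fabrics_queue *queue = hctx->driver_data;	/* hypothetical */
	struct request *req = bd->rq;
	/* per-queue "live" bit; each transport keeps its own (hypothetical name) */
	bool queue_ready = test_bit(EXAMPLE_Q_LIVE, &queue->flags);

	/*
	 * Fast path: nvmf_check_ready() is an inline that returns true for
	 * LIVE/ADMIN_ONLY controllers and only falls back to the slow-path
	 * __nvmf_check_ready() while connecting or tearing down.
	 */
	if (!nvmf_check_ready(&queue->ctrl->ctrl, req, queue_ready))
		return nvmf_fail_nonready_command(req);

	/* ... normal nvme_setup_cmd() and transport submission continue here ... */
	return BLK_STS_OK;
}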
-rw-r--r-- | Documentation/block/biodoc.txt   | 15
-rw-r--r-- | block/blk-mq-tag.c               | 29
-rw-r--r-- | block/blk-mq.c                   | 8
-rw-r--r-- | block/blk-tag.c                  | 22
-rw-r--r-- | block/bsg.c                      | 22
-rw-r--r-- | drivers/nvme/host/core.c         | 48
-rw-r--r-- | drivers/nvme/host/fabrics.c      | 86
-rw-r--r-- | drivers/nvme/host/fabrics.h      | 14
-rw-r--r-- | drivers/nvme/host/fc.c           | 144
-rw-r--r-- | drivers/nvme/host/multipath.c    | 4
-rw-r--r-- | drivers/nvme/host/nvme.h         | 2
-rw-r--r-- | drivers/nvme/host/rdma.c         | 38
-rw-r--r-- | drivers/nvme/target/admin-cmd.c  | 4
-rw-r--r-- | drivers/nvme/target/loop.c       | 7
-rw-r--r-- | include/linux/blk-mq.h           | 2
-rw-r--r-- | include/linux/blkdev.h           | 4
16 files changed, 174 insertions(+), 275 deletions(-)
diff --git a/Documentation/block/biodoc.txt b/Documentation/block/biodoc.txt
index 86927029a52d..207eca58efaa 100644
--- a/Documentation/block/biodoc.txt
+++ b/Documentation/block/biodoc.txt
@@ -752,18 +752,6 @@ completion of the request to the block layer. This means ending tag | |||
752 | operations before calling end_that_request_last()! For an example of a user | 752 | operations before calling end_that_request_last()! For an example of a user |
753 | of these helpers, see the IDE tagged command queueing support. | 753 | of these helpers, see the IDE tagged command queueing support. |
754 | 754 | ||
755 | Certain hardware conditions may dictate a need to invalidate the block tag | ||
756 | queue. For instance, on IDE any tagged request error needs to clear both | ||
757 | the hardware and software block queue and enable the driver to sanely restart | ||
758 | all the outstanding requests. There's a third helper to do that: | ||
759 | |||
760 | blk_queue_invalidate_tags(struct request_queue *q) | ||
761 | |||
762 | Clear the internal block tag queue and re-add all the pending requests | ||
763 | to the request queue. The driver will receive them again on the | ||
764 | next request_fn run, just like it did the first time it encountered | ||
765 | them. | ||
766 | |||
767 | 3.2.5.2 Tag info | 755 | 3.2.5.2 Tag info |
768 | 756 | ||
769 | Some block functions exist to query current tag status or to go from a | 757 | Some block functions exist to query current tag status or to go from a |
@@ -805,8 +793,7 @@ Internally, block manages tags in the blk_queue_tag structure: | |||
805 | Most of the above is simple and straight forward, however busy_list may need | 793 | Most of the above is simple and straight forward, however busy_list may need |
806 | a bit of explaining. Normally we don't care too much about request ordering, | 794 | a bit of explaining. Normally we don't care too much about request ordering, |
807 | but in the event of any barrier requests in the tag queue we need to ensure | 795 | but in the event of any barrier requests in the tag queue we need to ensure |
808 | that requests are restarted in the order they were queue. This may happen | 796 | that requests are restarted in the order they were queue. |
809 | if the driver needs to use blk_queue_invalidate_tags(). | ||
810 | 797 | ||
811 | 3.3 I/O Submission | 798 | 3.3 I/O Submission |
812 | 799 | ||
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 70356a2a11ab..09b2ee6694fb 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -311,35 +311,6 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, | |||
311 | } | 311 | } |
312 | EXPORT_SYMBOL(blk_mq_tagset_busy_iter); | 312 | EXPORT_SYMBOL(blk_mq_tagset_busy_iter); |
313 | 313 | ||
314 | int blk_mq_tagset_iter(struct blk_mq_tag_set *set, void *data, | ||
315 | int (fn)(void *, struct request *)) | ||
316 | { | ||
317 | int i, j, ret = 0; | ||
318 | |||
319 | if (WARN_ON_ONCE(!fn)) | ||
320 | goto out; | ||
321 | |||
322 | for (i = 0; i < set->nr_hw_queues; i++) { | ||
323 | struct blk_mq_tags *tags = set->tags[i]; | ||
324 | |||
325 | if (!tags) | ||
326 | continue; | ||
327 | |||
328 | for (j = 0; j < tags->nr_tags; j++) { | ||
329 | if (!tags->static_rqs[j]) | ||
330 | continue; | ||
331 | |||
332 | ret = fn(data, tags->static_rqs[j]); | ||
333 | if (ret) | ||
334 | goto out; | ||
335 | } | ||
336 | } | ||
337 | |||
338 | out: | ||
339 | return ret; | ||
340 | } | ||
341 | EXPORT_SYMBOL_GPL(blk_mq_tagset_iter); | ||
342 | |||
343 | void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, | 314 | void blk_mq_queue_tag_busy_iter(struct request_queue *q, busy_iter_fn *fn, |
344 | void *priv) | 315 | void *priv) |
345 | { | 316 | { |
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e9da5e6a8526..70c65bb6c013 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -671,6 +671,7 @@ static void __blk_mq_requeue_request(struct request *rq) | |||
671 | 671 | ||
672 | if (blk_mq_request_started(rq)) { | 672 | if (blk_mq_request_started(rq)) { |
673 | WRITE_ONCE(rq->state, MQ_RQ_IDLE); | 673 | WRITE_ONCE(rq->state, MQ_RQ_IDLE); |
674 | rq->rq_flags &= ~RQF_TIMED_OUT; | ||
674 | if (q->dma_drain_size && blk_rq_bytes(rq)) | 675 | if (q->dma_drain_size && blk_rq_bytes(rq)) |
675 | rq->nr_phys_segments--; | 676 | rq->nr_phys_segments--; |
676 | } | 677 | } |
@@ -770,6 +771,7 @@ EXPORT_SYMBOL(blk_mq_tag_to_rq); | |||
770 | 771 | ||
771 | static void blk_mq_rq_timed_out(struct request *req, bool reserved) | 772 | static void blk_mq_rq_timed_out(struct request *req, bool reserved) |
772 | { | 773 | { |
774 | req->rq_flags |= RQF_TIMED_OUT; | ||
773 | if (req->q->mq_ops->timeout) { | 775 | if (req->q->mq_ops->timeout) { |
774 | enum blk_eh_timer_return ret; | 776 | enum blk_eh_timer_return ret; |
775 | 777 | ||
@@ -779,6 +781,7 @@ static void blk_mq_rq_timed_out(struct request *req, bool reserved) | |||
779 | WARN_ON_ONCE(ret != BLK_EH_RESET_TIMER); | 781 | WARN_ON_ONCE(ret != BLK_EH_RESET_TIMER); |
780 | } | 782 | } |
781 | 783 | ||
784 | req->rq_flags &= ~RQF_TIMED_OUT; | ||
782 | blk_add_timer(req); | 785 | blk_add_timer(req); |
783 | } | 786 | } |
784 | 787 | ||
@@ -788,6 +791,8 @@ static bool blk_mq_req_expired(struct request *rq, unsigned long *next) | |||
788 | 791 | ||
789 | if (blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT) | 792 | if (blk_mq_rq_state(rq) != MQ_RQ_IN_FLIGHT) |
790 | return false; | 793 | return false; |
794 | if (rq->rq_flags & RQF_TIMED_OUT) | ||
795 | return false; | ||
791 | 796 | ||
792 | deadline = blk_rq_deadline(rq); | 797 | deadline = blk_rq_deadline(rq); |
793 | if (time_after_eq(jiffies, deadline)) | 798 | if (time_after_eq(jiffies, deadline)) |
@@ -2349,7 +2354,6 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q) | |||
2349 | 2354 | ||
2350 | mutex_lock(&set->tag_list_lock); | 2355 | mutex_lock(&set->tag_list_lock); |
2351 | list_del_rcu(&q->tag_set_list); | 2356 | list_del_rcu(&q->tag_set_list); |
2352 | INIT_LIST_HEAD(&q->tag_set_list); | ||
2353 | if (list_is_singular(&set->tag_list)) { | 2357 | if (list_is_singular(&set->tag_list)) { |
2354 | /* just transitioned to unshared */ | 2358 | /* just transitioned to unshared */ |
2355 | set->flags &= ~BLK_MQ_F_TAG_SHARED; | 2359 | set->flags &= ~BLK_MQ_F_TAG_SHARED; |
@@ -2357,8 +2361,8 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q) | |||
2357 | blk_mq_update_tag_set_depth(set, false); | 2361 | blk_mq_update_tag_set_depth(set, false); |
2358 | } | 2362 | } |
2359 | mutex_unlock(&set->tag_list_lock); | 2363 | mutex_unlock(&set->tag_list_lock); |
2360 | |||
2361 | synchronize_rcu(); | 2364 | synchronize_rcu(); |
2365 | INIT_LIST_HEAD(&q->tag_set_list); | ||
2362 | } | 2366 | } |
2363 | 2367 | ||
2364 | static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set, | 2368 | static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set, |
diff --git a/block/blk-tag.c b/block/blk-tag.c
index 24b20d86bcbc..fbc153aef166 100644
--- a/block/blk-tag.c
+++ b/block/blk-tag.c
@@ -188,7 +188,6 @@ int blk_queue_init_tags(struct request_queue *q, int depth, | |||
188 | */ | 188 | */ |
189 | q->queue_tags = tags; | 189 | q->queue_tags = tags; |
190 | queue_flag_set_unlocked(QUEUE_FLAG_QUEUED, q); | 190 | queue_flag_set_unlocked(QUEUE_FLAG_QUEUED, q); |
191 | INIT_LIST_HEAD(&q->tag_busy_list); | ||
192 | return 0; | 191 | return 0; |
193 | } | 192 | } |
194 | EXPORT_SYMBOL(blk_queue_init_tags); | 193 | EXPORT_SYMBOL(blk_queue_init_tags); |
@@ -374,27 +373,6 @@ int blk_queue_start_tag(struct request_queue *q, struct request *rq) | |||
374 | rq->tag = tag; | 373 | rq->tag = tag; |
375 | bqt->tag_index[tag] = rq; | 374 | bqt->tag_index[tag] = rq; |
376 | blk_start_request(rq); | 375 | blk_start_request(rq); |
377 | list_add(&rq->queuelist, &q->tag_busy_list); | ||
378 | return 0; | 376 | return 0; |
379 | } | 377 | } |
380 | EXPORT_SYMBOL(blk_queue_start_tag); | 378 | EXPORT_SYMBOL(blk_queue_start_tag); |
381 | |||
382 | /** | ||
383 | * blk_queue_invalidate_tags - invalidate all pending tags | ||
384 | * @q: the request queue for the device | ||
385 | * | ||
386 | * Description: | ||
387 | * Hardware conditions may dictate a need to stop all pending requests. | ||
388 | * In this case, we will safely clear the block side of the tag queue and | ||
389 | * readd all requests to the request queue in the right order. | ||
390 | **/ | ||
391 | void blk_queue_invalidate_tags(struct request_queue *q) | ||
392 | { | ||
393 | struct list_head *tmp, *n; | ||
394 | |||
395 | lockdep_assert_held(q->queue_lock); | ||
396 | |||
397 | list_for_each_safe(tmp, n, &q->tag_busy_list) | ||
398 | blk_requeue_request(q, list_entry_rq(tmp)); | ||
399 | } | ||
400 | EXPORT_SYMBOL(blk_queue_invalidate_tags); | ||
diff --git a/block/bsg.c b/block/bsg.c
index 132e657e2d91..66602c489956 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -693,6 +693,8 @@ static struct bsg_device *bsg_add_device(struct inode *inode, | |||
693 | struct bsg_device *bd; | 693 | struct bsg_device *bd; |
694 | unsigned char buf[32]; | 694 | unsigned char buf[32]; |
695 | 695 | ||
696 | lockdep_assert_held(&bsg_mutex); | ||
697 | |||
696 | if (!blk_get_queue(rq)) | 698 | if (!blk_get_queue(rq)) |
697 | return ERR_PTR(-ENXIO); | 699 | return ERR_PTR(-ENXIO); |
698 | 700 | ||
@@ -707,14 +709,12 @@ static struct bsg_device *bsg_add_device(struct inode *inode, | |||
707 | bsg_set_block(bd, file); | 709 | bsg_set_block(bd, file); |
708 | 710 | ||
709 | atomic_set(&bd->ref_count, 1); | 711 | atomic_set(&bd->ref_count, 1); |
710 | mutex_lock(&bsg_mutex); | ||
711 | hlist_add_head(&bd->dev_list, bsg_dev_idx_hash(iminor(inode))); | 712 | hlist_add_head(&bd->dev_list, bsg_dev_idx_hash(iminor(inode))); |
712 | 713 | ||
713 | strncpy(bd->name, dev_name(rq->bsg_dev.class_dev), sizeof(bd->name) - 1); | 714 | strncpy(bd->name, dev_name(rq->bsg_dev.class_dev), sizeof(bd->name) - 1); |
714 | bsg_dbg(bd, "bound to <%s>, max queue %d\n", | 715 | bsg_dbg(bd, "bound to <%s>, max queue %d\n", |
715 | format_dev_t(buf, inode->i_rdev), bd->max_queue); | 716 | format_dev_t(buf, inode->i_rdev), bd->max_queue); |
716 | 717 | ||
717 | mutex_unlock(&bsg_mutex); | ||
718 | return bd; | 718 | return bd; |
719 | } | 719 | } |
720 | 720 | ||
@@ -722,7 +722,7 @@ static struct bsg_device *__bsg_get_device(int minor, struct request_queue *q) | |||
722 | { | 722 | { |
723 | struct bsg_device *bd; | 723 | struct bsg_device *bd; |
724 | 724 | ||
725 | mutex_lock(&bsg_mutex); | 725 | lockdep_assert_held(&bsg_mutex); |
726 | 726 | ||
727 | hlist_for_each_entry(bd, bsg_dev_idx_hash(minor), dev_list) { | 727 | hlist_for_each_entry(bd, bsg_dev_idx_hash(minor), dev_list) { |
728 | if (bd->queue == q) { | 728 | if (bd->queue == q) { |
@@ -732,7 +732,6 @@ static struct bsg_device *__bsg_get_device(int minor, struct request_queue *q) | |||
732 | } | 732 | } |
733 | bd = NULL; | 733 | bd = NULL; |
734 | found: | 734 | found: |
735 | mutex_unlock(&bsg_mutex); | ||
736 | return bd; | 735 | return bd; |
737 | } | 736 | } |
738 | 737 | ||
@@ -746,17 +745,18 @@ static struct bsg_device *bsg_get_device(struct inode *inode, struct file *file) | |||
746 | */ | 745 | */ |
747 | mutex_lock(&bsg_mutex); | 746 | mutex_lock(&bsg_mutex); |
748 | bcd = idr_find(&bsg_minor_idr, iminor(inode)); | 747 | bcd = idr_find(&bsg_minor_idr, iminor(inode)); |
749 | mutex_unlock(&bsg_mutex); | ||
750 | 748 | ||
751 | if (!bcd) | 749 | if (!bcd) { |
752 | return ERR_PTR(-ENODEV); | 750 | bd = ERR_PTR(-ENODEV); |
751 | goto out_unlock; | ||
752 | } | ||
753 | 753 | ||
754 | bd = __bsg_get_device(iminor(inode), bcd->queue); | 754 | bd = __bsg_get_device(iminor(inode), bcd->queue); |
755 | if (bd) | 755 | if (!bd) |
756 | return bd; | 756 | bd = bsg_add_device(inode, bcd->queue, file); |
757 | |||
758 | bd = bsg_add_device(inode, bcd->queue, file); | ||
759 | 757 | ||
758 | out_unlock: | ||
759 | mutex_unlock(&bsg_mutex); | ||
760 | return bd; | 760 | return bd; |
761 | } | 761 | } |
762 | 762 | ||
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index effb1309682e..21710a7460c8 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2208,7 +2208,7 @@ static int nvme_init_subsystem(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id) | |||
2208 | * Verify that the subsystem actually supports multiple | 2208 | * Verify that the subsystem actually supports multiple |
2209 | * controllers, else bail out. | 2209 | * controllers, else bail out. |
2210 | */ | 2210 | */ |
2211 | if (!ctrl->opts->discovery_nqn && | 2211 | if (!(ctrl->opts && ctrl->opts->discovery_nqn) && |
2212 | nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) { | 2212 | nvme_active_ctrls(found) && !(id->cmic & (1 << 1))) { |
2213 | dev_err(ctrl->device, | 2213 | dev_err(ctrl->device, |
2214 | "ignoring ctrl due to duplicate subnqn (%s).\n", | 2214 | "ignoring ctrl due to duplicate subnqn (%s).\n", |
@@ -3197,40 +3197,28 @@ static void nvme_scan_ns_sequential(struct nvme_ctrl *ctrl, unsigned nn) | |||
3197 | nvme_remove_invalid_namespaces(ctrl, nn); | 3197 | nvme_remove_invalid_namespaces(ctrl, nn); |
3198 | } | 3198 | } |
3199 | 3199 | ||
3200 | static bool nvme_scan_changed_ns_log(struct nvme_ctrl *ctrl) | 3200 | static void nvme_clear_changed_ns_log(struct nvme_ctrl *ctrl) |
3201 | { | 3201 | { |
3202 | size_t log_size = NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32); | 3202 | size_t log_size = NVME_MAX_CHANGED_NAMESPACES * sizeof(__le32); |
3203 | __le32 *log; | 3203 | __le32 *log; |
3204 | int error, i; | 3204 | int error; |
3205 | bool ret = false; | ||
3206 | 3205 | ||
3207 | log = kzalloc(log_size, GFP_KERNEL); | 3206 | log = kzalloc(log_size, GFP_KERNEL); |
3208 | if (!log) | 3207 | if (!log) |
3209 | return false; | 3208 | return; |
3210 | 3209 | ||
3210 | /* | ||
3211 | * We need to read the log to clear the AEN, but we don't want to rely | ||
3212 | * on it for the changed namespace information as userspace could have | ||
3213 | * raced with us in reading the log page, which could cause us to miss | ||
3214 | * updates. | ||
3215 | */ | ||
3211 | error = nvme_get_log(ctrl, NVME_LOG_CHANGED_NS, log, log_size); | 3216 | error = nvme_get_log(ctrl, NVME_LOG_CHANGED_NS, log, log_size); |
3212 | if (error) { | 3217 | if (error) |
3213 | dev_warn(ctrl->device, | 3218 | dev_warn(ctrl->device, |
3214 | "reading changed ns log failed: %d\n", error); | 3219 | "reading changed ns log failed: %d\n", error); |
3215 | goto out_free_log; | ||
3216 | } | ||
3217 | |||
3218 | if (log[0] == cpu_to_le32(0xffffffff)) | ||
3219 | goto out_free_log; | ||
3220 | |||
3221 | for (i = 0; i < NVME_MAX_CHANGED_NAMESPACES; i++) { | ||
3222 | u32 nsid = le32_to_cpu(log[i]); | ||
3223 | 3220 | ||
3224 | if (nsid == 0) | ||
3225 | break; | ||
3226 | dev_info(ctrl->device, "rescanning namespace %d.\n", nsid); | ||
3227 | nvme_validate_ns(ctrl, nsid); | ||
3228 | } | ||
3229 | ret = true; | ||
3230 | |||
3231 | out_free_log: | ||
3232 | kfree(log); | 3221 | kfree(log); |
3233 | return ret; | ||
3234 | } | 3222 | } |
3235 | 3223 | ||
3236 | static void nvme_scan_work(struct work_struct *work) | 3224 | static void nvme_scan_work(struct work_struct *work) |
@@ -3246,9 +3234,8 @@ static void nvme_scan_work(struct work_struct *work) | |||
3246 | WARN_ON_ONCE(!ctrl->tagset); | 3234 | WARN_ON_ONCE(!ctrl->tagset); |
3247 | 3235 | ||
3248 | if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) { | 3236 | if (test_and_clear_bit(NVME_AER_NOTICE_NS_CHANGED, &ctrl->events)) { |
3249 | if (nvme_scan_changed_ns_log(ctrl)) | ||
3250 | goto out_sort_namespaces; | ||
3251 | dev_info(ctrl->device, "rescanning namespaces.\n"); | 3237 | dev_info(ctrl->device, "rescanning namespaces.\n"); |
3238 | nvme_clear_changed_ns_log(ctrl); | ||
3252 | } | 3239 | } |
3253 | 3240 | ||
3254 | if (nvme_identify_ctrl(ctrl, &id)) | 3241 | if (nvme_identify_ctrl(ctrl, &id)) |
@@ -3263,7 +3250,6 @@ static void nvme_scan_work(struct work_struct *work) | |||
3263 | nvme_scan_ns_sequential(ctrl, nn); | 3250 | nvme_scan_ns_sequential(ctrl, nn); |
3264 | out_free_id: | 3251 | out_free_id: |
3265 | kfree(id); | 3252 | kfree(id); |
3266 | out_sort_namespaces: | ||
3267 | down_write(&ctrl->namespaces_rwsem); | 3253 | down_write(&ctrl->namespaces_rwsem); |
3268 | list_sort(NULL, &ctrl->namespaces, ns_cmp); | 3254 | list_sort(NULL, &ctrl->namespaces, ns_cmp); |
3269 | up_write(&ctrl->namespaces_rwsem); | 3255 | up_write(&ctrl->namespaces_rwsem); |
@@ -3641,16 +3627,6 @@ void nvme_start_queues(struct nvme_ctrl *ctrl) | |||
3641 | } | 3627 | } |
3642 | EXPORT_SYMBOL_GPL(nvme_start_queues); | 3628 | EXPORT_SYMBOL_GPL(nvme_start_queues); |
3643 | 3629 | ||
3644 | int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set) | ||
3645 | { | ||
3646 | if (!ctrl->ops->reinit_request) | ||
3647 | return 0; | ||
3648 | |||
3649 | return blk_mq_tagset_iter(set, set->driver_data, | ||
3650 | ctrl->ops->reinit_request); | ||
3651 | } | ||
3652 | EXPORT_SYMBOL_GPL(nvme_reinit_tagset); | ||
3653 | |||
3654 | int __init nvme_core_init(void) | 3630 | int __init nvme_core_init(void) |
3655 | { | 3631 | { |
3656 | int result = -ENOMEM; | 3632 | int result = -ENOMEM; |
diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c
index fa32c1216409..903eb4545e26 100644
--- a/drivers/nvme/host/fabrics.c
+++ b/drivers/nvme/host/fabrics.c
@@ -536,67 +536,55 @@ static struct nvmf_transport_ops *nvmf_lookup_transport( | |||
536 | return NULL; | 536 | return NULL; |
537 | } | 537 | } |
538 | 538 | ||
539 | blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl, struct request *rq, | 539 | /* |
540 | bool queue_live, bool is_connected) | 540 | * For something we're not in a state to send to the device the default action |
541 | * is to busy it and retry it after the controller state is recovered. However, | ||
542 | * anything marked for failfast or nvme multipath is immediately failed. | ||
543 | * | ||
544 | * Note: commands used to initialize the controller will be marked for failfast. | ||
545 | * Note: nvme cli/ioctl commands are marked for failfast. | ||
546 | */ | ||
547 | blk_status_t nvmf_fail_nonready_command(struct request *rq) | ||
541 | { | 548 | { |
542 | struct nvme_command *cmd = nvme_req(rq)->cmd; | 549 | if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) |
550 | return BLK_STS_RESOURCE; | ||
551 | nvme_req(rq)->status = NVME_SC_ABORT_REQ; | ||
552 | return BLK_STS_IOERR; | ||
553 | } | ||
554 | EXPORT_SYMBOL_GPL(nvmf_fail_nonready_command); | ||
543 | 555 | ||
544 | if (likely(ctrl->state == NVME_CTRL_LIVE && is_connected)) | 556 | bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, |
545 | return BLK_STS_OK; | 557 | bool queue_live) |
558 | { | ||
559 | struct nvme_request *req = nvme_req(rq); | ||
560 | |||
561 | /* | ||
562 | * If we are in some state of setup or teardown only allow | ||
563 | * internally generated commands. | ||
564 | */ | ||
565 | if (!blk_rq_is_passthrough(rq) || (req->flags & NVME_REQ_USERCMD)) | ||
566 | return false; | ||
546 | 567 | ||
568 | /* | ||
569 | * Only allow commands on a live queue, except for the connect command, | ||
570 | * which is require to set the queue live in the appropinquate states. | ||
571 | */ | ||
547 | switch (ctrl->state) { | 572 | switch (ctrl->state) { |
548 | case NVME_CTRL_NEW: | 573 | case NVME_CTRL_NEW: |
549 | case NVME_CTRL_CONNECTING: | 574 | case NVME_CTRL_CONNECTING: |
550 | case NVME_CTRL_DELETING: | 575 | if (req->cmd->common.opcode == nvme_fabrics_command && |
551 | /* | 576 | req->cmd->fabrics.fctype == nvme_fabrics_type_connect) |
552 | * This is the case of starting a new or deleting an association | 577 | return true; |
553 | * but connectivity was lost before it was fully created or torn | ||
554 | * down. We need to error the commands used to initialize the | ||
555 | * controller so the reconnect can go into a retry attempt. The | ||
556 | * commands should all be marked REQ_FAILFAST_DRIVER, which will | ||
557 | * hit the reject path below. Anything else will be queued while | ||
558 | * the state settles. | ||
559 | */ | ||
560 | if (!is_connected) | ||
561 | break; | ||
562 | |||
563 | /* | ||
564 | * If queue is live, allow only commands that are internally | ||
565 | * generated pass through. These are commands on the admin | ||
566 | * queue to initialize the controller. This will reject any | ||
567 | * ioctl admin cmds received while initializing. | ||
568 | */ | ||
569 | if (queue_live && !(nvme_req(rq)->flags & NVME_REQ_USERCMD)) | ||
570 | return BLK_STS_OK; | ||
571 | |||
572 | /* | ||
573 | * If the queue is not live, allow only a connect command. This | ||
574 | * will reject any ioctl admin cmd as well as initialization | ||
575 | * commands if the controller reverted the queue to non-live. | ||
576 | */ | ||
577 | if (!queue_live && blk_rq_is_passthrough(rq) && | ||
578 | cmd->common.opcode == nvme_fabrics_command && | ||
579 | cmd->fabrics.fctype == nvme_fabrics_type_connect) | ||
580 | return BLK_STS_OK; | ||
581 | break; | 578 | break; |
582 | default: | 579 | default: |
583 | break; | 580 | break; |
581 | case NVME_CTRL_DEAD: | ||
582 | return false; | ||
584 | } | 583 | } |
585 | 584 | ||
586 | /* | 585 | return queue_live; |
587 | * Any other new io is something we're not in a state to send to the | ||
588 | * device. Default action is to busy it and retry it after the | ||
589 | * controller state is recovered. However, anything marked for failfast | ||
590 | * or nvme multipath is immediately failed. Note: commands used to | ||
591 | * initialize the controller will be marked for failfast. | ||
592 | * Note: nvme cli/ioctl commands are marked for failfast. | ||
593 | */ | ||
594 | if (!blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH)) | ||
595 | return BLK_STS_RESOURCE; | ||
596 | nvme_req(rq)->status = NVME_SC_ABORT_REQ; | ||
597 | return BLK_STS_IOERR; | ||
598 | } | 586 | } |
599 | EXPORT_SYMBOL_GPL(nvmf_check_if_ready); | 587 | EXPORT_SYMBOL_GPL(__nvmf_check_ready); |
600 | 588 | ||
601 | static const match_table_t opt_tokens = { | 589 | static const match_table_t opt_tokens = { |
602 | { NVMF_OPT_TRANSPORT, "transport=%s" }, | 590 | { NVMF_OPT_TRANSPORT, "transport=%s" }, |
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 7491a0bbf711..e1818a27aa2d 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -162,7 +162,17 @@ void nvmf_unregister_transport(struct nvmf_transport_ops *ops); | |||
162 | void nvmf_free_options(struct nvmf_ctrl_options *opts); | 162 | void nvmf_free_options(struct nvmf_ctrl_options *opts); |
163 | int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size); | 163 | int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size); |
164 | bool nvmf_should_reconnect(struct nvme_ctrl *ctrl); | 164 | bool nvmf_should_reconnect(struct nvme_ctrl *ctrl); |
165 | blk_status_t nvmf_check_if_ready(struct nvme_ctrl *ctrl, | 165 | blk_status_t nvmf_fail_nonready_command(struct request *rq); |
166 | struct request *rq, bool queue_live, bool is_connected); | 166 | bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, |
167 | bool queue_live); | ||
168 | |||
169 | static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq, | ||
170 | bool queue_live) | ||
171 | { | ||
172 | if (likely(ctrl->state == NVME_CTRL_LIVE || | ||
173 | ctrl->state == NVME_CTRL_ADMIN_ONLY)) | ||
174 | return true; | ||
175 | return __nvmf_check_ready(ctrl, rq, queue_live); | ||
176 | } | ||
167 | 177 | ||
168 | #endif /* _NVME_FABRICS_H */ | 178 | #endif /* _NVME_FABRICS_H */ |
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 0bad65803271..b528a2f5826c 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -142,6 +142,7 @@ struct nvme_fc_ctrl { | |||
142 | struct nvme_fc_rport *rport; | 142 | struct nvme_fc_rport *rport; |
143 | u32 cnum; | 143 | u32 cnum; |
144 | 144 | ||
145 | bool ioq_live; | ||
145 | bool assoc_active; | 146 | bool assoc_active; |
146 | u64 association_id; | 147 | u64 association_id; |
147 | 148 | ||
@@ -1470,21 +1471,6 @@ nvme_fc_xmt_disconnect_assoc(struct nvme_fc_ctrl *ctrl) | |||
1470 | 1471 | ||
1471 | static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg); | 1472 | static void nvme_fc_error_recovery(struct nvme_fc_ctrl *ctrl, char *errmsg); |
1472 | 1473 | ||
1473 | static int | ||
1474 | nvme_fc_reinit_request(void *data, struct request *rq) | ||
1475 | { | ||
1476 | struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(rq); | ||
1477 | struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; | ||
1478 | |||
1479 | memset(cmdiu, 0, sizeof(*cmdiu)); | ||
1480 | cmdiu->scsi_id = NVME_CMD_SCSI_ID; | ||
1481 | cmdiu->fc_id = NVME_CMD_FC_ID; | ||
1482 | cmdiu->iu_len = cpu_to_be16(sizeof(*cmdiu) / sizeof(u32)); | ||
1483 | memset(&op->rsp_iu, 0, sizeof(op->rsp_iu)); | ||
1484 | |||
1485 | return 0; | ||
1486 | } | ||
1487 | |||
1488 | static void | 1474 | static void |
1489 | __nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl, | 1475 | __nvme_fc_exit_request(struct nvme_fc_ctrl *ctrl, |
1490 | struct nvme_fc_fcp_op *op) | 1476 | struct nvme_fc_fcp_op *op) |
@@ -1893,6 +1879,7 @@ nvme_fc_free_queue(struct nvme_fc_queue *queue) | |||
1893 | */ | 1879 | */ |
1894 | 1880 | ||
1895 | queue->connection_id = 0; | 1881 | queue->connection_id = 0; |
1882 | atomic_set(&queue->csn, 1); | ||
1896 | } | 1883 | } |
1897 | 1884 | ||
1898 | static void | 1885 | static void |
@@ -2279,14 +2266,13 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, | |||
2279 | struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; | 2266 | struct nvme_fc_cmd_iu *cmdiu = &op->cmd_iu; |
2280 | struct nvme_command *sqe = &cmdiu->sqe; | 2267 | struct nvme_command *sqe = &cmdiu->sqe; |
2281 | enum nvmefc_fcp_datadir io_dir; | 2268 | enum nvmefc_fcp_datadir io_dir; |
2269 | bool queue_ready = test_bit(NVME_FC_Q_LIVE, &queue->flags); | ||
2282 | u32 data_len; | 2270 | u32 data_len; |
2283 | blk_status_t ret; | 2271 | blk_status_t ret; |
2284 | 2272 | ||
2285 | ret = nvmf_check_if_ready(&queue->ctrl->ctrl, rq, | 2273 | if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE || |
2286 | test_bit(NVME_FC_Q_LIVE, &queue->flags), | 2274 | !nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) |
2287 | ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE); | 2275 | return nvmf_fail_nonready_command(rq); |
2288 | if (unlikely(ret)) | ||
2289 | return ret; | ||
2290 | 2276 | ||
2291 | ret = nvme_setup_cmd(ns, rq, sqe); | 2277 | ret = nvme_setup_cmd(ns, rq, sqe); |
2292 | if (ret) | 2278 | if (ret) |
@@ -2463,6 +2449,8 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl) | |||
2463 | if (ret) | 2449 | if (ret) |
2464 | goto out_delete_hw_queues; | 2450 | goto out_delete_hw_queues; |
2465 | 2451 | ||
2452 | ctrl->ioq_live = true; | ||
2453 | |||
2466 | return 0; | 2454 | return 0; |
2467 | 2455 | ||
2468 | out_delete_hw_queues: | 2456 | out_delete_hw_queues: |
@@ -2480,7 +2468,7 @@ out_free_tag_set: | |||
2480 | } | 2468 | } |
2481 | 2469 | ||
2482 | static int | 2470 | static int |
2483 | nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl) | 2471 | nvme_fc_recreate_io_queues(struct nvme_fc_ctrl *ctrl) |
2484 | { | 2472 | { |
2485 | struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; | 2473 | struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; |
2486 | unsigned int nr_io_queues; | 2474 | unsigned int nr_io_queues; |
@@ -2500,12 +2488,6 @@ nvme_fc_reinit_io_queues(struct nvme_fc_ctrl *ctrl) | |||
2500 | if (ctrl->ctrl.queue_count == 1) | 2488 | if (ctrl->ctrl.queue_count == 1) |
2501 | return 0; | 2489 | return 0; |
2502 | 2490 | ||
2503 | nvme_fc_init_io_queues(ctrl); | ||
2504 | |||
2505 | ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); | ||
2506 | if (ret) | ||
2507 | goto out_free_io_queues; | ||
2508 | |||
2509 | ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1); | 2491 | ret = nvme_fc_create_hw_io_queues(ctrl, ctrl->ctrl.sqsize + 1); |
2510 | if (ret) | 2492 | if (ret) |
2511 | goto out_free_io_queues; | 2493 | goto out_free_io_queues; |
@@ -2603,8 +2585,6 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) | |||
2603 | * Create the admin queue | 2585 | * Create the admin queue |
2604 | */ | 2586 | */ |
2605 | 2587 | ||
2606 | nvme_fc_init_queue(ctrl, 0); | ||
2607 | |||
2608 | ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0, | 2588 | ret = __nvme_fc_create_hw_queue(ctrl, &ctrl->queues[0], 0, |
2609 | NVME_AQ_DEPTH); | 2589 | NVME_AQ_DEPTH); |
2610 | if (ret) | 2590 | if (ret) |
@@ -2615,8 +2595,7 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) | |||
2615 | if (ret) | 2595 | if (ret) |
2616 | goto out_delete_hw_queue; | 2596 | goto out_delete_hw_queue; |
2617 | 2597 | ||
2618 | if (ctrl->ctrl.state != NVME_CTRL_NEW) | 2598 | blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); |
2619 | blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); | ||
2620 | 2599 | ||
2621 | ret = nvmf_connect_admin_queue(&ctrl->ctrl); | 2600 | ret = nvmf_connect_admin_queue(&ctrl->ctrl); |
2622 | if (ret) | 2601 | if (ret) |
@@ -2689,10 +2668,10 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) | |||
2689 | */ | 2668 | */ |
2690 | 2669 | ||
2691 | if (ctrl->ctrl.queue_count > 1) { | 2670 | if (ctrl->ctrl.queue_count > 1) { |
2692 | if (ctrl->ctrl.state == NVME_CTRL_NEW) | 2671 | if (!ctrl->ioq_live) |
2693 | ret = nvme_fc_create_io_queues(ctrl); | 2672 | ret = nvme_fc_create_io_queues(ctrl); |
2694 | else | 2673 | else |
2695 | ret = nvme_fc_reinit_io_queues(ctrl); | 2674 | ret = nvme_fc_recreate_io_queues(ctrl); |
2696 | if (ret) | 2675 | if (ret) |
2697 | goto out_term_aen_ops; | 2676 | goto out_term_aen_ops; |
2698 | } | 2677 | } |
@@ -2776,8 +2755,7 @@ nvme_fc_delete_association(struct nvme_fc_ctrl *ctrl) | |||
2776 | * use blk_mq_tagset_busy_itr() and the transport routine to | 2755 | * use blk_mq_tagset_busy_itr() and the transport routine to |
2777 | * terminate the exchanges. | 2756 | * terminate the exchanges. |
2778 | */ | 2757 | */ |
2779 | if (ctrl->ctrl.state != NVME_CTRL_NEW) | 2758 | blk_mq_quiesce_queue(ctrl->ctrl.admin_q); |
2780 | blk_mq_quiesce_queue(ctrl->ctrl.admin_q); | ||
2781 | blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, | 2759 | blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, |
2782 | nvme_fc_terminate_exchange, &ctrl->ctrl); | 2760 | nvme_fc_terminate_exchange, &ctrl->ctrl); |
2783 | 2761 | ||
@@ -2917,7 +2895,6 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = { | |||
2917 | .submit_async_event = nvme_fc_submit_async_event, | 2895 | .submit_async_event = nvme_fc_submit_async_event, |
2918 | .delete_ctrl = nvme_fc_delete_ctrl, | 2896 | .delete_ctrl = nvme_fc_delete_ctrl, |
2919 | .get_address = nvmf_get_address, | 2897 | .get_address = nvmf_get_address, |
2920 | .reinit_request = nvme_fc_reinit_request, | ||
2921 | }; | 2898 | }; |
2922 | 2899 | ||
2923 | static void | 2900 | static void |
@@ -2934,7 +2911,7 @@ nvme_fc_connect_ctrl_work(struct work_struct *work) | |||
2934 | nvme_fc_reconnect_or_delete(ctrl, ret); | 2911 | nvme_fc_reconnect_or_delete(ctrl, ret); |
2935 | else | 2912 | else |
2936 | dev_info(ctrl->ctrl.device, | 2913 | dev_info(ctrl->ctrl.device, |
2937 | "NVME-FC{%d}: controller reconnect complete\n", | 2914 | "NVME-FC{%d}: controller connect complete\n", |
2938 | ctrl->cnum); | 2915 | ctrl->cnum); |
2939 | } | 2916 | } |
2940 | 2917 | ||
@@ -2982,7 +2959,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, | |||
2982 | { | 2959 | { |
2983 | struct nvme_fc_ctrl *ctrl; | 2960 | struct nvme_fc_ctrl *ctrl; |
2984 | unsigned long flags; | 2961 | unsigned long flags; |
2985 | int ret, idx, retry; | 2962 | int ret, idx; |
2986 | 2963 | ||
2987 | if (!(rport->remoteport.port_role & | 2964 | if (!(rport->remoteport.port_role & |
2988 | (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) { | 2965 | (FC_PORT_ROLE_NVME_DISCOVERY | FC_PORT_ROLE_NVME_TARGET))) { |
@@ -3009,11 +2986,13 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, | |||
3009 | } | 2986 | } |
3010 | 2987 | ||
3011 | ctrl->ctrl.opts = opts; | 2988 | ctrl->ctrl.opts = opts; |
2989 | ctrl->ctrl.nr_reconnects = 0; | ||
3012 | INIT_LIST_HEAD(&ctrl->ctrl_list); | 2990 | INIT_LIST_HEAD(&ctrl->ctrl_list); |
3013 | ctrl->lport = lport; | 2991 | ctrl->lport = lport; |
3014 | ctrl->rport = rport; | 2992 | ctrl->rport = rport; |
3015 | ctrl->dev = lport->dev; | 2993 | ctrl->dev = lport->dev; |
3016 | ctrl->cnum = idx; | 2994 | ctrl->cnum = idx; |
2995 | ctrl->ioq_live = false; | ||
3017 | ctrl->assoc_active = false; | 2996 | ctrl->assoc_active = false; |
3018 | init_waitqueue_head(&ctrl->ioabort_wait); | 2997 | init_waitqueue_head(&ctrl->ioabort_wait); |
3019 | 2998 | ||
@@ -3032,6 +3011,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, | |||
3032 | 3011 | ||
3033 | ctrl->ctrl.sqsize = opts->queue_size - 1; | 3012 | ctrl->ctrl.sqsize = opts->queue_size - 1; |
3034 | ctrl->ctrl.kato = opts->kato; | 3013 | ctrl->ctrl.kato = opts->kato; |
3014 | ctrl->ctrl.cntlid = 0xffff; | ||
3035 | 3015 | ||
3036 | ret = -ENOMEM; | 3016 | ret = -ENOMEM; |
3037 | ctrl->queues = kcalloc(ctrl->ctrl.queue_count, | 3017 | ctrl->queues = kcalloc(ctrl->ctrl.queue_count, |
@@ -3039,6 +3019,8 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, | |||
3039 | if (!ctrl->queues) | 3019 | if (!ctrl->queues) |
3040 | goto out_free_ida; | 3020 | goto out_free_ida; |
3041 | 3021 | ||
3022 | nvme_fc_init_queue(ctrl, 0); | ||
3023 | |||
3042 | memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); | 3024 | memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); |
3043 | ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; | 3025 | ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops; |
3044 | ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH; | 3026 | ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH; |
@@ -3081,62 +3063,24 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, | |||
3081 | list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list); | 3063 | list_add_tail(&ctrl->ctrl_list, &rport->ctrl_list); |
3082 | spin_unlock_irqrestore(&rport->lock, flags); | 3064 | spin_unlock_irqrestore(&rport->lock, flags); |
3083 | 3065 | ||
3084 | /* | 3066 | if (!nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_RESETTING) || |
3085 | * It's possible that transactions used to create the association | 3067 | !nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_CONNECTING)) { |
3086 | * may fail. Examples: CreateAssociation LS or CreateIOConnection | ||
3087 | * LS gets dropped/corrupted/fails; or a frame gets dropped or a | ||
3088 | * command times out for one of the actions to init the controller | ||
3089 | * (Connect, Get/Set_Property, Set_Features, etc). Many of these | ||
3090 | * transport errors (frame drop, LS failure) inherently must kill | ||
3091 | * the association. The transport is coded so that any command used | ||
3092 | * to create the association (prior to a LIVE state transition | ||
3093 | * while NEW or CONNECTING) will fail if it completes in error or | ||
3094 | * times out. | ||
3095 | * | ||
3096 | * As such: as the connect request was mostly likely due to a | ||
3097 | * udev event that discovered the remote port, meaning there is | ||
3098 | * not an admin or script there to restart if the connect | ||
3099 | * request fails, retry the initial connection creation up to | ||
3100 | * three times before giving up and declaring failure. | ||
3101 | */ | ||
3102 | for (retry = 0; retry < 3; retry++) { | ||
3103 | ret = nvme_fc_create_association(ctrl); | ||
3104 | if (!ret) | ||
3105 | break; | ||
3106 | } | ||
3107 | |||
3108 | if (ret) { | ||
3109 | nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING); | ||
3110 | cancel_work_sync(&ctrl->ctrl.reset_work); | ||
3111 | cancel_delayed_work_sync(&ctrl->connect_work); | ||
3112 | |||
3113 | /* couldn't schedule retry - fail out */ | ||
3114 | dev_err(ctrl->ctrl.device, | 3068 | dev_err(ctrl->ctrl.device, |
3115 | "NVME-FC{%d}: Connect retry failed\n", ctrl->cnum); | 3069 | "NVME-FC{%d}: failed to init ctrl state\n", ctrl->cnum); |
3116 | 3070 | goto fail_ctrl; | |
3117 | ctrl->ctrl.opts = NULL; | 3071 | } |
3118 | 3072 | ||
3119 | /* initiate nvme ctrl ref counting teardown */ | 3073 | nvme_get_ctrl(&ctrl->ctrl); |
3120 | nvme_uninit_ctrl(&ctrl->ctrl); | ||
3121 | 3074 | ||
3122 | /* Remove core ctrl ref. */ | 3075 | if (!queue_delayed_work(nvme_wq, &ctrl->connect_work, 0)) { |
3123 | nvme_put_ctrl(&ctrl->ctrl); | 3076 | nvme_put_ctrl(&ctrl->ctrl); |
3124 | 3077 | dev_err(ctrl->ctrl.device, | |
3125 | /* as we're past the point where we transition to the ref | 3078 | "NVME-FC{%d}: failed to schedule initial connect\n", |
3126 | * counting teardown path, if we return a bad pointer here, | 3079 | ctrl->cnum); |
3127 | * the calling routine, thinking it's prior to the | 3080 | goto fail_ctrl; |
3128 | * transition, will do an rport put. Since the teardown | ||
3129 | * path also does a rport put, we do an extra get here to | ||
3130 | * so proper order/teardown happens. | ||
3131 | */ | ||
3132 | nvme_fc_rport_get(rport); | ||
3133 | |||
3134 | if (ret > 0) | ||
3135 | ret = -EIO; | ||
3136 | return ERR_PTR(ret); | ||
3137 | } | 3081 | } |
3138 | 3082 | ||
3139 | nvme_get_ctrl(&ctrl->ctrl); | 3083 | flush_delayed_work(&ctrl->connect_work); |
3140 | 3084 | ||
3141 | dev_info(ctrl->ctrl.device, | 3085 | dev_info(ctrl->ctrl.device, |
3142 | "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", | 3086 | "NVME-FC{%d}: new ctrl: NQN \"%s\"\n", |
@@ -3144,6 +3088,30 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, | |||
3144 | 3088 | ||
3145 | return &ctrl->ctrl; | 3089 | return &ctrl->ctrl; |
3146 | 3090 | ||
3091 | fail_ctrl: | ||
3092 | nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_DELETING); | ||
3093 | cancel_work_sync(&ctrl->ctrl.reset_work); | ||
3094 | cancel_delayed_work_sync(&ctrl->connect_work); | ||
3095 | |||
3096 | ctrl->ctrl.opts = NULL; | ||
3097 | |||
3098 | /* initiate nvme ctrl ref counting teardown */ | ||
3099 | nvme_uninit_ctrl(&ctrl->ctrl); | ||
3100 | |||
3101 | /* Remove core ctrl ref. */ | ||
3102 | nvme_put_ctrl(&ctrl->ctrl); | ||
3103 | |||
3104 | /* as we're past the point where we transition to the ref | ||
3105 | * counting teardown path, if we return a bad pointer here, | ||
3106 | * the calling routine, thinking it's prior to the | ||
3107 | * transition, will do an rport put. Since the teardown | ||
3108 | * path also does a rport put, we do an extra get here to | ||
3109 | * so proper order/teardown happens. | ||
3110 | */ | ||
3111 | nvme_fc_rport_get(rport); | ||
3112 | |||
3113 | return ERR_PTR(-EIO); | ||
3114 | |||
3147 | out_cleanup_admin_q: | 3115 | out_cleanup_admin_q: |
3148 | blk_cleanup_queue(ctrl->ctrl.admin_q); | 3116 | blk_cleanup_queue(ctrl->ctrl.admin_q); |
3149 | out_free_admin_tag_set: | 3117 | out_free_admin_tag_set: |
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index d7b664ae5923..1ffd3e8b13a1 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -12,6 +12,7 @@ | |||
12 | */ | 12 | */ |
13 | 13 | ||
14 | #include <linux/moduleparam.h> | 14 | #include <linux/moduleparam.h> |
15 | #include <trace/events/block.h> | ||
15 | #include "nvme.h" | 16 | #include "nvme.h" |
16 | 17 | ||
17 | static bool multipath = true; | 18 | static bool multipath = true; |
@@ -111,6 +112,9 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q, | |||
111 | if (likely(ns)) { | 112 | if (likely(ns)) { |
112 | bio->bi_disk = ns->disk; | 113 | bio->bi_disk = ns->disk; |
113 | bio->bi_opf |= REQ_NVME_MPATH; | 114 | bio->bi_opf |= REQ_NVME_MPATH; |
115 | trace_block_bio_remap(bio->bi_disk->queue, bio, | ||
116 | disk_devt(ns->head->disk), | ||
117 | bio->bi_iter.bi_sector); | ||
114 | ret = direct_make_request(bio); | 118 | ret = direct_make_request(bio); |
115 | } else if (!list_empty_careful(&head->list)) { | 119 | } else if (!list_empty_careful(&head->list)) { |
116 | dev_warn_ratelimited(dev, "no path available - requeuing I/O\n"); | 120 | dev_warn_ratelimited(dev, "no path available - requeuing I/O\n"); |
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 34df07d44f80..231807cbc849 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -321,7 +321,6 @@ struct nvme_ctrl_ops { | |||
321 | void (*submit_async_event)(struct nvme_ctrl *ctrl); | 321 | void (*submit_async_event)(struct nvme_ctrl *ctrl); |
322 | void (*delete_ctrl)(struct nvme_ctrl *ctrl); | 322 | void (*delete_ctrl)(struct nvme_ctrl *ctrl); |
323 | int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size); | 323 | int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size); |
324 | int (*reinit_request)(void *data, struct request *rq); | ||
325 | void (*stop_ctrl)(struct nvme_ctrl *ctrl); | 324 | void (*stop_ctrl)(struct nvme_ctrl *ctrl); |
326 | }; | 325 | }; |
327 | 326 | ||
@@ -416,7 +415,6 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl); | |||
416 | void nvme_wait_freeze(struct nvme_ctrl *ctrl); | 415 | void nvme_wait_freeze(struct nvme_ctrl *ctrl); |
417 | void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout); | 416 | void nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout); |
418 | void nvme_start_freeze(struct nvme_ctrl *ctrl); | 417 | void nvme_start_freeze(struct nvme_ctrl *ctrl); |
419 | int nvme_reinit_tagset(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set); | ||
420 | 418 | ||
421 | #define NVME_QID_ANY -1 | 419 | #define NVME_QID_ANY -1 |
422 | struct request *nvme_alloc_request(struct request_queue *q, | 420 | struct request *nvme_alloc_request(struct request_queue *q, |
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 2aba03876d84..c9424da0d23e 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -1189,21 +1189,38 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, | |||
1189 | count = ib_dma_map_sg(ibdev, req->sg_table.sgl, req->nents, | 1189 | count = ib_dma_map_sg(ibdev, req->sg_table.sgl, req->nents, |
1190 | rq_data_dir(rq) == WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE); | 1190 | rq_data_dir(rq) == WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE); |
1191 | if (unlikely(count <= 0)) { | 1191 | if (unlikely(count <= 0)) { |
1192 | sg_free_table_chained(&req->sg_table, true); | 1192 | ret = -EIO; |
1193 | return -EIO; | 1193 | goto out_free_table; |
1194 | } | 1194 | } |
1195 | 1195 | ||
1196 | if (count == 1) { | 1196 | if (count == 1) { |
1197 | if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) && | 1197 | if (rq_data_dir(rq) == WRITE && nvme_rdma_queue_idx(queue) && |
1198 | blk_rq_payload_bytes(rq) <= | 1198 | blk_rq_payload_bytes(rq) <= |
1199 | nvme_rdma_inline_data_size(queue)) | 1199 | nvme_rdma_inline_data_size(queue)) { |
1200 | return nvme_rdma_map_sg_inline(queue, req, c); | 1200 | ret = nvme_rdma_map_sg_inline(queue, req, c); |
1201 | goto out; | ||
1202 | } | ||
1201 | 1203 | ||
1202 | if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) | 1204 | if (dev->pd->flags & IB_PD_UNSAFE_GLOBAL_RKEY) { |
1203 | return nvme_rdma_map_sg_single(queue, req, c); | 1205 | ret = nvme_rdma_map_sg_single(queue, req, c); |
1206 | goto out; | ||
1207 | } | ||
1204 | } | 1208 | } |
1205 | 1209 | ||
1206 | return nvme_rdma_map_sg_fr(queue, req, c, count); | 1210 | ret = nvme_rdma_map_sg_fr(queue, req, c, count); |
1211 | out: | ||
1212 | if (unlikely(ret)) | ||
1213 | goto out_unmap_sg; | ||
1214 | |||
1215 | return 0; | ||
1216 | |||
1217 | out_unmap_sg: | ||
1218 | ib_dma_unmap_sg(ibdev, req->sg_table.sgl, | ||
1219 | req->nents, rq_data_dir(rq) == | ||
1220 | WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE); | ||
1221 | out_free_table: | ||
1222 | sg_free_table_chained(&req->sg_table, true); | ||
1223 | return ret; | ||
1207 | } | 1224 | } |
1208 | 1225 | ||
1209 | static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) | 1226 | static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) |
@@ -1613,15 +1630,14 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, | |||
1613 | struct nvme_rdma_qe *sqe = &req->sqe; | 1630 | struct nvme_rdma_qe *sqe = &req->sqe; |
1614 | struct nvme_command *c = sqe->data; | 1631 | struct nvme_command *c = sqe->data; |
1615 | struct ib_device *dev; | 1632 | struct ib_device *dev; |
1633 | bool queue_ready = test_bit(NVME_RDMA_Q_LIVE, &queue->flags); | ||
1616 | blk_status_t ret; | 1634 | blk_status_t ret; |
1617 | int err; | 1635 | int err; |
1618 | 1636 | ||
1619 | WARN_ON_ONCE(rq->tag < 0); | 1637 | WARN_ON_ONCE(rq->tag < 0); |
1620 | 1638 | ||
1621 | ret = nvmf_check_if_ready(&queue->ctrl->ctrl, rq, | 1639 | if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready)) |
1622 | test_bit(NVME_RDMA_Q_LIVE, &queue->flags), true); | 1640 | return nvmf_fail_nonready_command(rq); |
1623 | if (unlikely(ret)) | ||
1624 | return ret; | ||
1625 | 1641 | ||
1626 | dev = queue->device->dev; | 1642 | dev = queue->device->dev; |
1627 | ib_dma_sync_single_for_cpu(dev, sqe->dma, | 1643 | ib_dma_sync_single_for_cpu(dev, sqe->dma, |
diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 962532842769..38803576d5e1 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -119,9 +119,11 @@ static void nvmet_execute_get_log_page_smart(struct nvmet_req *req) | |||
119 | else | 119 | else |
120 | status = nvmet_get_smart_log_nsid(req, log); | 120 | status = nvmet_get_smart_log_nsid(req, log); |
121 | if (status) | 121 | if (status) |
122 | goto out; | 122 | goto out_free_log; |
123 | 123 | ||
124 | status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log)); | 124 | status = nvmet_copy_to_sgl(req, 0, log, sizeof(*log)); |
125 | out_free_log: | ||
126 | kfree(log); | ||
125 | out: | 127 | out: |
126 | nvmet_req_complete(req, status); | 128 | nvmet_req_complete(req, status); |
127 | } | 129 | } |
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 1304ec3a7ede..d8d91f04bd7e 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -158,12 +158,11 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, | |||
158 | struct nvme_loop_queue *queue = hctx->driver_data; | 158 | struct nvme_loop_queue *queue = hctx->driver_data; |
159 | struct request *req = bd->rq; | 159 | struct request *req = bd->rq; |
160 | struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); | 160 | struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); |
161 | bool queue_ready = test_bit(NVME_LOOP_Q_LIVE, &queue->flags); | ||
161 | blk_status_t ret; | 162 | blk_status_t ret; |
162 | 163 | ||
163 | ret = nvmf_check_if_ready(&queue->ctrl->ctrl, req, | 164 | if (!nvmf_check_ready(&queue->ctrl->ctrl, req, queue_ready)) |
164 | test_bit(NVME_LOOP_Q_LIVE, &queue->flags), true); | 165 | return nvmf_fail_nonready_command(req); |
165 | if (unlikely(ret)) | ||
166 | return ret; | ||
167 | 166 | ||
168 | ret = nvme_setup_cmd(ns, req, &iod->cmd); | 167 | ret = nvme_setup_cmd(ns, req, &iod->cmd); |
169 | if (ret) | 168 | if (ret) |
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index fb355173f3c7..e3147eb74222 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -281,8 +281,6 @@ void blk_freeze_queue_start(struct request_queue *q); | |||
281 | void blk_mq_freeze_queue_wait(struct request_queue *q); | 281 | void blk_mq_freeze_queue_wait(struct request_queue *q); |
282 | int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, | 282 | int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, |
283 | unsigned long timeout); | 283 | unsigned long timeout); |
284 | int blk_mq_tagset_iter(struct blk_mq_tag_set *set, void *data, | ||
285 | int (reinit_request)(void *, struct request *)); | ||
286 | 284 | ||
287 | int blk_mq_map_queues(struct blk_mq_tag_set *set); | 285 | int blk_mq_map_queues(struct blk_mq_tag_set *set); |
288 | void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); | 286 | void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); |
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index bca3a92eb55f..9154570edf29 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -127,6 +127,8 @@ typedef __u32 __bitwise req_flags_t; | |||
127 | #define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19)) | 127 | #define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19)) |
128 | /* already slept for hybrid poll */ | 128 | /* already slept for hybrid poll */ |
129 | #define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 20)) | 129 | #define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 20)) |
130 | /* ->timeout has been called, don't expire again */ | ||
131 | #define RQF_TIMED_OUT ((__force req_flags_t)(1 << 21)) | ||
130 | 132 | ||
131 | /* flags that prevent us from merging requests: */ | 133 | /* flags that prevent us from merging requests: */ |
132 | #define RQF_NOMERGE_FLAGS \ | 134 | #define RQF_NOMERGE_FLAGS \ |
@@ -560,7 +562,6 @@ struct request_queue { | |||
560 | unsigned int dma_alignment; | 562 | unsigned int dma_alignment; |
561 | 563 | ||
562 | struct blk_queue_tag *queue_tags; | 564 | struct blk_queue_tag *queue_tags; |
563 | struct list_head tag_busy_list; | ||
564 | 565 | ||
565 | unsigned int nr_sorted; | 566 | unsigned int nr_sorted; |
566 | unsigned int in_flight[2]; | 567 | unsigned int in_flight[2]; |
@@ -1373,7 +1374,6 @@ extern void blk_queue_end_tag(struct request_queue *, struct request *); | |||
1373 | extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *, int); | 1374 | extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *, int); |
1374 | extern void blk_queue_free_tags(struct request_queue *); | 1375 | extern void blk_queue_free_tags(struct request_queue *); |
1375 | extern int blk_queue_resize_tags(struct request_queue *, int); | 1376 | extern int blk_queue_resize_tags(struct request_queue *, int); |
1376 | extern void blk_queue_invalidate_tags(struct request_queue *); | ||
1377 | extern struct blk_queue_tag *blk_init_tags(int, int); | 1377 | extern struct blk_queue_tag *blk_init_tags(int, int); |
1378 | extern void blk_free_tags(struct blk_queue_tag *); | 1378 | extern void blk_free_tags(struct blk_queue_tag *); |
1379 | 1379 | ||