author    Linus Torvalds <torvalds@linux-foundation.org>  2017-12-01 08:05:45 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2017-12-01 08:05:45 -0500
commit    75f64f68afa165ebe139cca2adb4df0a229a06de (patch)
tree      6aee1ee80863ea121c31f15ec794f2b9943ccbfd
parent    df8ba95c572a187ed2aa7403e97a7a7f58c01f00 (diff)
parent    ed565371e368f014db237aacf42b27b40b1bd247 (diff)
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
"A selection of fixes/changes that should make it into this series.
This contains:
- NVMe, two merges, containing:
- pci-e, rdma, and fc fixes
- Device quirks
- Fix for a badblocks leak in null_blk
- bcache fix from Rui Hua for a race condition regression where
-EINTR was returned to upper layers that didn't expect it.
- Regression fix for blktrace for a bug introduced in this series.
- blktrace cleanup for cgroup id.
- bdi registration error handling.
- Small series with cleanups for blk-wbt.
- Various little fixes for typos and the like.
Nothing earth shattering, most important are the NVMe and bcache fixes"
* 'for-linus' of git://git.kernel.dk/linux-block: (34 commits)
nvme-pci: fix NULL pointer dereference in nvme_free_host_mem()
nvme-rdma: fix memory leak during queue allocation
blktrace: fix trace mutex deadlock
nvme-rdma: Use mr pool
nvme-rdma: Check remotely invalidated rkey matches our expected rkey
nvme-rdma: wait for local invalidation before completing a request
nvme-rdma: don't complete requests before a send work request has completed
nvme-rdma: don't suppress send completions
bcache: check return value of register_shrinker
bcache: recover data from backing when data is clean
bcache: Fix building error on MIPS
bcache: add a comment in journal bucket reading
nvme-fc: don't use bit masks for set/test_bit() numbers
blk-wbt: fix comments typo
blk-wbt: move wbt_clear_stat to common place in wbt_done
blk-sysfs: remove NULL pointer checking in queue_wb_lat_store
blk-wbt: remove duplicated setting in wbt_init
nvme-pci: add quirk for delay before CHK RDY for WDC SN200
block: remove useless assignment in bio_split
null_blk: fix dev->badblocks leak
...
-rw-r--r--  block/bio.c                   |   2
-rw-r--r--  block/blk-sysfs.c             |   5
-rw-r--r--  block/blk-wbt.c               |   7
-rw-r--r--  block/genhd.c                 |   9
-rw-r--r--  drivers/block/null_blk.c      |   5
-rw-r--r--  drivers/md/bcache/alloc.c     |   2
-rw-r--r--  drivers/md/bcache/btree.c     |   5
-rw-r--r--  drivers/md/bcache/extents.c   |   2
-rw-r--r--  drivers/md/bcache/journal.c   |   7
-rw-r--r--  drivers/md/bcache/request.c   |  13
-rw-r--r--  drivers/nvme/host/core.c      |  19
-rw-r--r--  drivers/nvme/host/fabrics.h   |  30
-rw-r--r--  drivers/nvme/host/fc.c        |  21
-rw-r--r--  drivers/nvme/host/multipath.c |   2
-rw-r--r--  drivers/nvme/host/nvme.h      |   2
-rw-r--r--  drivers/nvme/host/pci.c       |  17
-rw-r--r--  drivers/nvme/host/rdma.c      | 266
-rw-r--r--  drivers/nvme/target/fc.c      |   9
-rw-r--r--  drivers/nvme/target/loop.c    |  25
-rw-r--r--  include/uapi/linux/bcache.h   |   2
-rw-r--r--  kernel/trace/blktrace.c       |  30
-rw-r--r--  mm/backing-dev.c              |  22
22 files changed, 291 insertions, 211 deletions
diff --git a/block/bio.c b/block/bio.c
index 228229f3bb76..8bfdea58159b 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1819,7 +1819,7 @@ EXPORT_SYMBOL(bio_endio); | |||
1819 | struct bio *bio_split(struct bio *bio, int sectors, | 1819 | struct bio *bio_split(struct bio *bio, int sectors, |
1820 | gfp_t gfp, struct bio_set *bs) | 1820 | gfp_t gfp, struct bio_set *bs) |
1821 | { | 1821 | { |
1822 | struct bio *split = NULL; | 1822 | struct bio *split; |
1823 | 1823 | ||
1824 | BUG_ON(sectors <= 0); | 1824 | BUG_ON(sectors <= 0); |
1825 | BUG_ON(sectors >= bio_sectors(bio)); | 1825 | BUG_ON(sectors >= bio_sectors(bio)); |
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index e54be402899d..870484eaed1f 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -450,12 +450,9 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page, | |||
450 | ret = wbt_init(q); | 450 | ret = wbt_init(q); |
451 | if (ret) | 451 | if (ret) |
452 | return ret; | 452 | return ret; |
453 | |||
454 | rwb = q->rq_wb; | ||
455 | if (!rwb) | ||
456 | return -EINVAL; | ||
457 | } | 453 | } |
458 | 454 | ||
455 | rwb = q->rq_wb; | ||
459 | if (val == -1) | 456 | if (val == -1) |
460 | rwb->min_lat_nsec = wbt_default_latency_nsec(q); | 457 | rwb->min_lat_nsec = wbt_default_latency_nsec(q); |
461 | else if (val >= 0) | 458 | else if (val >= 0) |
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index b252da0e4c11..ae8de9780085 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -178,12 +178,11 @@ void wbt_done(struct rq_wb *rwb, struct blk_issue_stat *stat) | |||
178 | 178 | ||
179 | if (wbt_is_read(stat)) | 179 | if (wbt_is_read(stat)) |
180 | wb_timestamp(rwb, &rwb->last_comp); | 180 | wb_timestamp(rwb, &rwb->last_comp); |
181 | wbt_clear_state(stat); | ||
182 | } else { | 181 | } else { |
183 | WARN_ON_ONCE(stat == rwb->sync_cookie); | 182 | WARN_ON_ONCE(stat == rwb->sync_cookie); |
184 | __wbt_done(rwb, wbt_stat_to_mask(stat)); | 183 | __wbt_done(rwb, wbt_stat_to_mask(stat)); |
185 | wbt_clear_state(stat); | ||
186 | } | 184 | } |
185 | wbt_clear_state(stat); | ||
187 | } | 186 | } |
188 | 187 | ||
189 | /* | 188 | /* |
@@ -482,7 +481,7 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw) | |||
482 | 481 | ||
483 | /* | 482 | /* |
484 | * At this point we know it's a buffered write. If this is | 483 | * At this point we know it's a buffered write. If this is |
485 | * kswapd trying to free memory, or REQ_SYNC is set, set, then | 484 | * kswapd trying to free memory, or REQ_SYNC is set, then |
486 | * it's WB_SYNC_ALL writeback, and we'll use the max limit for | 485 | * it's WB_SYNC_ALL writeback, and we'll use the max limit for |
487 | * that. If the write is marked as a background write, then use | 486 | * that. If the write is marked as a background write, then use |
488 | * the idle limit, or go to normal if we haven't had competing | 487 | * the idle limit, or go to normal if we haven't had competing |
@@ -723,8 +722,6 @@ int wbt_init(struct request_queue *q) | |||
723 | init_waitqueue_head(&rwb->rq_wait[i].wait); | 722 | init_waitqueue_head(&rwb->rq_wait[i].wait); |
724 | } | 723 | } |
725 | 724 | ||
726 | rwb->wc = 1; | ||
727 | rwb->queue_depth = RWB_DEF_DEPTH; | ||
728 | rwb->last_comp = rwb->last_issue = jiffies; | 725 | rwb->last_comp = rwb->last_issue = jiffies; |
729 | rwb->queue = q; | 726 | rwb->queue = q; |
730 | rwb->win_nsec = RWB_WINDOW_NSEC; | 727 | rwb->win_nsec = RWB_WINDOW_NSEC; |
diff --git a/block/genhd.c b/block/genhd.c
index c2223f12a805..96a66f671720 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -671,10 +671,13 @@ void device_add_disk(struct device *parent, struct gendisk *disk) | |||
671 | disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO; | 671 | disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO; |
672 | disk->flags |= GENHD_FL_NO_PART_SCAN; | 672 | disk->flags |= GENHD_FL_NO_PART_SCAN; |
673 | } else { | 673 | } else { |
674 | int ret; | ||
675 | |||
674 | /* Register BDI before referencing it from bdev */ | 676 | /* Register BDI before referencing it from bdev */ |
675 | disk_to_dev(disk)->devt = devt; | 677 | disk_to_dev(disk)->devt = devt; |
676 | bdi_register_owner(disk->queue->backing_dev_info, | 678 | ret = bdi_register_owner(disk->queue->backing_dev_info, |
677 | disk_to_dev(disk)); | 679 | disk_to_dev(disk)); |
680 | WARN_ON(ret); | ||
678 | blk_register_region(disk_devt(disk), disk->minors, NULL, | 681 | blk_register_region(disk_devt(disk), disk->minors, NULL, |
679 | exact_match, exact_lock, disk); | 682 | exact_match, exact_lock, disk); |
680 | } | 683 | } |
@@ -1389,7 +1392,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) | |||
1389 | 1392 | ||
1390 | if (minors > DISK_MAX_PARTS) { | 1393 | if (minors > DISK_MAX_PARTS) { |
1391 | printk(KERN_ERR | 1394 | printk(KERN_ERR |
1392 | "block: can't allocated more than %d partitions\n", | 1395 | "block: can't allocate more than %d partitions\n", |
1393 | DISK_MAX_PARTS); | 1396 | DISK_MAX_PARTS); |
1394 | minors = DISK_MAX_PARTS; | 1397 | minors = DISK_MAX_PARTS; |
1395 | } | 1398 | } |
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index c61960deb74a..ccb9975a97fa 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -471,7 +471,6 @@ static void nullb_device_release(struct config_item *item) | |||
471 | { | 471 | { |
472 | struct nullb_device *dev = to_nullb_device(item); | 472 | struct nullb_device *dev = to_nullb_device(item); |
473 | 473 | ||
474 | badblocks_exit(&dev->badblocks); | ||
475 | null_free_device_storage(dev, false); | 474 | null_free_device_storage(dev, false); |
476 | null_free_dev(dev); | 475 | null_free_dev(dev); |
477 | } | 476 | } |
@@ -582,6 +581,10 @@ static struct nullb_device *null_alloc_dev(void) | |||
582 | 581 | ||
583 | static void null_free_dev(struct nullb_device *dev) | 582 | static void null_free_dev(struct nullb_device *dev) |
584 | { | 583 | { |
584 | if (!dev) | ||
585 | return; | ||
586 | |||
587 | badblocks_exit(&dev->badblocks); | ||
585 | kfree(dev); | 588 | kfree(dev); |
586 | } | 589 | } |
587 | 590 | ||
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index a27d85232ce1..a0cc1bc6d884 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -490,7 +490,7 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve, | |||
490 | if (b == -1) | 490 | if (b == -1) |
491 | goto err; | 491 | goto err; |
492 | 492 | ||
493 | k->ptr[i] = PTR(ca->buckets[b].gen, | 493 | k->ptr[i] = MAKE_PTR(ca->buckets[b].gen, |
494 | bucket_to_sector(c, b), | 494 | bucket_to_sector(c, b), |
495 | ca->sb.nr_this_dev); | 495 | ca->sb.nr_this_dev); |
496 | 496 | ||
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 11c5503d31dc..81e8dc3dbe5e 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -807,7 +807,10 @@ int bch_btree_cache_alloc(struct cache_set *c) | |||
807 | c->shrink.scan_objects = bch_mca_scan; | 807 | c->shrink.scan_objects = bch_mca_scan; |
808 | c->shrink.seeks = 4; | 808 | c->shrink.seeks = 4; |
809 | c->shrink.batch = c->btree_pages * 2; | 809 | c->shrink.batch = c->btree_pages * 2; |
810 | register_shrinker(&c->shrink); | 810 | |
811 | if (register_shrinker(&c->shrink)) | ||
812 | pr_warn("bcache: %s: could not register shrinker", | ||
813 | __func__); | ||
811 | 814 | ||
812 | return 0; | 815 | return 0; |
813 | } | 816 | } |
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 41c238fc3733..f9d391711595 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -585,7 +585,7 @@ static bool bch_extent_merge(struct btree_keys *bk, struct bkey *l, struct bkey | |||
585 | return false; | 585 | return false; |
586 | 586 | ||
587 | for (i = 0; i < KEY_PTRS(l); i++) | 587 | for (i = 0; i < KEY_PTRS(l); i++) |
588 | if (l->ptr[i] + PTR(0, KEY_SIZE(l), 0) != r->ptr[i] || | 588 | if (l->ptr[i] + MAKE_PTR(0, KEY_SIZE(l), 0) != r->ptr[i] || |
589 | PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i)) | 589 | PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i)) |
590 | return false; | 590 | return false; |
591 | 591 | ||
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 02a98ddb592d..a87165c1d8e5 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -170,6 +170,11 @@ int bch_journal_read(struct cache_set *c, struct list_head *list) | |||
170 | * find a sequence of buckets with valid journal entries | 170 | * find a sequence of buckets with valid journal entries |
171 | */ | 171 | */ |
172 | for (i = 0; i < ca->sb.njournal_buckets; i++) { | 172 | for (i = 0; i < ca->sb.njournal_buckets; i++) { |
173 | /* | ||
174 | * We must try the index l with ZERO first for | ||
175 | * correctness due to the scenario that the journal | ||
176 | * bucket is circular buffer which might have wrapped | ||
177 | */ | ||
173 | l = (i * 2654435769U) % ca->sb.njournal_buckets; | 178 | l = (i * 2654435769U) % ca->sb.njournal_buckets; |
174 | 179 | ||
175 | if (test_bit(l, bitmap)) | 180 | if (test_bit(l, bitmap)) |
@@ -507,7 +512,7 @@ static void journal_reclaim(struct cache_set *c) | |||
507 | continue; | 512 | continue; |
508 | 513 | ||
509 | ja->cur_idx = next; | 514 | ja->cur_idx = next; |
510 | k->ptr[n++] = PTR(0, | 515 | k->ptr[n++] = MAKE_PTR(0, |
511 | bucket_to_sector(c, ca->sb.d[ja->cur_idx]), | 516 | bucket_to_sector(c, ca->sb.d[ja->cur_idx]), |
512 | ca->sb.nr_this_dev); | 517 | ca->sb.nr_this_dev); |
513 | } | 518 | } |
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 3a7aed7282b2..643c3021624f 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -708,16 +708,15 @@ static void cached_dev_read_error(struct closure *cl) | |||
708 | { | 708 | { |
709 | struct search *s = container_of(cl, struct search, cl); | 709 | struct search *s = container_of(cl, struct search, cl); |
710 | struct bio *bio = &s->bio.bio; | 710 | struct bio *bio = &s->bio.bio; |
711 | struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); | ||
712 | 711 | ||
713 | /* | 712 | /* |
714 | * If cache device is dirty (dc->has_dirty is non-zero), then | 713 | * If read request hit dirty data (s->read_dirty_data is true), |
715 | * recovery a failed read request from cached device may get a | 714 | * then recovery a failed read request from cached device may |
716 | * stale data back. So read failure recovery is only permitted | 715 | * get a stale data back. So read failure recovery is only |
717 | * when cache device is clean. | 716 | * permitted when read request hit clean data in cache device, |
717 | * or when cache read race happened. | ||
718 | */ | 718 | */ |
719 | if (s->recoverable && | 719 | if (s->recoverable && !s->read_dirty_data) { |
720 | (dc && !atomic_read(&dc->has_dirty))) { | ||
721 | /* Retry from the backing device: */ | 720 | /* Retry from the backing device: */ |
722 | trace_bcache_read_retry(s->orig_bio); | 721 | trace_bcache_read_retry(s->orig_bio); |
723 | 722 | ||
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 25da74d310d1..f837d666cbd4 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1449,19 +1449,19 @@ static int nvme_pr_command(struct block_device *bdev, u32 cdw10, | |||
1449 | int srcu_idx, ret; | 1449 | int srcu_idx, ret; |
1450 | u8 data[16] = { 0, }; | 1450 | u8 data[16] = { 0, }; |
1451 | 1451 | ||
1452 | ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx); | ||
1453 | if (unlikely(!ns)) | ||
1454 | return -EWOULDBLOCK; | ||
1455 | |||
1452 | put_unaligned_le64(key, &data[0]); | 1456 | put_unaligned_le64(key, &data[0]); |
1453 | put_unaligned_le64(sa_key, &data[8]); | 1457 | put_unaligned_le64(sa_key, &data[8]); |
1454 | 1458 | ||
1455 | memset(&c, 0, sizeof(c)); | 1459 | memset(&c, 0, sizeof(c)); |
1456 | c.common.opcode = op; | 1460 | c.common.opcode = op; |
1457 | c.common.nsid = cpu_to_le32(head->ns_id); | 1461 | c.common.nsid = cpu_to_le32(ns->head->ns_id); |
1458 | c.common.cdw10[0] = cpu_to_le32(cdw10); | 1462 | c.common.cdw10[0] = cpu_to_le32(cdw10); |
1459 | 1463 | ||
1460 | ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx); | 1464 | ret = nvme_submit_sync_cmd(ns->queue, &c, data, 16); |
1461 | if (unlikely(!ns)) | ||
1462 | ret = -EWOULDBLOCK; | ||
1463 | else | ||
1464 | ret = nvme_submit_sync_cmd(ns->queue, &c, data, 16); | ||
1465 | nvme_put_ns_from_disk(head, srcu_idx); | 1465 | nvme_put_ns_from_disk(head, srcu_idx); |
1466 | return ret; | 1466 | return ret; |
1467 | } | 1467 | } |
@@ -2961,8 +2961,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) | |||
2961 | 2961 | ||
2962 | static void nvme_ns_remove(struct nvme_ns *ns) | 2962 | static void nvme_ns_remove(struct nvme_ns *ns) |
2963 | { | 2963 | { |
2964 | struct nvme_ns_head *head = ns->head; | ||
2965 | |||
2966 | if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags)) | 2964 | if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags)) |
2967 | return; | 2965 | return; |
2968 | 2966 | ||
@@ -2980,15 +2978,14 @@ static void nvme_ns_remove(struct nvme_ns *ns) | |||
2980 | 2978 | ||
2981 | mutex_lock(&ns->ctrl->subsys->lock); | 2979 | mutex_lock(&ns->ctrl->subsys->lock); |
2982 | nvme_mpath_clear_current_path(ns); | 2980 | nvme_mpath_clear_current_path(ns); |
2983 | if (head) | 2981 | list_del_rcu(&ns->siblings); |
2984 | list_del_rcu(&ns->siblings); | ||
2985 | mutex_unlock(&ns->ctrl->subsys->lock); | 2982 | mutex_unlock(&ns->ctrl->subsys->lock); |
2986 | 2983 | ||
2987 | mutex_lock(&ns->ctrl->namespaces_mutex); | 2984 | mutex_lock(&ns->ctrl->namespaces_mutex); |
2988 | list_del_init(&ns->list); | 2985 | list_del_init(&ns->list); |
2989 | mutex_unlock(&ns->ctrl->namespaces_mutex); | 2986 | mutex_unlock(&ns->ctrl->namespaces_mutex); |
2990 | 2987 | ||
2991 | synchronize_srcu(&head->srcu); | 2988 | synchronize_srcu(&ns->head->srcu); |
2992 | nvme_put_ns(ns); | 2989 | nvme_put_ns(ns); |
2993 | } | 2990 | } |
2994 | 2991 | ||
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 42232e731f19..9ba614953607 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -156,4 +156,34 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts); | |||
156 | int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size); | 156 | int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size); |
157 | bool nvmf_should_reconnect(struct nvme_ctrl *ctrl); | 157 | bool nvmf_should_reconnect(struct nvme_ctrl *ctrl); |
158 | 158 | ||
159 | static inline blk_status_t nvmf_check_init_req(struct nvme_ctrl *ctrl, | ||
160 | struct request *rq) | ||
161 | { | ||
162 | struct nvme_command *cmd = nvme_req(rq)->cmd; | ||
163 | |||
164 | /* | ||
165 | * We cannot accept any other command until the connect command has | ||
166 | * completed, so only allow connect to pass. | ||
167 | */ | ||
168 | if (!blk_rq_is_passthrough(rq) || | ||
169 | cmd->common.opcode != nvme_fabrics_command || | ||
170 | cmd->fabrics.fctype != nvme_fabrics_type_connect) { | ||
171 | /* | ||
172 | * Reconnecting state means transport disruption, which can take | ||
173 | * a long time and even might fail permanently, fail fast to | ||
174 | * give upper layers a chance to failover. | ||
175 | * Deleting state means that the ctrl will never accept commands | ||
176 | * again, fail it permanently. | ||
177 | */ | ||
178 | if (ctrl->state == NVME_CTRL_RECONNECTING || | ||
179 | ctrl->state == NVME_CTRL_DELETING) { | ||
180 | nvme_req(rq)->status = NVME_SC_ABORT_REQ; | ||
181 | return BLK_STS_IOERR; | ||
182 | } | ||
183 | return BLK_STS_RESOURCE; /* try again later */ | ||
184 | } | ||
185 | |||
186 | return BLK_STS_OK; | ||
187 | } | ||
188 | |||
159 | #endif /* _NVME_FABRICS_H */ | 189 | #endif /* _NVME_FABRICS_H */ |
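The nvmf_check_init_req() helper added above centralizes the fabrics "queue not live yet" check: only the Connect command may pass, and anything else either fails fast (controller reconnecting or being deleted) or is returned to the block layer for a later retry. A minimal userspace model of that decision (hypothetical names and plain enums standing in for the kernel types, not the kernel code itself) might look like:

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical stand-ins for the kernel's controller states and blk_status_t. */
enum ctrl_state { CTRL_LIVE, CTRL_RECONNECTING, CTRL_DELETING };
enum blk_status { STS_OK, STS_RESOURCE, STS_IOERR };

struct fake_request {
    bool is_passthrough;      /* models blk_rq_is_passthrough(rq) */
    bool is_fabrics_connect;  /* models opcode/fctype == fabrics Connect */
};

/*
 * Model of the decision made by the new helper for a queue that is not
 * live yet: only Connect may pass; otherwise fail fast while the controller
 * is reconnecting or being deleted, and ask for a retry in any other state.
 */
static enum blk_status check_init_req(enum ctrl_state state,
                                      const struct fake_request *rq)
{
    if (!rq->is_passthrough || !rq->is_fabrics_connect) {
        if (state == CTRL_RECONNECTING || state == CTRL_DELETING)
            return STS_IOERR;    /* fail fast so upper layers can fail over */
        return STS_RESOURCE;     /* transient disruption: try again later */
    }
    return STS_OK;               /* the Connect command itself is allowed */
}

int main(void)
{
    struct fake_request connect = { true, true };
    struct fake_request read_io = { false, false };

    assert(check_init_req(CTRL_LIVE, &connect) == STS_OK);
    assert(check_init_req(CTRL_LIVE, &read_io) == STS_RESOURCE);
    assert(check_init_req(CTRL_RECONNECTING, &read_io) == STS_IOERR);
    assert(check_init_req(CTRL_DELETING, &read_io) == STS_IOERR);
    printf("readiness-check model behaves as expected\n");
    return 0;
}

The real helper additionally records NVME_SC_ABORT_REQ in the request before returning BLK_STS_IOERR, as the hunk above shows; the fc, rdma, and loop transports below then call it from their per-queue readiness checks.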
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 7ab0be55c7d0..0a8af4daef89 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -31,7 +31,8 @@ | |||
31 | 31 | ||
32 | 32 | ||
33 | enum nvme_fc_queue_flags { | 33 | enum nvme_fc_queue_flags { |
34 | NVME_FC_Q_CONNECTED = (1 << 0), | 34 | NVME_FC_Q_CONNECTED = 0, |
35 | NVME_FC_Q_LIVE, | ||
35 | }; | 36 | }; |
36 | 37 | ||
37 | #define NVMEFC_QUEUE_DELAY 3 /* ms units */ | 38 | #define NVMEFC_QUEUE_DELAY 3 /* ms units */ |
@@ -1927,6 +1928,7 @@ nvme_fc_free_queue(struct nvme_fc_queue *queue) | |||
1927 | if (!test_and_clear_bit(NVME_FC_Q_CONNECTED, &queue->flags)) | 1928 | if (!test_and_clear_bit(NVME_FC_Q_CONNECTED, &queue->flags)) |
1928 | return; | 1929 | return; |
1929 | 1930 | ||
1931 | clear_bit(NVME_FC_Q_LIVE, &queue->flags); | ||
1930 | /* | 1932 | /* |
1931 | * Current implementation never disconnects a single queue. | 1933 | * Current implementation never disconnects a single queue. |
1932 | * It always terminates a whole association. So there is never | 1934 | * It always terminates a whole association. So there is never |
@@ -1934,7 +1936,6 @@ nvme_fc_free_queue(struct nvme_fc_queue *queue) | |||
1934 | */ | 1936 | */ |
1935 | 1937 | ||
1936 | queue->connection_id = 0; | 1938 | queue->connection_id = 0; |
1937 | clear_bit(NVME_FC_Q_CONNECTED, &queue->flags); | ||
1938 | } | 1939 | } |
1939 | 1940 | ||
1940 | static void | 1941 | static void |
@@ -2013,6 +2014,8 @@ nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize) | |||
2013 | ret = nvmf_connect_io_queue(&ctrl->ctrl, i); | 2014 | ret = nvmf_connect_io_queue(&ctrl->ctrl, i); |
2014 | if (ret) | 2015 | if (ret) |
2015 | break; | 2016 | break; |
2017 | |||
2018 | set_bit(NVME_FC_Q_LIVE, &ctrl->queues[i].flags); | ||
2016 | } | 2019 | } |
2017 | 2020 | ||
2018 | return ret; | 2021 | return ret; |
@@ -2320,6 +2323,14 @@ busy: | |||
2320 | return BLK_STS_RESOURCE; | 2323 | return BLK_STS_RESOURCE; |
2321 | } | 2324 | } |
2322 | 2325 | ||
2326 | static inline blk_status_t nvme_fc_is_ready(struct nvme_fc_queue *queue, | ||
2327 | struct request *rq) | ||
2328 | { | ||
2329 | if (unlikely(!test_bit(NVME_FC_Q_LIVE, &queue->flags))) | ||
2330 | return nvmf_check_init_req(&queue->ctrl->ctrl, rq); | ||
2331 | return BLK_STS_OK; | ||
2332 | } | ||
2333 | |||
2323 | static blk_status_t | 2334 | static blk_status_t |
2324 | nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, | 2335 | nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, |
2325 | const struct blk_mq_queue_data *bd) | 2336 | const struct blk_mq_queue_data *bd) |
@@ -2335,6 +2346,10 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx, | |||
2335 | u32 data_len; | 2346 | u32 data_len; |
2336 | blk_status_t ret; | 2347 | blk_status_t ret; |
2337 | 2348 | ||
2349 | ret = nvme_fc_is_ready(queue, rq); | ||
2350 | if (unlikely(ret)) | ||
2351 | return ret; | ||
2352 | |||
2338 | ret = nvme_setup_cmd(ns, rq, sqe); | 2353 | ret = nvme_setup_cmd(ns, rq, sqe); |
2339 | if (ret) | 2354 | if (ret) |
2340 | return ret; | 2355 | return ret; |
@@ -2727,6 +2742,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl) | |||
2727 | if (ret) | 2742 | if (ret) |
2728 | goto out_disconnect_admin_queue; | 2743 | goto out_disconnect_admin_queue; |
2729 | 2744 | ||
2745 | set_bit(NVME_FC_Q_LIVE, &ctrl->queues[0].flags); | ||
2746 | |||
2730 | /* | 2747 | /* |
2731 | * Check controller capabilities | 2748 | * Check controller capabilities |
2732 | * | 2749 | * |
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 78d92151a904..1218a9fca846 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -131,7 +131,7 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q, | |||
131 | bio->bi_opf |= REQ_NVME_MPATH; | 131 | bio->bi_opf |= REQ_NVME_MPATH; |
132 | ret = direct_make_request(bio); | 132 | ret = direct_make_request(bio); |
133 | } else if (!list_empty_careful(&head->list)) { | 133 | } else if (!list_empty_careful(&head->list)) { |
134 | dev_warn_ratelimited(dev, "no path available - requeing I/O\n"); | 134 | dev_warn_ratelimited(dev, "no path available - requeuing I/O\n"); |
135 | 135 | ||
136 | spin_lock_irq(&head->requeue_lock); | 136 | spin_lock_irq(&head->requeue_lock); |
137 | bio_list_add(&head->requeue_list, bio); | 137 | bio_list_add(&head->requeue_list, bio); |
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index c0873a68872f..ea1aa5283e8e 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -114,7 +114,7 @@ static inline struct nvme_request *nvme_req(struct request *req) | |||
114 | * NVME_QUIRK_DELAY_BEFORE_CHK_RDY quirk enabled. The value (in ms) was | 114 | * NVME_QUIRK_DELAY_BEFORE_CHK_RDY quirk enabled. The value (in ms) was |
115 | * found empirically. | 115 | * found empirically. |
116 | */ | 116 | */ |
117 | #define NVME_QUIRK_DELAY_AMOUNT 2000 | 117 | #define NVME_QUIRK_DELAY_AMOUNT 2300 |
118 | 118 | ||
119 | enum nvme_ctrl_state { | 119 | enum nvme_ctrl_state { |
120 | NVME_CTRL_NEW, | 120 | NVME_CTRL_NEW, |
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index a11cfd470089..f5800c3c9082 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1759,6 +1759,7 @@ static void nvme_free_host_mem(struct nvme_dev *dev) | |||
1759 | dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs), | 1759 | dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs), |
1760 | dev->host_mem_descs, dev->host_mem_descs_dma); | 1760 | dev->host_mem_descs, dev->host_mem_descs_dma); |
1761 | dev->host_mem_descs = NULL; | 1761 | dev->host_mem_descs = NULL; |
1762 | dev->nr_host_mem_descs = 0; | ||
1762 | } | 1763 | } |
1763 | 1764 | ||
1764 | static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, | 1765 | static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, |
@@ -1787,7 +1788,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred, | |||
1787 | if (!bufs) | 1788 | if (!bufs) |
1788 | goto out_free_descs; | 1789 | goto out_free_descs; |
1789 | 1790 | ||
1790 | for (size = 0; size < preferred; size += len) { | 1791 | for (size = 0; size < preferred && i < max_entries; size += len) { |
1791 | dma_addr_t dma_addr; | 1792 | dma_addr_t dma_addr; |
1792 | 1793 | ||
1793 | len = min_t(u64, chunk_size, preferred - size); | 1794 | len = min_t(u64, chunk_size, preferred - size); |
@@ -2428,7 +2429,7 @@ static int nvme_dev_map(struct nvme_dev *dev) | |||
2428 | return -ENODEV; | 2429 | return -ENODEV; |
2429 | } | 2430 | } |
2430 | 2431 | ||
2431 | static unsigned long check_dell_samsung_bug(struct pci_dev *pdev) | 2432 | static unsigned long check_vendor_combination_bug(struct pci_dev *pdev) |
2432 | { | 2433 | { |
2433 | if (pdev->vendor == 0x144d && pdev->device == 0xa802) { | 2434 | if (pdev->vendor == 0x144d && pdev->device == 0xa802) { |
2434 | /* | 2435 | /* |
@@ -2443,6 +2444,14 @@ static unsigned long check_dell_samsung_bug(struct pci_dev *pdev) | |||
2443 | (dmi_match(DMI_PRODUCT_NAME, "XPS 15 9550") || | 2444 | (dmi_match(DMI_PRODUCT_NAME, "XPS 15 9550") || |
2444 | dmi_match(DMI_PRODUCT_NAME, "Precision 5510"))) | 2445 | dmi_match(DMI_PRODUCT_NAME, "Precision 5510"))) |
2445 | return NVME_QUIRK_NO_DEEPEST_PS; | 2446 | return NVME_QUIRK_NO_DEEPEST_PS; |
2447 | } else if (pdev->vendor == 0x144d && pdev->device == 0xa804) { | ||
2448 | /* | ||
2449 | * Samsung SSD 960 EVO drops off the PCIe bus after system | ||
2450 | * suspend on a Ryzen board, ASUS PRIME B350M-A. | ||
2451 | */ | ||
2452 | if (dmi_match(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC.") && | ||
2453 | dmi_match(DMI_BOARD_NAME, "PRIME B350M-A")) | ||
2454 | return NVME_QUIRK_NO_APST; | ||
2446 | } | 2455 | } |
2447 | 2456 | ||
2448 | return 0; | 2457 | return 0; |
@@ -2482,7 +2491,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) | |||
2482 | if (result) | 2491 | if (result) |
2483 | goto unmap; | 2492 | goto unmap; |
2484 | 2493 | ||
2485 | quirks |= check_dell_samsung_bug(pdev); | 2494 | quirks |= check_vendor_combination_bug(pdev); |
2486 | 2495 | ||
2487 | result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, | 2496 | result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, |
2488 | quirks); | 2497 | quirks); |
@@ -2665,6 +2674,8 @@ static const struct pci_device_id nvme_id_table[] = { | |||
2665 | .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, | 2674 | .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, |
2666 | { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */ | 2675 | { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */ |
2667 | .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, | 2676 | .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, |
2677 | { PCI_DEVICE(0x1c58, 0x0023), /* WDC SN200 adapter */ | ||
2678 | .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, | ||
2668 | { PCI_DEVICE(0x1c5f, 0x0540), /* Memblaze Pblaze4 adapter */ | 2679 | { PCI_DEVICE(0x1c5f, 0x0540), /* Memblaze Pblaze4 adapter */ |
2669 | .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, | 2680 | .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, }, |
2670 | { PCI_DEVICE(0x144d, 0xa821), /* Samsung PM1725 */ | 2681 | { PCI_DEVICE(0x144d, 0xa821), /* Samsung PM1725 */ |
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 4f9bf2f815c3..37af56596be6 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -15,6 +15,7 @@ | |||
15 | #include <linux/module.h> | 15 | #include <linux/module.h> |
16 | #include <linux/init.h> | 16 | #include <linux/init.h> |
17 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
18 | #include <rdma/mr_pool.h> | ||
18 | #include <linux/err.h> | 19 | #include <linux/err.h> |
19 | #include <linux/string.h> | 20 | #include <linux/string.h> |
20 | #include <linux/atomic.h> | 21 | #include <linux/atomic.h> |
@@ -59,6 +60,9 @@ struct nvme_rdma_request { | |||
59 | struct nvme_request req; | 60 | struct nvme_request req; |
60 | struct ib_mr *mr; | 61 | struct ib_mr *mr; |
61 | struct nvme_rdma_qe sqe; | 62 | struct nvme_rdma_qe sqe; |
63 | union nvme_result result; | ||
64 | __le16 status; | ||
65 | refcount_t ref; | ||
62 | struct ib_sge sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS]; | 66 | struct ib_sge sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS]; |
63 | u32 num_sge; | 67 | u32 num_sge; |
64 | int nents; | 68 | int nents; |
@@ -73,11 +77,11 @@ struct nvme_rdma_request { | |||
73 | enum nvme_rdma_queue_flags { | 77 | enum nvme_rdma_queue_flags { |
74 | NVME_RDMA_Q_ALLOCATED = 0, | 78 | NVME_RDMA_Q_ALLOCATED = 0, |
75 | NVME_RDMA_Q_LIVE = 1, | 79 | NVME_RDMA_Q_LIVE = 1, |
80 | NVME_RDMA_Q_TR_READY = 2, | ||
76 | }; | 81 | }; |
77 | 82 | ||
78 | struct nvme_rdma_queue { | 83 | struct nvme_rdma_queue { |
79 | struct nvme_rdma_qe *rsp_ring; | 84 | struct nvme_rdma_qe *rsp_ring; |
80 | atomic_t sig_count; | ||
81 | int queue_size; | 85 | int queue_size; |
82 | size_t cmnd_capsule_len; | 86 | size_t cmnd_capsule_len; |
83 | struct nvme_rdma_ctrl *ctrl; | 87 | struct nvme_rdma_ctrl *ctrl; |
@@ -258,32 +262,6 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor) | |||
258 | return ret; | 262 | return ret; |
259 | } | 263 | } |
260 | 264 | ||
261 | static int nvme_rdma_reinit_request(void *data, struct request *rq) | ||
262 | { | ||
263 | struct nvme_rdma_ctrl *ctrl = data; | ||
264 | struct nvme_rdma_device *dev = ctrl->device; | ||
265 | struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); | ||
266 | int ret = 0; | ||
267 | |||
268 | if (WARN_ON_ONCE(!req->mr)) | ||
269 | return 0; | ||
270 | |||
271 | ib_dereg_mr(req->mr); | ||
272 | |||
273 | req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG, | ||
274 | ctrl->max_fr_pages); | ||
275 | if (IS_ERR(req->mr)) { | ||
276 | ret = PTR_ERR(req->mr); | ||
277 | req->mr = NULL; | ||
278 | goto out; | ||
279 | } | ||
280 | |||
281 | req->mr->need_inval = false; | ||
282 | |||
283 | out: | ||
284 | return ret; | ||
285 | } | ||
286 | |||
287 | static void nvme_rdma_exit_request(struct blk_mq_tag_set *set, | 265 | static void nvme_rdma_exit_request(struct blk_mq_tag_set *set, |
288 | struct request *rq, unsigned int hctx_idx) | 266 | struct request *rq, unsigned int hctx_idx) |
289 | { | 267 | { |
@@ -293,9 +271,6 @@ static void nvme_rdma_exit_request(struct blk_mq_tag_set *set, | |||
293 | struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx]; | 271 | struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx]; |
294 | struct nvme_rdma_device *dev = queue->device; | 272 | struct nvme_rdma_device *dev = queue->device; |
295 | 273 | ||
296 | if (req->mr) | ||
297 | ib_dereg_mr(req->mr); | ||
298 | |||
299 | nvme_rdma_free_qe(dev->dev, &req->sqe, sizeof(struct nvme_command), | 274 | nvme_rdma_free_qe(dev->dev, &req->sqe, sizeof(struct nvme_command), |
300 | DMA_TO_DEVICE); | 275 | DMA_TO_DEVICE); |
301 | } | 276 | } |
@@ -317,21 +292,9 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set, | |||
317 | if (ret) | 292 | if (ret) |
318 | return ret; | 293 | return ret; |
319 | 294 | ||
320 | req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG, | ||
321 | ctrl->max_fr_pages); | ||
322 | if (IS_ERR(req->mr)) { | ||
323 | ret = PTR_ERR(req->mr); | ||
324 | goto out_free_qe; | ||
325 | } | ||
326 | |||
327 | req->queue = queue; | 295 | req->queue = queue; |
328 | 296 | ||
329 | return 0; | 297 | return 0; |
330 | |||
331 | out_free_qe: | ||
332 | nvme_rdma_free_qe(dev->dev, &req->sqe, sizeof(struct nvme_command), | ||
333 | DMA_TO_DEVICE); | ||
334 | return -ENOMEM; | ||
335 | } | 298 | } |
336 | 299 | ||
337 | static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, | 300 | static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, |
@@ -428,10 +391,23 @@ out_err: | |||
428 | 391 | ||
429 | static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) | 392 | static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) |
430 | { | 393 | { |
431 | struct nvme_rdma_device *dev = queue->device; | 394 | struct nvme_rdma_device *dev; |
432 | struct ib_device *ibdev = dev->dev; | 395 | struct ib_device *ibdev; |
433 | 396 | ||
434 | rdma_destroy_qp(queue->cm_id); | 397 | if (!test_and_clear_bit(NVME_RDMA_Q_TR_READY, &queue->flags)) |
398 | return; | ||
399 | |||
400 | dev = queue->device; | ||
401 | ibdev = dev->dev; | ||
402 | |||
403 | ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs); | ||
404 | |||
405 | /* | ||
406 | * The cm_id object might have been destroyed during RDMA connection | ||
407 | * establishment error flow to avoid getting other cma events, thus | ||
408 | * the destruction of the QP shouldn't use rdma_cm API. | ||
409 | */ | ||
410 | ib_destroy_qp(queue->qp); | ||
435 | ib_free_cq(queue->ib_cq); | 411 | ib_free_cq(queue->ib_cq); |
436 | 412 | ||
437 | nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size, | 413 | nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size, |
@@ -440,6 +416,12 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue) | |||
440 | nvme_rdma_dev_put(dev); | 416 | nvme_rdma_dev_put(dev); |
441 | } | 417 | } |
442 | 418 | ||
419 | static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev) | ||
420 | { | ||
421 | return min_t(u32, NVME_RDMA_MAX_SEGMENTS, | ||
422 | ibdev->attrs.max_fast_reg_page_list_len); | ||
423 | } | ||
424 | |||
443 | static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) | 425 | static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) |
444 | { | 426 | { |
445 | struct ib_device *ibdev; | 427 | struct ib_device *ibdev; |
@@ -482,8 +464,24 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue) | |||
482 | goto out_destroy_qp; | 464 | goto out_destroy_qp; |
483 | } | 465 | } |
484 | 466 | ||
467 | ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs, | ||
468 | queue->queue_size, | ||
469 | IB_MR_TYPE_MEM_REG, | ||
470 | nvme_rdma_get_max_fr_pages(ibdev)); | ||
471 | if (ret) { | ||
472 | dev_err(queue->ctrl->ctrl.device, | ||
473 | "failed to initialize MR pool sized %d for QID %d\n", | ||
474 | queue->queue_size, idx); | ||
475 | goto out_destroy_ring; | ||
476 | } | ||
477 | |||
478 | set_bit(NVME_RDMA_Q_TR_READY, &queue->flags); | ||
479 | |||
485 | return 0; | 480 | return 0; |
486 | 481 | ||
482 | out_destroy_ring: | ||
483 | nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size, | ||
484 | sizeof(struct nvme_completion), DMA_FROM_DEVICE); | ||
487 | out_destroy_qp: | 485 | out_destroy_qp: |
488 | rdma_destroy_qp(queue->cm_id); | 486 | rdma_destroy_qp(queue->cm_id); |
489 | out_destroy_ib_cq: | 487 | out_destroy_ib_cq: |
@@ -510,7 +508,6 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl, | |||
510 | queue->cmnd_capsule_len = sizeof(struct nvme_command); | 508 | queue->cmnd_capsule_len = sizeof(struct nvme_command); |
511 | 509 | ||
512 | queue->queue_size = queue_size; | 510 | queue->queue_size = queue_size; |
513 | atomic_set(&queue->sig_count, 0); | ||
514 | 511 | ||
515 | queue->cm_id = rdma_create_id(&init_net, nvme_rdma_cm_handler, queue, | 512 | queue->cm_id = rdma_create_id(&init_net, nvme_rdma_cm_handler, queue, |
516 | RDMA_PS_TCP, IB_QPT_RC); | 513 | RDMA_PS_TCP, IB_QPT_RC); |
@@ -546,6 +543,7 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl, | |||
546 | 543 | ||
547 | out_destroy_cm_id: | 544 | out_destroy_cm_id: |
548 | rdma_destroy_id(queue->cm_id); | 545 | rdma_destroy_id(queue->cm_id); |
546 | nvme_rdma_destroy_queue_ib(queue); | ||
549 | return ret; | 547 | return ret; |
550 | } | 548 | } |
551 | 549 | ||
@@ -756,8 +754,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, | |||
756 | 754 | ||
757 | ctrl->device = ctrl->queues[0].device; | 755 | ctrl->device = ctrl->queues[0].device; |
758 | 756 | ||
759 | ctrl->max_fr_pages = min_t(u32, NVME_RDMA_MAX_SEGMENTS, | 757 | ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev); |
760 | ctrl->device->dev->attrs.max_fast_reg_page_list_len); | ||
761 | 758 | ||
762 | if (new) { | 759 | if (new) { |
763 | ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true); | 760 | ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true); |
@@ -771,10 +768,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, | |||
771 | error = PTR_ERR(ctrl->ctrl.admin_q); | 768 | error = PTR_ERR(ctrl->ctrl.admin_q); |
772 | goto out_free_tagset; | 769 | goto out_free_tagset; |
773 | } | 770 | } |
774 | } else { | ||
775 | error = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset); | ||
776 | if (error) | ||
777 | goto out_free_queue; | ||
778 | } | 771 | } |
779 | 772 | ||
780 | error = nvme_rdma_start_queue(ctrl, 0); | 773 | error = nvme_rdma_start_queue(ctrl, 0); |
@@ -854,10 +847,6 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) | |||
854 | goto out_free_tag_set; | 847 | goto out_free_tag_set; |
855 | } | 848 | } |
856 | } else { | 849 | } else { |
857 | ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset); | ||
858 | if (ret) | ||
859 | goto out_free_io_queues; | ||
860 | |||
861 | blk_mq_update_nr_hw_queues(&ctrl->tag_set, | 850 | blk_mq_update_nr_hw_queues(&ctrl->tag_set, |
862 | ctrl->ctrl.queue_count - 1); | 851 | ctrl->ctrl.queue_count - 1); |
863 | } | 852 | } |
@@ -1018,8 +1007,18 @@ static void nvme_rdma_memreg_done(struct ib_cq *cq, struct ib_wc *wc) | |||
1018 | 1007 | ||
1019 | static void nvme_rdma_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc) | 1008 | static void nvme_rdma_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc) |
1020 | { | 1009 | { |
1021 | if (unlikely(wc->status != IB_WC_SUCCESS)) | 1010 | struct nvme_rdma_request *req = |
1011 | container_of(wc->wr_cqe, struct nvme_rdma_request, reg_cqe); | ||
1012 | struct request *rq = blk_mq_rq_from_pdu(req); | ||
1013 | |||
1014 | if (unlikely(wc->status != IB_WC_SUCCESS)) { | ||
1022 | nvme_rdma_wr_error(cq, wc, "LOCAL_INV"); | 1015 | nvme_rdma_wr_error(cq, wc, "LOCAL_INV"); |
1016 | return; | ||
1017 | } | ||
1018 | |||
1019 | if (refcount_dec_and_test(&req->ref)) | ||
1020 | nvme_end_request(rq, req->status, req->result); | ||
1021 | |||
1023 | } | 1022 | } |
1024 | 1023 | ||
1025 | static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue, | 1024 | static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue, |
@@ -1030,7 +1029,7 @@ static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue, | |||
1030 | .opcode = IB_WR_LOCAL_INV, | 1029 | .opcode = IB_WR_LOCAL_INV, |
1031 | .next = NULL, | 1030 | .next = NULL, |
1032 | .num_sge = 0, | 1031 | .num_sge = 0, |
1033 | .send_flags = 0, | 1032 | .send_flags = IB_SEND_SIGNALED, |
1034 | .ex.invalidate_rkey = req->mr->rkey, | 1033 | .ex.invalidate_rkey = req->mr->rkey, |
1035 | }; | 1034 | }; |
1036 | 1035 | ||
@@ -1044,22 +1043,15 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, | |||
1044 | struct request *rq) | 1043 | struct request *rq) |
1045 | { | 1044 | { |
1046 | struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); | 1045 | struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); |
1047 | struct nvme_rdma_ctrl *ctrl = queue->ctrl; | ||
1048 | struct nvme_rdma_device *dev = queue->device; | 1046 | struct nvme_rdma_device *dev = queue->device; |
1049 | struct ib_device *ibdev = dev->dev; | 1047 | struct ib_device *ibdev = dev->dev; |
1050 | int res; | ||
1051 | 1048 | ||
1052 | if (!blk_rq_bytes(rq)) | 1049 | if (!blk_rq_bytes(rq)) |
1053 | return; | 1050 | return; |
1054 | 1051 | ||
1055 | if (req->mr->need_inval && test_bit(NVME_RDMA_Q_LIVE, &req->queue->flags)) { | 1052 | if (req->mr) { |
1056 | res = nvme_rdma_inv_rkey(queue, req); | 1053 | ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr); |
1057 | if (unlikely(res < 0)) { | 1054 | req->mr = NULL; |
1058 | dev_err(ctrl->ctrl.device, | ||
1059 | "Queueing INV WR for rkey %#x failed (%d)\n", | ||
1060 | req->mr->rkey, res); | ||
1061 | nvme_rdma_error_recovery(queue->ctrl); | ||
1062 | } | ||
1063 | } | 1055 | } |
1064 | 1056 | ||
1065 | ib_dma_unmap_sg(ibdev, req->sg_table.sgl, | 1057 | ib_dma_unmap_sg(ibdev, req->sg_table.sgl, |
@@ -1118,12 +1110,18 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue, | |||
1118 | struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl; | 1110 | struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl; |
1119 | int nr; | 1111 | int nr; |
1120 | 1112 | ||
1113 | req->mr = ib_mr_pool_get(queue->qp, &queue->qp->rdma_mrs); | ||
1114 | if (WARN_ON_ONCE(!req->mr)) | ||
1115 | return -EAGAIN; | ||
1116 | |||
1121 | /* | 1117 | /* |
1122 | * Align the MR to a 4K page size to match the ctrl page size and | 1118 | * Align the MR to a 4K page size to match the ctrl page size and |
1123 | * the block virtual boundary. | 1119 | * the block virtual boundary. |
1124 | */ | 1120 | */ |
1125 | nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K); | 1121 | nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K); |
1126 | if (unlikely(nr < count)) { | 1122 | if (unlikely(nr < count)) { |
1123 | ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr); | ||
1124 | req->mr = NULL; | ||
1127 | if (nr < 0) | 1125 | if (nr < 0) |
1128 | return nr; | 1126 | return nr; |
1129 | return -EINVAL; | 1127 | return -EINVAL; |
@@ -1142,8 +1140,6 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue, | |||
1142 | IB_ACCESS_REMOTE_READ | | 1140 | IB_ACCESS_REMOTE_READ | |
1143 | IB_ACCESS_REMOTE_WRITE; | 1141 | IB_ACCESS_REMOTE_WRITE; |
1144 | 1142 | ||
1145 | req->mr->need_inval = true; | ||
1146 | |||
1147 | sg->addr = cpu_to_le64(req->mr->iova); | 1143 | sg->addr = cpu_to_le64(req->mr->iova); |
1148 | put_unaligned_le24(req->mr->length, sg->length); | 1144 | put_unaligned_le24(req->mr->length, sg->length); |
1149 | put_unaligned_le32(req->mr->rkey, sg->key); | 1145 | put_unaligned_le32(req->mr->rkey, sg->key); |
@@ -1163,7 +1159,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, | |||
1163 | 1159 | ||
1164 | req->num_sge = 1; | 1160 | req->num_sge = 1; |
1165 | req->inline_data = false; | 1161 | req->inline_data = false; |
1166 | req->mr->need_inval = false; | 1162 | refcount_set(&req->ref, 2); /* send and recv completions */ |
1167 | 1163 | ||
1168 | c->common.flags |= NVME_CMD_SGL_METABUF; | 1164 | c->common.flags |= NVME_CMD_SGL_METABUF; |
1169 | 1165 | ||
@@ -1200,25 +1196,24 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, | |||
1200 | 1196 | ||
1201 | static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) | 1197 | static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc) |
1202 | { | 1198 | { |
1203 | if (unlikely(wc->status != IB_WC_SUCCESS)) | 1199 | struct nvme_rdma_qe *qe = |
1204 | nvme_rdma_wr_error(cq, wc, "SEND"); | 1200 | container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe); |
1205 | } | 1201 | struct nvme_rdma_request *req = |
1202 | container_of(qe, struct nvme_rdma_request, sqe); | ||
1203 | struct request *rq = blk_mq_rq_from_pdu(req); | ||
1206 | 1204 | ||
1207 | /* | 1205 | if (unlikely(wc->status != IB_WC_SUCCESS)) { |
1208 | * We want to signal completion at least every queue depth/2. This returns the | 1206 | nvme_rdma_wr_error(cq, wc, "SEND"); |
1209 | * largest power of two that is not above half of (queue size + 1) to optimize | 1207 | return; |
1210 | * (avoid divisions). | 1208 | } |
1211 | */ | ||
1212 | static inline bool nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue) | ||
1213 | { | ||
1214 | int limit = 1 << ilog2((queue->queue_size + 1) / 2); | ||
1215 | 1209 | ||
1216 | return (atomic_inc_return(&queue->sig_count) & (limit - 1)) == 0; | 1210 | if (refcount_dec_and_test(&req->ref)) |
1211 | nvme_end_request(rq, req->status, req->result); | ||
1217 | } | 1212 | } |
1218 | 1213 | ||
1219 | static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, | 1214 | static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, |
1220 | struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge, | 1215 | struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge, |
1221 | struct ib_send_wr *first, bool flush) | 1216 | struct ib_send_wr *first) |
1222 | { | 1217 | { |
1223 | struct ib_send_wr wr, *bad_wr; | 1218 | struct ib_send_wr wr, *bad_wr; |
1224 | int ret; | 1219 | int ret; |
@@ -1227,31 +1222,12 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, | |||
1227 | sge->length = sizeof(struct nvme_command), | 1222 | sge->length = sizeof(struct nvme_command), |
1228 | sge->lkey = queue->device->pd->local_dma_lkey; | 1223 | sge->lkey = queue->device->pd->local_dma_lkey; |
1229 | 1224 | ||
1230 | qe->cqe.done = nvme_rdma_send_done; | ||
1231 | |||
1232 | wr.next = NULL; | 1225 | wr.next = NULL; |
1233 | wr.wr_cqe = &qe->cqe; | 1226 | wr.wr_cqe = &qe->cqe; |
1234 | wr.sg_list = sge; | 1227 | wr.sg_list = sge; |
1235 | wr.num_sge = num_sge; | 1228 | wr.num_sge = num_sge; |
1236 | wr.opcode = IB_WR_SEND; | 1229 | wr.opcode = IB_WR_SEND; |
1237 | wr.send_flags = 0; | 1230 | wr.send_flags = IB_SEND_SIGNALED; |
1238 | |||
1239 | /* | ||
1240 | * Unsignalled send completions are another giant desaster in the | ||
1241 | * IB Verbs spec: If we don't regularly post signalled sends | ||
1242 | * the send queue will fill up and only a QP reset will rescue us. | ||
1243 | * Would have been way to obvious to handle this in hardware or | ||
1244 | * at least the RDMA stack.. | ||
1245 | * | ||
1246 | * Always signal the flushes. The magic request used for the flush | ||
1247 | * sequencer is not allocated in our driver's tagset and it's | ||
1248 | * triggered to be freed by blk_cleanup_queue(). So we need to | ||
1249 | * always mark it as signaled to ensure that the "wr_cqe", which is | ||
1250 | * embedded in request's payload, is not freed when __ib_process_cq() | ||
1251 | * calls wr_cqe->done(). | ||
1252 | */ | ||
1253 | if (nvme_rdma_queue_sig_limit(queue) || flush) | ||
1254 | wr.send_flags |= IB_SEND_SIGNALED; | ||
1255 | 1231 | ||
1256 | if (first) | 1232 | if (first) |
1257 | first->next = ≀ | 1233 | first->next = ≀ |
@@ -1301,6 +1277,12 @@ static struct blk_mq_tags *nvme_rdma_tagset(struct nvme_rdma_queue *queue) | |||
1301 | return queue->ctrl->tag_set.tags[queue_idx - 1]; | 1277 | return queue->ctrl->tag_set.tags[queue_idx - 1]; |
1302 | } | 1278 | } |
1303 | 1279 | ||
1280 | static void nvme_rdma_async_done(struct ib_cq *cq, struct ib_wc *wc) | ||
1281 | { | ||
1282 | if (unlikely(wc->status != IB_WC_SUCCESS)) | ||
1283 | nvme_rdma_wr_error(cq, wc, "ASYNC"); | ||
1284 | } | ||
1285 | |||
1304 | static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg) | 1286 | static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg) |
1305 | { | 1287 | { |
1306 | struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(arg); | 1288 | struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(arg); |
@@ -1319,10 +1301,12 @@ static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg) | |||
1319 | cmd->common.flags |= NVME_CMD_SGL_METABUF; | 1301 | cmd->common.flags |= NVME_CMD_SGL_METABUF; |
1320 | nvme_rdma_set_sg_null(cmd); | 1302 | nvme_rdma_set_sg_null(cmd); |
1321 | 1303 | ||
1304 | sqe->cqe.done = nvme_rdma_async_done; | ||
1305 | |||
1322 | ib_dma_sync_single_for_device(dev, sqe->dma, sizeof(*cmd), | 1306 | ib_dma_sync_single_for_device(dev, sqe->dma, sizeof(*cmd), |
1323 | DMA_TO_DEVICE); | 1307 | DMA_TO_DEVICE); |
1324 | 1308 | ||
1325 | ret = nvme_rdma_post_send(queue, sqe, &sge, 1, NULL, false); | 1309 | ret = nvme_rdma_post_send(queue, sqe, &sge, 1, NULL); |
1326 | WARN_ON_ONCE(ret); | 1310 | WARN_ON_ONCE(ret); |
1327 | } | 1311 | } |
1328 | 1312 | ||
@@ -1343,14 +1327,34 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue, | |||
1343 | } | 1327 | } |
1344 | req = blk_mq_rq_to_pdu(rq); | 1328 | req = blk_mq_rq_to_pdu(rq); |
1345 | 1329 | ||
1346 | if (rq->tag == tag) | 1330 | req->status = cqe->status; |
1347 | ret = 1; | 1331 | req->result = cqe->result; |
1348 | 1332 | ||
1349 | if ((wc->wc_flags & IB_WC_WITH_INVALIDATE) && | 1333 | if (wc->wc_flags & IB_WC_WITH_INVALIDATE) { |
1350 | wc->ex.invalidate_rkey == req->mr->rkey) | 1334 | if (unlikely(wc->ex.invalidate_rkey != req->mr->rkey)) { |
1351 | req->mr->need_inval = false; | 1335 | dev_err(queue->ctrl->ctrl.device, |
1336 | "Bogus remote invalidation for rkey %#x\n", | ||
1337 | req->mr->rkey); | ||
1338 | nvme_rdma_error_recovery(queue->ctrl); | ||
1339 | } | ||
1340 | } else if (req->mr) { | ||
1341 | ret = nvme_rdma_inv_rkey(queue, req); | ||
1342 | if (unlikely(ret < 0)) { | ||
1343 | dev_err(queue->ctrl->ctrl.device, | ||
1344 | "Queueing INV WR for rkey %#x failed (%d)\n", | ||
1345 | req->mr->rkey, ret); | ||
1346 | nvme_rdma_error_recovery(queue->ctrl); | ||
1347 | } | ||
1348 | /* the local invalidation completion will end the request */ | ||
1349 | return 0; | ||
1350 | } | ||
1351 | |||
1352 | if (refcount_dec_and_test(&req->ref)) { | ||
1353 | if (rq->tag == tag) | ||
1354 | ret = 1; | ||
1355 | nvme_end_request(rq, req->status, req->result); | ||
1356 | } | ||
1352 | 1357 | ||
1353 | nvme_end_request(rq, cqe->status, cqe->result); | ||
1354 | return ret; | 1358 | return ret; |
1355 | } | 1359 | } |
1356 | 1360 | ||
@@ -1591,31 +1595,11 @@ nvme_rdma_timeout(struct request *rq, bool reserved) | |||
1591 | * We cannot accept any other command until the Connect command has completed. | 1595 | * We cannot accept any other command until the Connect command has completed. |
1592 | */ | 1596 | */ |
1593 | static inline blk_status_t | 1597 | static inline blk_status_t |
1594 | nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue, struct request *rq) | 1598 | nvme_rdma_is_ready(struct nvme_rdma_queue *queue, struct request *rq) |
1595 | { | 1599 | { |
1596 | if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) { | 1600 | if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) |
1597 | struct nvme_command *cmd = nvme_req(rq)->cmd; | 1601 | return nvmf_check_init_req(&queue->ctrl->ctrl, rq); |
1598 | 1602 | return BLK_STS_OK; | |
1599 | if (!blk_rq_is_passthrough(rq) || | ||
1600 | cmd->common.opcode != nvme_fabrics_command || | ||
1601 | cmd->fabrics.fctype != nvme_fabrics_type_connect) { | ||
1602 | /* | ||
1603 | * reconnecting state means transport disruption, which | ||
1604 | * can take a long time and even might fail permanently, | ||
1605 | * fail fast to give upper layers a chance to failover. | ||
1606 | * deleting state means that the ctrl will never accept | ||
1607 | * commands again, fail it permanently. | ||
1608 | */ | ||
1609 | if (queue->ctrl->ctrl.state == NVME_CTRL_RECONNECTING || | ||
1610 | queue->ctrl->ctrl.state == NVME_CTRL_DELETING) { | ||
1611 | nvme_req(rq)->status = NVME_SC_ABORT_REQ; | ||
1612 | return BLK_STS_IOERR; | ||
1613 | } | ||
1614 | return BLK_STS_RESOURCE; /* try again later */ | ||
1615 | } | ||
1616 | } | ||
1617 | |||
1618 | return 0; | ||
1619 | } | 1603 | } |
1620 | 1604 | ||
1621 | static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, | 1605 | static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, |
@@ -1627,14 +1611,13 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, | |||
1627 | struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); | 1611 | struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); |
1628 | struct nvme_rdma_qe *sqe = &req->sqe; | 1612 | struct nvme_rdma_qe *sqe = &req->sqe; |
1629 | struct nvme_command *c = sqe->data; | 1613 | struct nvme_command *c = sqe->data; |
1630 | bool flush = false; | ||
1631 | struct ib_device *dev; | 1614 | struct ib_device *dev; |
1632 | blk_status_t ret; | 1615 | blk_status_t ret; |
1633 | int err; | 1616 | int err; |
1634 | 1617 | ||
1635 | WARN_ON_ONCE(rq->tag < 0); | 1618 | WARN_ON_ONCE(rq->tag < 0); |
1636 | 1619 | ||
1637 | ret = nvme_rdma_queue_is_ready(queue, rq); | 1620 | ret = nvme_rdma_is_ready(queue, rq); |
1638 | if (unlikely(ret)) | 1621 | if (unlikely(ret)) |
1639 | return ret; | 1622 | return ret; |
1640 | 1623 | ||
@@ -1656,13 +1639,13 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, | |||
1656 | goto err; | 1639 | goto err; |
1657 | } | 1640 | } |
1658 | 1641 | ||
1642 | sqe->cqe.done = nvme_rdma_send_done; | ||
1643 | |||
1659 | ib_dma_sync_single_for_device(dev, sqe->dma, | 1644 | ib_dma_sync_single_for_device(dev, sqe->dma, |
1660 | sizeof(struct nvme_command), DMA_TO_DEVICE); | 1645 | sizeof(struct nvme_command), DMA_TO_DEVICE); |
1661 | 1646 | ||
1662 | if (req_op(rq) == REQ_OP_FLUSH) | ||
1663 | flush = true; | ||
1664 | err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, | 1647 | err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, |
1665 | req->mr->need_inval ? &req->reg_wr.wr : NULL, flush); | 1648 | req->mr ? &req->reg_wr.wr : NULL); |
1666 | if (unlikely(err)) { | 1649 | if (unlikely(err)) { |
1667 | nvme_rdma_unmap_data(queue, rq); | 1650 | nvme_rdma_unmap_data(queue, rq); |
1668 | goto err; | 1651 | goto err; |
@@ -1810,7 +1793,6 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { | |||
1810 | .submit_async_event = nvme_rdma_submit_async_event, | 1793 | .submit_async_event = nvme_rdma_submit_async_event, |
1811 | .delete_ctrl = nvme_rdma_delete_ctrl, | 1794 | .delete_ctrl = nvme_rdma_delete_ctrl, |
1812 | .get_address = nvmf_get_address, | 1795 | .get_address = nvmf_get_address, |
1813 | .reinit_request = nvme_rdma_reinit_request, | ||
1814 | }; | 1796 | }; |
1815 | 1797 | ||
1816 | static inline bool | 1798 | static inline bool |
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 664d3013f68f..5fd86039e353 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -533,15 +533,15 @@ nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue, | |||
533 | 533 | ||
534 | tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq); | 534 | tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq); |
535 | 535 | ||
536 | /* release the queue lookup reference on the completed IO */ | ||
537 | nvmet_fc_tgt_q_put(queue); | ||
538 | |||
536 | spin_lock_irqsave(&queue->qlock, flags); | 539 | spin_lock_irqsave(&queue->qlock, flags); |
537 | deferfcp = list_first_entry_or_null(&queue->pending_cmd_list, | 540 | deferfcp = list_first_entry_or_null(&queue->pending_cmd_list, |
538 | struct nvmet_fc_defer_fcp_req, req_list); | 541 | struct nvmet_fc_defer_fcp_req, req_list); |
539 | if (!deferfcp) { | 542 | if (!deferfcp) { |
540 | list_add_tail(&fod->fcp_list, &fod->queue->fod_list); | 543 | list_add_tail(&fod->fcp_list, &fod->queue->fod_list); |
541 | spin_unlock_irqrestore(&queue->qlock, flags); | 544 | spin_unlock_irqrestore(&queue->qlock, flags); |
542 | |||
543 | /* Release reference taken at queue lookup and fod allocation */ | ||
544 | nvmet_fc_tgt_q_put(queue); | ||
545 | return; | 545 | return; |
546 | } | 546 | } |
547 | 547 | ||
@@ -760,6 +760,9 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue) | |||
760 | tgtport->ops->fcp_req_release(&tgtport->fc_target_port, | 760 | tgtport->ops->fcp_req_release(&tgtport->fc_target_port, |
761 | deferfcp->fcp_req); | 761 | deferfcp->fcp_req); |
762 | 762 | ||
763 | /* release the queue lookup reference */ | ||
764 | nvmet_fc_tgt_q_put(queue); | ||
765 | |||
763 | kfree(deferfcp); | 766 | kfree(deferfcp); |
764 | 767 | ||
765 | spin_lock_irqsave(&queue->qlock, flags); | 768 | spin_lock_irqsave(&queue->qlock, flags); |
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 96d390416789..1e21b286f299 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -52,10 +52,15 @@ static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl) | |||
52 | return container_of(ctrl, struct nvme_loop_ctrl, ctrl); | 52 | return container_of(ctrl, struct nvme_loop_ctrl, ctrl); |
53 | } | 53 | } |
54 | 54 | ||
55 | enum nvme_loop_queue_flags { | ||
56 | NVME_LOOP_Q_LIVE = 0, | ||
57 | }; | ||
58 | |||
55 | struct nvme_loop_queue { | 59 | struct nvme_loop_queue { |
56 | struct nvmet_cq nvme_cq; | 60 | struct nvmet_cq nvme_cq; |
57 | struct nvmet_sq nvme_sq; | 61 | struct nvmet_sq nvme_sq; |
58 | struct nvme_loop_ctrl *ctrl; | 62 | struct nvme_loop_ctrl *ctrl; |
63 | unsigned long flags; | ||
59 | }; | 64 | }; |
60 | 65 | ||
61 | static struct nvmet_port *nvmet_loop_port; | 66 | static struct nvmet_port *nvmet_loop_port; |
@@ -144,6 +149,14 @@ nvme_loop_timeout(struct request *rq, bool reserved) | |||
144 | return BLK_EH_HANDLED; | 149 | return BLK_EH_HANDLED; |
145 | } | 150 | } |
146 | 151 | ||
152 | static inline blk_status_t nvme_loop_is_ready(struct nvme_loop_queue *queue, | ||
153 | struct request *rq) | ||
154 | { | ||
155 | if (unlikely(!test_bit(NVME_LOOP_Q_LIVE, &queue->flags))) | ||
156 | return nvmf_check_init_req(&queue->ctrl->ctrl, rq); | ||
157 | return BLK_STS_OK; | ||
158 | } | ||
159 | |||
147 | static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, | 160 | static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, |
148 | const struct blk_mq_queue_data *bd) | 161 | const struct blk_mq_queue_data *bd) |
149 | { | 162 | { |
@@ -153,6 +166,10 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, | |||
153 | struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); | 166 | struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req); |
154 | blk_status_t ret; | 167 | blk_status_t ret; |
155 | 168 | ||
169 | ret = nvme_loop_is_ready(queue, req); | ||
170 | if (unlikely(ret)) | ||
171 | return ret; | ||
172 | |||
156 | ret = nvme_setup_cmd(ns, req, &iod->cmd); | 173 | ret = nvme_setup_cmd(ns, req, &iod->cmd); |
157 | if (ret) | 174 | if (ret) |
158 | return ret; | 175 | return ret; |
@@ -267,6 +284,7 @@ static const struct blk_mq_ops nvme_loop_admin_mq_ops = { | |||
267 | 284 | ||
268 | static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl) | 285 | static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl) |
269 | { | 286 | { |
287 | clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags); | ||
270 | nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); | 288 | nvmet_sq_destroy(&ctrl->queues[0].nvme_sq); |
271 | blk_cleanup_queue(ctrl->ctrl.admin_q); | 289 | blk_cleanup_queue(ctrl->ctrl.admin_q); |
272 | blk_mq_free_tag_set(&ctrl->admin_tag_set); | 290 | blk_mq_free_tag_set(&ctrl->admin_tag_set); |
@@ -297,8 +315,10 @@ static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl) | |||
297 | { | 315 | { |
298 | int i; | 316 | int i; |
299 | 317 | ||
300 | for (i = 1; i < ctrl->ctrl.queue_count; i++) | 318 | for (i = 1; i < ctrl->ctrl.queue_count; i++) { |
319 | clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[i].flags); | ||
301 | nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); | 320 | nvmet_sq_destroy(&ctrl->queues[i].nvme_sq); |
321 | } | ||
302 | } | 322 | } |
303 | 323 | ||
304 | static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl) | 324 | static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl) |
@@ -338,6 +358,7 @@ static int nvme_loop_connect_io_queues(struct nvme_loop_ctrl *ctrl) | |||
338 | ret = nvmf_connect_io_queue(&ctrl->ctrl, i); | 358 | ret = nvmf_connect_io_queue(&ctrl->ctrl, i); |
339 | if (ret) | 359 | if (ret) |
340 | return ret; | 360 | return ret; |
361 | set_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[i].flags); | ||
341 | } | 362 | } |
342 | 363 | ||
343 | return 0; | 364 | return 0; |
@@ -380,6 +401,8 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) | |||
380 | if (error) | 401 | if (error) |
381 | goto out_cleanup_queue; | 402 | goto out_cleanup_queue; |
382 | 403 | ||
404 | set_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags); | ||
405 | |||
383 | error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap); | 406 | error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap); |
384 | if (error) { | 407 | if (error) { |
385 | dev_err(ctrl->ctrl.device, | 408 | dev_err(ctrl->ctrl.device, |
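Note: the loop.c hunks gate request submission on a per-queue NVME_LOOP_Q_LIVE bit, set after connect and cleared on teardown; while a queue is not live, only initialization commands are allowed through. A minimal C sketch of that gating, with stand-in helpers and status codes rather than the kernel API:

/*
 * Illustrative sketch: refuse normal I/O on a queue until it has been
 * connected, but let connect/init commands pass.
 */
#include <stdbool.h>
#include <stdio.h>

enum { STS_OK = 0, STS_RESOURCE = 1 };

struct queue {
        bool live;              /* set after a successful connect */
};

static int check_ready(struct queue *q, bool is_connect_cmd)
{
        if (!q->live)
                return is_connect_cmd ? STS_OK : STS_RESOURCE;
        return STS_OK;
}

int main(void)
{
        struct queue q = { .live = false };

        printf("I/O before connect: %d\n", check_ready(&q, false)); /* refused */
        printf("connect command:    %d\n", check_ready(&q, true));  /* allowed */

        q.live = true;          /* the connect path sets the live bit */
        printf("I/O after connect:  %d\n", check_ready(&q, false)); /* allowed */
        return 0;
}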
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h index 90fc490f973f..821f71a2e48f 100644 --- a/include/uapi/linux/bcache.h +++ b/include/uapi/linux/bcache.h | |||
@@ -91,7 +91,7 @@ PTR_FIELD(PTR_GEN, 0, 8) | |||
91 | 91 | ||
92 | #define PTR_CHECK_DEV ((1 << PTR_DEV_BITS) - 1) | 92 | #define PTR_CHECK_DEV ((1 << PTR_DEV_BITS) - 1) |
93 | 93 | ||
94 | #define PTR(gen, offset, dev) \ | 94 | #define MAKE_PTR(gen, offset, dev) \ |
95 | ((((__u64) dev) << 51) | ((__u64) offset) << 8 | gen) | 95 | ((((__u64) dev) << 51) | ((__u64) offset) << 8 | gen) |
96 | 96 | ||
97 | /* Bkey utility code */ | 97 | /* Bkey utility code */ |
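Note: the bcache.h hunk only renames the macro (PTR collides with an architecture macro, hence the MIPS build fix); the encoding is unchanged. From the shifts visible above, gen sits in bits 0-7, offset starts at bit 8, and dev starts at bit 51. A small standalone sketch of packing and unpacking that layout:

/*
 * Illustrative sketch of the MAKE_PTR bit layout shown in the hunk.
 */
#include <stdint.h>
#include <stdio.h>

#define MAKE_PTR(gen, offset, dev) \
        ((((uint64_t)(dev)) << 51) | (((uint64_t)(offset)) << 8) | (gen))

int main(void)
{
        uint64_t ptr = MAKE_PTR(0x2a, 123456, 3);

        printf("gen    = %llu\n", (unsigned long long)(ptr & 0xff));
        printf("offset = %llu\n",
               (unsigned long long)((ptr >> 8) & ((1ULL << 43) - 1)));
        printf("dev    = %llu\n", (unsigned long long)(ptr >> 51));
        return 0;
}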
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 206e0e2ace53..987d9a9ae283 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c | |||
@@ -591,7 +591,7 @@ static int __blk_trace_setup(struct request_queue *q, char *name, dev_t dev, | |||
591 | return ret; | 591 | return ret; |
592 | 592 | ||
593 | if (copy_to_user(arg, &buts, sizeof(buts))) { | 593 | if (copy_to_user(arg, &buts, sizeof(buts))) { |
594 | blk_trace_remove(q); | 594 | __blk_trace_remove(q); |
595 | return -EFAULT; | 595 | return -EFAULT; |
596 | } | 596 | } |
597 | return 0; | 597 | return 0; |
@@ -637,7 +637,7 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name, | |||
637 | return ret; | 637 | return ret; |
638 | 638 | ||
639 | if (copy_to_user(arg, &buts.name, ARRAY_SIZE(buts.name))) { | 639 | if (copy_to_user(arg, &buts.name, ARRAY_SIZE(buts.name))) { |
640 | blk_trace_remove(q); | 640 | __blk_trace_remove(q); |
641 | return -EFAULT; | 641 | return -EFAULT; |
642 | } | 642 | } |
643 | 643 | ||
@@ -872,7 +872,7 @@ static void blk_add_trace_rq_complete(void *ignore, struct request *rq, | |||
872 | * | 872 | * |
873 | **/ | 873 | **/ |
874 | static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, | 874 | static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, |
875 | u32 what, int error, union kernfs_node_id *cgid) | 875 | u32 what, int error) |
876 | { | 876 | { |
877 | struct blk_trace *bt = q->blk_trace; | 877 | struct blk_trace *bt = q->blk_trace; |
878 | 878 | ||
@@ -880,22 +880,21 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, | |||
880 | return; | 880 | return; |
881 | 881 | ||
882 | __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, | 882 | __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, |
883 | bio_op(bio), bio->bi_opf, what, error, 0, NULL, cgid); | 883 | bio_op(bio), bio->bi_opf, what, error, 0, NULL, |
884 | blk_trace_bio_get_cgid(q, bio)); | ||
884 | } | 885 | } |
885 | 886 | ||
886 | static void blk_add_trace_bio_bounce(void *ignore, | 887 | static void blk_add_trace_bio_bounce(void *ignore, |
887 | struct request_queue *q, struct bio *bio) | 888 | struct request_queue *q, struct bio *bio) |
888 | { | 889 | { |
889 | blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0, | 890 | blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0); |
890 | blk_trace_bio_get_cgid(q, bio)); | ||
891 | } | 891 | } |
892 | 892 | ||
893 | static void blk_add_trace_bio_complete(void *ignore, | 893 | static void blk_add_trace_bio_complete(void *ignore, |
894 | struct request_queue *q, struct bio *bio, | 894 | struct request_queue *q, struct bio *bio, |
895 | int error) | 895 | int error) |
896 | { | 896 | { |
897 | blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error, | 897 | blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error); |
898 | blk_trace_bio_get_cgid(q, bio)); | ||
899 | } | 898 | } |
900 | 899 | ||
901 | static void blk_add_trace_bio_backmerge(void *ignore, | 900 | static void blk_add_trace_bio_backmerge(void *ignore, |
@@ -903,8 +902,7 @@ static void blk_add_trace_bio_backmerge(void *ignore, | |||
903 | struct request *rq, | 902 | struct request *rq, |
904 | struct bio *bio) | 903 | struct bio *bio) |
905 | { | 904 | { |
906 | blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0, | 905 | blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0); |
907 | blk_trace_bio_get_cgid(q, bio)); | ||
908 | } | 906 | } |
909 | 907 | ||
910 | static void blk_add_trace_bio_frontmerge(void *ignore, | 908 | static void blk_add_trace_bio_frontmerge(void *ignore, |
@@ -912,15 +910,13 @@ static void blk_add_trace_bio_frontmerge(void *ignore, | |||
912 | struct request *rq, | 910 | struct request *rq, |
913 | struct bio *bio) | 911 | struct bio *bio) |
914 | { | 912 | { |
915 | blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0, | 913 | blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0); |
916 | blk_trace_bio_get_cgid(q, bio)); | ||
917 | } | 914 | } |
918 | 915 | ||
919 | static void blk_add_trace_bio_queue(void *ignore, | 916 | static void blk_add_trace_bio_queue(void *ignore, |
920 | struct request_queue *q, struct bio *bio) | 917 | struct request_queue *q, struct bio *bio) |
921 | { | 918 | { |
922 | blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0, | 919 | blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0); |
923 | blk_trace_bio_get_cgid(q, bio)); | ||
924 | } | 920 | } |
925 | 921 | ||
926 | static void blk_add_trace_getrq(void *ignore, | 922 | static void blk_add_trace_getrq(void *ignore, |
@@ -928,8 +924,7 @@ static void blk_add_trace_getrq(void *ignore, | |||
928 | struct bio *bio, int rw) | 924 | struct bio *bio, int rw) |
929 | { | 925 | { |
930 | if (bio) | 926 | if (bio) |
931 | blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0, | 927 | blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0); |
932 | blk_trace_bio_get_cgid(q, bio)); | ||
933 | else { | 928 | else { |
934 | struct blk_trace *bt = q->blk_trace; | 929 | struct blk_trace *bt = q->blk_trace; |
935 | 930 | ||
@@ -945,8 +940,7 @@ static void blk_add_trace_sleeprq(void *ignore, | |||
945 | struct bio *bio, int rw) | 940 | struct bio *bio, int rw) |
946 | { | 941 | { |
947 | if (bio) | 942 | if (bio) |
948 | blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0, | 943 | blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0); |
949 | blk_trace_bio_get_cgid(q, bio)); | ||
950 | else { | 944 | else { |
951 | struct blk_trace *bt = q->blk_trace; | 945 | struct blk_trace *bt = q->blk_trace; |
952 | 946 | ||
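Note: the blktrace hunks fold the blk_trace_bio_get_cgid(q, bio) lookup into blk_add_trace_bio() itself, so the many call sites shrink and cannot pass a mismatched id. A simplified sketch of that refactor shape, with stand-in types rather than the kernel structures:

/*
 * Illustrative sketch: the helper derives the cgroup id from the bio
 * instead of every caller computing and passing it.
 */
#include <stdio.h>

struct bio { int cgid; };

static int bio_get_cgid(const struct bio *bio)
{
        return bio->cgid;       /* stands in for the cgid lookup */
}

static void add_trace_bio(const struct bio *bio, const char *what)
{
        printf("%s: cgid=%d\n", what, bio_get_cgid(bio));
}

int main(void)
{
        struct bio b = { .cgid = 7 };

        add_trace_bio(&b, "QUEUE");     /* callers no longer pass the id */
        add_trace_bio(&b, "COMPLETE");
        return 0;
}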
diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 74b52dfd5852..84b2dc76f140 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c | |||
@@ -113,11 +113,23 @@ static const struct file_operations bdi_debug_stats_fops = { | |||
113 | .release = single_release, | 113 | .release = single_release, |
114 | }; | 114 | }; |
115 | 115 | ||
116 | static void bdi_debug_register(struct backing_dev_info *bdi, const char *name) | 116 | static int bdi_debug_register(struct backing_dev_info *bdi, const char *name) |
117 | { | 117 | { |
118 | if (!bdi_debug_root) | ||
119 | return -ENOMEM; | ||
120 | |||
118 | bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root); | 121 | bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root); |
122 | if (!bdi->debug_dir) | ||
123 | return -ENOMEM; | ||
124 | |||
119 | bdi->debug_stats = debugfs_create_file("stats", 0444, bdi->debug_dir, | 125 | bdi->debug_stats = debugfs_create_file("stats", 0444, bdi->debug_dir, |
120 | bdi, &bdi_debug_stats_fops); | 126 | bdi, &bdi_debug_stats_fops); |
127 | if (!bdi->debug_stats) { | ||
128 | debugfs_remove(bdi->debug_dir); | ||
129 | return -ENOMEM; | ||
130 | } | ||
131 | |||
132 | return 0; | ||
121 | } | 133 | } |
122 | 134 | ||
123 | static void bdi_debug_unregister(struct backing_dev_info *bdi) | 135 | static void bdi_debug_unregister(struct backing_dev_info *bdi) |
@@ -129,9 +141,10 @@ static void bdi_debug_unregister(struct backing_dev_info *bdi) | |||
129 | static inline void bdi_debug_init(void) | 141 | static inline void bdi_debug_init(void) |
130 | { | 142 | { |
131 | } | 143 | } |
132 | static inline void bdi_debug_register(struct backing_dev_info *bdi, | 144 | static inline int bdi_debug_register(struct backing_dev_info *bdi, |
133 | const char *name) | 145 | const char *name) |
134 | { | 146 | { |
147 | return 0; | ||
135 | } | 148 | } |
136 | static inline void bdi_debug_unregister(struct backing_dev_info *bdi) | 149 | static inline void bdi_debug_unregister(struct backing_dev_info *bdi) |
137 | { | 150 | { |
@@ -869,10 +882,13 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args) | |||
869 | if (IS_ERR(dev)) | 882 | if (IS_ERR(dev)) |
870 | return PTR_ERR(dev); | 883 | return PTR_ERR(dev); |
871 | 884 | ||
885 | if (bdi_debug_register(bdi, dev_name(dev))) { | ||
886 | device_destroy(bdi_class, dev->devt); | ||
887 | return -ENOMEM; | ||
888 | } | ||
872 | cgwb_bdi_register(bdi); | 889 | cgwb_bdi_register(bdi); |
873 | bdi->dev = dev; | 890 | bdi->dev = dev; |
874 | 891 | ||
875 | bdi_debug_register(bdi, dev_name(dev)); | ||
876 | set_bit(WB_registered, &bdi->wb.state); | 892 | set_bit(WB_registered, &bdi->wb.state); |
877 | 893 | ||
878 | spin_lock_bh(&bdi_lock); | 894 | spin_lock_bh(&bdi_lock); |
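Note: the backing-dev hunks turn bdi_debug_register() into a checked, two-step setup (directory, then stats file) that rolls back the directory if the file creation fails, and bdi_register_va() destroys the freshly created device when registration fails. A minimal userspace sketch of that rollback shape; the allocations below merely stand in for the debugfs entries and the device:

/*
 * Illustrative sketch: each setup step is checked, a failed later step
 * undoes the earlier one, and the caller tears down what it created.
 */
#include <stdio.h>
#include <stdlib.h>

struct bdi {
        void *debug_dir;
        void *debug_stats;
};

static int debug_register(struct bdi *bdi)
{
        bdi->debug_dir = malloc(1);             /* debugfs_create_dir() */
        if (!bdi->debug_dir)
                return -1;

        bdi->debug_stats = malloc(1);           /* debugfs_create_file() */
        if (!bdi->debug_stats) {
                free(bdi->debug_dir);           /* undo the first step */
                bdi->debug_dir = NULL;
                return -1;
        }
        return 0;
}

int main(void)
{
        struct bdi bdi = { 0 };
        void *dev = malloc(1);                  /* the struct device */

        if (!dev)
                return 1;

        if (debug_register(&bdi)) {
                free(dev);                      /* device_destroy() */
                fprintf(stderr, "bdi registration failed\n");
                return 1;
        }

        printf("bdi registered\n");
        return 0;
}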