author     Linus Torvalds <torvalds@linux-foundation.org>  2017-12-01 08:05:45 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2017-12-01 08:05:45 -0500
commit     75f64f68afa165ebe139cca2adb4df0a229a06de (patch)
tree       6aee1ee80863ea121c31f15ec794f2b9943ccbfd
parent     df8ba95c572a187ed2aa7403e97a7a7f58c01f00 (diff)
parent     ed565371e368f014db237aacf42b27b40b1bd247 (diff)
Merge branch 'for-linus' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe:
 "A selection of fixes/changes that should make it into this series.
  This contains:

   - NVMe, two merges, containing:
       - pci-e, rdma, and fc fixes
       - Device quirks

   - Fix for a badblocks leak in null_blk

   - bcache fix from Rui Hua for a race condition regression where
     -EINTR was returned to upper layers that didn't expect it.

   - Regression fix for blktrace for a bug introduced in this series.

   - blktrace cleanup for cgroup id.

   - bdi registration error handling.

   - Small series with cleanups for blk-wbt.

   - Various little fixes for typos and the like.

  Nothing earth shattering, most important are the NVMe and bcache
  fixes"

* 'for-linus' of git://git.kernel.dk/linux-block: (34 commits)
  nvme-pci: fix NULL pointer dereference in nvme_free_host_mem()
  nvme-rdma: fix memory leak during queue allocation
  blktrace: fix trace mutex deadlock
  nvme-rdma: Use mr pool
  nvme-rdma: Check remotely invalidated rkey matches our expected rkey
  nvme-rdma: wait for local invalidation before completing a request
  nvme-rdma: don't complete requests before a send work request has completed
  nvme-rdma: don't suppress send completions
  bcache: check return value of register_shrinker
  bcache: recover data from backing when data is clean
  bcache: Fix building error on MIPS
  bcache: add a comment in journal bucket reading
  nvme-fc: don't use bit masks for set/test_bit() numbers
  blk-wbt: fix comments typo
  blk-wbt: move wbt_clear_stat to common place in wbt_done
  blk-sysfs: remove NULL pointer checking in queue_wb_lat_store
  blk-wbt: remove duplicated setting in wbt_init
  nvme-pci: add quirk for delay before CHK RDY for WDC SN200
  block: remove useless assignment in bio_split
  null_blk: fix dev->badblocks leak
  ...
-rw-r--r--  block/bio.c | 2
-rw-r--r--  block/blk-sysfs.c | 5
-rw-r--r--  block/blk-wbt.c | 7
-rw-r--r--  block/genhd.c | 9
-rw-r--r--  drivers/block/null_blk.c | 5
-rw-r--r--  drivers/md/bcache/alloc.c | 2
-rw-r--r--  drivers/md/bcache/btree.c | 5
-rw-r--r--  drivers/md/bcache/extents.c | 2
-rw-r--r--  drivers/md/bcache/journal.c | 7
-rw-r--r--  drivers/md/bcache/request.c | 13
-rw-r--r--  drivers/nvme/host/core.c | 19
-rw-r--r--  drivers/nvme/host/fabrics.h | 30
-rw-r--r--  drivers/nvme/host/fc.c | 21
-rw-r--r--  drivers/nvme/host/multipath.c | 2
-rw-r--r--  drivers/nvme/host/nvme.h | 2
-rw-r--r--  drivers/nvme/host/pci.c | 17
-rw-r--r--  drivers/nvme/host/rdma.c | 266
-rw-r--r--  drivers/nvme/target/fc.c | 9
-rw-r--r--  drivers/nvme/target/loop.c | 25
-rw-r--r--  include/uapi/linux/bcache.h | 2
-rw-r--r--  kernel/trace/blktrace.c | 30
-rw-r--r--  mm/backing-dev.c | 22
22 files changed, 291 insertions, 211 deletions
diff --git a/block/bio.c b/block/bio.c
index 228229f3bb76..8bfdea58159b 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1819,7 +1819,7 @@ EXPORT_SYMBOL(bio_endio);
 struct bio *bio_split(struct bio *bio, int sectors,
 		      gfp_t gfp, struct bio_set *bs)
 {
-	struct bio *split = NULL;
+	struct bio *split;
 
 	BUG_ON(sectors <= 0);
 	BUG_ON(sectors >= bio_sectors(bio));
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index e54be402899d..870484eaed1f 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -450,12 +450,9 @@ static ssize_t queue_wb_lat_store(struct request_queue *q, const char *page,
 		ret = wbt_init(q);
 		if (ret)
 			return ret;
-
-		rwb = q->rq_wb;
-		if (!rwb)
-			return -EINVAL;
 	}
 
+	rwb = q->rq_wb;
 	if (val == -1)
 		rwb->min_lat_nsec = wbt_default_latency_nsec(q);
 	else if (val >= 0)
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index b252da0e4c11..ae8de9780085 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -178,12 +178,11 @@ void wbt_done(struct rq_wb *rwb, struct blk_issue_stat *stat)
 
 		if (wbt_is_read(stat))
 			wb_timestamp(rwb, &rwb->last_comp);
-		wbt_clear_state(stat);
 	} else {
 		WARN_ON_ONCE(stat == rwb->sync_cookie);
 		__wbt_done(rwb, wbt_stat_to_mask(stat));
-		wbt_clear_state(stat);
 	}
+	wbt_clear_state(stat);
 }
 
 /*
@@ -482,7 +481,7 @@ static inline unsigned int get_limit(struct rq_wb *rwb, unsigned long rw)
 
 	/*
 	 * At this point we know it's a buffered write. If this is
-	 * kswapd trying to free memory, or REQ_SYNC is set, set, then
+	 * kswapd trying to free memory, or REQ_SYNC is set, then
 	 * it's WB_SYNC_ALL writeback, and we'll use the max limit for
 	 * that. If the write is marked as a background write, then use
 	 * the idle limit, or go to normal if we haven't had competing
@@ -723,8 +722,6 @@ int wbt_init(struct request_queue *q)
 		init_waitqueue_head(&rwb->rq_wait[i].wait);
 	}
 
-	rwb->wc = 1;
-	rwb->queue_depth = RWB_DEF_DEPTH;
 	rwb->last_comp = rwb->last_issue = jiffies;
 	rwb->queue = q;
 	rwb->win_nsec = RWB_WINDOW_NSEC;
diff --git a/block/genhd.c b/block/genhd.c
index c2223f12a805..96a66f671720 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -671,10 +671,13 @@ void device_add_disk(struct device *parent, struct gendisk *disk)
 		disk->flags |= GENHD_FL_SUPPRESS_PARTITION_INFO;
 		disk->flags |= GENHD_FL_NO_PART_SCAN;
 	} else {
+		int ret;
+
 		/* Register BDI before referencing it from bdev */
 		disk_to_dev(disk)->devt = devt;
-		bdi_register_owner(disk->queue->backing_dev_info,
+		ret = bdi_register_owner(disk->queue->backing_dev_info,
 				   disk_to_dev(disk));
+		WARN_ON(ret);
 		blk_register_region(disk_devt(disk), disk->minors, NULL,
 				    exact_match, exact_lock, disk);
 	}
@@ -1389,7 +1392,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
 
 	if (minors > DISK_MAX_PARTS) {
 		printk(KERN_ERR
-			"block: can't allocated more than %d partitions\n",
+			"block: can't allocate more than %d partitions\n",
 			DISK_MAX_PARTS);
 		minors = DISK_MAX_PARTS;
 	}
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index c61960deb74a..ccb9975a97fa 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -471,7 +471,6 @@ static void nullb_device_release(struct config_item *item)
 {
 	struct nullb_device *dev = to_nullb_device(item);
 
-	badblocks_exit(&dev->badblocks);
 	null_free_device_storage(dev, false);
 	null_free_dev(dev);
 }
@@ -582,6 +581,10 @@ static struct nullb_device *null_alloc_dev(void)
 
 static void null_free_dev(struct nullb_device *dev)
 {
+	if (!dev)
+		return;
+
+	badblocks_exit(&dev->badblocks);
 	kfree(dev);
 }
 
diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index a27d85232ce1..a0cc1bc6d884 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -490,7 +490,7 @@ int __bch_bucket_alloc_set(struct cache_set *c, unsigned reserve,
 		if (b == -1)
 			goto err;
 
-		k->ptr[i] = PTR(ca->buckets[b].gen,
+		k->ptr[i] = MAKE_PTR(ca->buckets[b].gen,
 				bucket_to_sector(c, b),
 				ca->sb.nr_this_dev);
 
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 11c5503d31dc..81e8dc3dbe5e 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -807,7 +807,10 @@ int bch_btree_cache_alloc(struct cache_set *c)
 	c->shrink.scan_objects = bch_mca_scan;
 	c->shrink.seeks = 4;
 	c->shrink.batch = c->btree_pages * 2;
-	register_shrinker(&c->shrink);
+
+	if (register_shrinker(&c->shrink))
+		pr_warn("bcache: %s: could not register shrinker",
+				__func__);
 
 	return 0;
 }
diff --git a/drivers/md/bcache/extents.c b/drivers/md/bcache/extents.c
index 41c238fc3733..f9d391711595 100644
--- a/drivers/md/bcache/extents.c
+++ b/drivers/md/bcache/extents.c
@@ -585,7 +585,7 @@ static bool bch_extent_merge(struct btree_keys *bk, struct bkey *l, struct bkey
 		return false;
 
 	for (i = 0; i < KEY_PTRS(l); i++)
-		if (l->ptr[i] + PTR(0, KEY_SIZE(l), 0) != r->ptr[i] ||
+		if (l->ptr[i] + MAKE_PTR(0, KEY_SIZE(l), 0) != r->ptr[i] ||
 		    PTR_BUCKET_NR(b->c, l, i) != PTR_BUCKET_NR(b->c, r, i))
 			return false;
 
diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c
index 02a98ddb592d..a87165c1d8e5 100644
--- a/drivers/md/bcache/journal.c
+++ b/drivers/md/bcache/journal.c
@@ -170,6 +170,11 @@ int bch_journal_read(struct cache_set *c, struct list_head *list)
 		 * find a sequence of buckets with valid journal entries
 		 */
 		for (i = 0; i < ca->sb.njournal_buckets; i++) {
+			/*
+			 * We must try the index l with ZERO first for
+			 * correctness due to the scenario that the journal
+			 * bucket is circular buffer which might have wrapped
+			 */
 			l = (i * 2654435769U) % ca->sb.njournal_buckets;
 
 			if (test_bit(l, bitmap))
@@ -507,7 +512,7 @@ static void journal_reclaim(struct cache_set *c)
 			continue;
 
 		ja->cur_idx = next;
-		k->ptr[n++] = PTR(0,
+		k->ptr[n++] = MAKE_PTR(0,
 				  bucket_to_sector(c, ca->sb.d[ja->cur_idx]),
 				  ca->sb.nr_this_dev);
 	}
diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
index 3a7aed7282b2..643c3021624f 100644
--- a/drivers/md/bcache/request.c
+++ b/drivers/md/bcache/request.c
@@ -708,16 +708,15 @@ static void cached_dev_read_error(struct closure *cl)
 {
 	struct search *s = container_of(cl, struct search, cl);
 	struct bio *bio = &s->bio.bio;
-	struct cached_dev *dc = container_of(s->d, struct cached_dev, disk);
 
 	/*
-	 * If cache device is dirty (dc->has_dirty is non-zero), then
-	 * recovery a failed read request from cached device may get a
-	 * stale data back. So read failure recovery is only permitted
-	 * when cache device is clean.
+	 * If read request hit dirty data (s->read_dirty_data is true),
+	 * then recovery a failed read request from cached device may
+	 * get a stale data back. So read failure recovery is only
+	 * permitted when read request hit clean data in cache device,
+	 * or when cache read race happened.
 	 */
-	if (s->recoverable &&
-	    (dc && !atomic_read(&dc->has_dirty))) {
+	if (s->recoverable && !s->read_dirty_data) {
 		/* Retry from the backing device: */
 		trace_bcache_read_retry(s->orig_bio);
 
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 25da74d310d1..f837d666cbd4 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1449,19 +1449,19 @@ static int nvme_pr_command(struct block_device *bdev, u32 cdw10,
 	int srcu_idx, ret;
 	u8 data[16] = { 0, };
 
+	ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx);
+	if (unlikely(!ns))
+		return -EWOULDBLOCK;
+
 	put_unaligned_le64(key, &data[0]);
 	put_unaligned_le64(sa_key, &data[8]);
 
 	memset(&c, 0, sizeof(c));
 	c.common.opcode = op;
-	c.common.nsid = cpu_to_le32(head->ns_id);
+	c.common.nsid = cpu_to_le32(ns->head->ns_id);
 	c.common.cdw10[0] = cpu_to_le32(cdw10);
 
-	ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx);
-	if (unlikely(!ns))
-		ret = -EWOULDBLOCK;
-	else
-		ret = nvme_submit_sync_cmd(ns->queue, &c, data, 16);
+	ret = nvme_submit_sync_cmd(ns->queue, &c, data, 16);
 	nvme_put_ns_from_disk(head, srcu_idx);
 	return ret;
 }
@@ -2961,8 +2961,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
 
 static void nvme_ns_remove(struct nvme_ns *ns)
 {
-	struct nvme_ns_head *head = ns->head;
-
 	if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
 		return;
 
@@ -2980,15 +2978,14 @@ static void nvme_ns_remove(struct nvme_ns *ns)
 
 	mutex_lock(&ns->ctrl->subsys->lock);
 	nvme_mpath_clear_current_path(ns);
-	if (head)
-		list_del_rcu(&ns->siblings);
+	list_del_rcu(&ns->siblings);
 	mutex_unlock(&ns->ctrl->subsys->lock);
 
 	mutex_lock(&ns->ctrl->namespaces_mutex);
 	list_del_init(&ns->list);
 	mutex_unlock(&ns->ctrl->namespaces_mutex);
 
-	synchronize_srcu(&head->srcu);
+	synchronize_srcu(&ns->head->srcu);
 	nvme_put_ns(ns);
 }
 
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 42232e731f19..9ba614953607 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -156,4 +156,34 @@ void nvmf_free_options(struct nvmf_ctrl_options *opts);
 int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
 bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
 
+static inline blk_status_t nvmf_check_init_req(struct nvme_ctrl *ctrl,
+		struct request *rq)
+{
+	struct nvme_command *cmd = nvme_req(rq)->cmd;
+
+	/*
+	 * We cannot accept any other command until the connect command has
+	 * completed, so only allow connect to pass.
+	 */
+	if (!blk_rq_is_passthrough(rq) ||
+	    cmd->common.opcode != nvme_fabrics_command ||
+	    cmd->fabrics.fctype != nvme_fabrics_type_connect) {
+		/*
+		 * Reconnecting state means transport disruption, which can take
+		 * a long time and even might fail permanently, fail fast to
+		 * give upper layers a chance to failover.
+		 * Deleting state means that the ctrl will never accept commands
+		 * again, fail it permanently.
+		 */
+		if (ctrl->state == NVME_CTRL_RECONNECTING ||
+		    ctrl->state == NVME_CTRL_DELETING) {
+			nvme_req(rq)->status = NVME_SC_ABORT_REQ;
+			return BLK_STS_IOERR;
+		}
+		return BLK_STS_RESOURCE; /* try again later */
+	}
+
+	return BLK_STS_OK;
+}
+
 #endif /* _NVME_FABRICS_H */
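The nvmf_check_init_req() helper added above is what the fc, rdma, and loop transports below all fall back to while a queue has not yet finished connecting. Condensed from those later hunks (loop driver names used here; the real per-transport versions are nvme_fc_is_ready(), nvme_rdma_is_ready() and nvme_loop_is_ready()), the recurring gate in ->queue_rq() looks like this:

	static inline blk_status_t nvme_loop_is_ready(struct nvme_loop_queue *queue,
			struct request *rq)
	{
		/*
		 * Until the Connect command has completed, the per-queue LIVE
		 * bit stays clear and only fabrics Connect is let through.
		 */
		if (unlikely(!test_bit(NVME_LOOP_Q_LIVE, &queue->flags)))
			return nvmf_check_init_req(&queue->ctrl->ctrl, rq);
		return BLK_STS_OK;
	}

Each transport sets its LIVE bit once the admin/io queue connect succeeds and clears it on teardown, so the common case is a single test_bit() before command setup.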
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 7ab0be55c7d0..0a8af4daef89 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -31,7 +31,8 @@
 
 
 enum nvme_fc_queue_flags {
-	NVME_FC_Q_CONNECTED = (1 << 0),
+	NVME_FC_Q_CONNECTED = 0,
+	NVME_FC_Q_LIVE,
 };
 
 #define NVMEFC_QUEUE_DELAY	3	/* ms units */
@@ -1927,6 +1928,7 @@ nvme_fc_free_queue(struct nvme_fc_queue *queue)
 	if (!test_and_clear_bit(NVME_FC_Q_CONNECTED, &queue->flags))
 		return;
 
+	clear_bit(NVME_FC_Q_LIVE, &queue->flags);
 	/*
 	 * Current implementation never disconnects a single queue.
 	 * It always terminates a whole association. So there is never
@@ -1934,7 +1936,6 @@ nvme_fc_free_queue(struct nvme_fc_queue *queue)
 	 */
 
 	queue->connection_id = 0;
-	clear_bit(NVME_FC_Q_CONNECTED, &queue->flags);
 }
 
 static void
@@ -2013,6 +2014,8 @@ nvme_fc_connect_io_queues(struct nvme_fc_ctrl *ctrl, u16 qsize)
 		ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
 		if (ret)
 			break;
+
+		set_bit(NVME_FC_Q_LIVE, &ctrl->queues[i].flags);
 	}
 
 	return ret;
@@ -2320,6 +2323,14 @@ busy:
 	return BLK_STS_RESOURCE;
 }
 
+static inline blk_status_t nvme_fc_is_ready(struct nvme_fc_queue *queue,
+		struct request *rq)
+{
+	if (unlikely(!test_bit(NVME_FC_Q_LIVE, &queue->flags)))
+		return nvmf_check_init_req(&queue->ctrl->ctrl, rq);
+	return BLK_STS_OK;
+}
+
 static blk_status_t
 nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
 		const struct blk_mq_queue_data *bd)
@@ -2335,6 +2346,10 @@ nvme_fc_queue_rq(struct blk_mq_hw_ctx *hctx,
 	u32 data_len;
 	blk_status_t ret;
 
+	ret = nvme_fc_is_ready(queue, rq);
+	if (unlikely(ret))
+		return ret;
+
 	ret = nvme_setup_cmd(ns, rq, sqe);
 	if (ret)
 		return ret;
@@ -2727,6 +2742,8 @@ nvme_fc_create_association(struct nvme_fc_ctrl *ctrl)
 	if (ret)
 		goto out_disconnect_admin_queue;
 
+	set_bit(NVME_FC_Q_LIVE, &ctrl->queues[0].flags);
+
 	/*
 	 * Check controller capabilities
 	 *
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 78d92151a904..1218a9fca846 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -131,7 +131,7 @@ static blk_qc_t nvme_ns_head_make_request(struct request_queue *q,
 		bio->bi_opf |= REQ_NVME_MPATH;
 		ret = direct_make_request(bio);
 	} else if (!list_empty_careful(&head->list)) {
-		dev_warn_ratelimited(dev, "no path available - requeing I/O\n");
+		dev_warn_ratelimited(dev, "no path available - requeuing I/O\n");
 
 		spin_lock_irq(&head->requeue_lock);
 		bio_list_add(&head->requeue_list, bio);
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index c0873a68872f..ea1aa5283e8e 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -114,7 +114,7 @@ static inline struct nvme_request *nvme_req(struct request *req)
  * NVME_QUIRK_DELAY_BEFORE_CHK_RDY quirk enabled. The value (in ms) was
  * found empirically.
  */
-#define NVME_QUIRK_DELAY_AMOUNT		2000
+#define NVME_QUIRK_DELAY_AMOUNT		2300
 
 enum nvme_ctrl_state {
 	NVME_CTRL_NEW,
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index a11cfd470089..f5800c3c9082 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1759,6 +1759,7 @@ static void nvme_free_host_mem(struct nvme_dev *dev)
 			dev->nr_host_mem_descs * sizeof(*dev->host_mem_descs),
 			dev->host_mem_descs, dev->host_mem_descs_dma);
 	dev->host_mem_descs = NULL;
+	dev->nr_host_mem_descs = 0;
 }
 
 static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
@@ -1787,7 +1788,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
 	if (!bufs)
 		goto out_free_descs;
 
-	for (size = 0; size < preferred; size += len) {
+	for (size = 0; size < preferred && i < max_entries; size += len) {
 		dma_addr_t dma_addr;
 
 		len = min_t(u64, chunk_size, preferred - size);
@@ -2428,7 +2429,7 @@ static int nvme_dev_map(struct nvme_dev *dev)
 	return -ENODEV;
 }
 
-static unsigned long check_dell_samsung_bug(struct pci_dev *pdev)
+static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
 {
 	if (pdev->vendor == 0x144d && pdev->device == 0xa802) {
 		/*
@@ -2443,6 +2444,14 @@ static unsigned long check_dell_samsung_bug(struct pci_dev *pdev)
 		    (dmi_match(DMI_PRODUCT_NAME, "XPS 15 9550") ||
 		     dmi_match(DMI_PRODUCT_NAME, "Precision 5510")))
 			return NVME_QUIRK_NO_DEEPEST_PS;
+	} else if (pdev->vendor == 0x144d && pdev->device == 0xa804) {
+		/*
+		 * Samsung SSD 960 EVO drops off the PCIe bus after system
+		 * suspend on a Ryzen board, ASUS PRIME B350M-A.
+		 */
+		if (dmi_match(DMI_BOARD_VENDOR, "ASUSTeK COMPUTER INC.") &&
+		    dmi_match(DMI_BOARD_NAME, "PRIME B350M-A"))
+			return NVME_QUIRK_NO_APST;
 	}
 
 	return 0;
@@ -2482,7 +2491,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (result)
 		goto unmap;
 
-	quirks |= check_dell_samsung_bug(pdev);
+	quirks |= check_vendor_combination_bug(pdev);
 
 	result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops,
 			quirks);
@@ -2665,6 +2674,8 @@ static const struct pci_device_id nvme_id_table[] = {
 		.driver_data = NVME_QUIRK_IDENTIFY_CNS, },
 	{ PCI_DEVICE(0x1c58, 0x0003),	/* HGST adapter */
 		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+	{ PCI_DEVICE(0x1c58, 0x0023),	/* WDC SN200 adapter */
+		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
	{ PCI_DEVICE(0x1c5f, 0x0540),	/* Memblaze Pblaze4 adapter */
 		.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
 	{ PCI_DEVICE(0x144d, 0xa821),	/* Samsung PM1725 */
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index 4f9bf2f815c3..37af56596be6 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/init.h>
 #include <linux/slab.h>
+#include <rdma/mr_pool.h>
 #include <linux/err.h>
 #include <linux/string.h>
 #include <linux/atomic.h>
@@ -59,6 +60,9 @@ struct nvme_rdma_request {
 	struct nvme_request	req;
 	struct ib_mr		*mr;
 	struct nvme_rdma_qe	sqe;
+	union nvme_result	result;
+	__le16			status;
+	refcount_t		ref;
 	struct ib_sge		sge[1 + NVME_RDMA_MAX_INLINE_SEGMENTS];
 	u32			num_sge;
 	int			nents;
@@ -73,11 +77,11 @@ struct nvme_rdma_request {
 enum nvme_rdma_queue_flags {
 	NVME_RDMA_Q_ALLOCATED	= 0,
 	NVME_RDMA_Q_LIVE	= 1,
+	NVME_RDMA_Q_TR_READY	= 2,
 };
 
 struct nvme_rdma_queue {
 	struct nvme_rdma_qe	*rsp_ring;
-	atomic_t		sig_count;
 	int			queue_size;
 	size_t			cmnd_capsule_len;
 	struct nvme_rdma_ctrl	*ctrl;
@@ -258,32 +262,6 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
 	return ret;
 }
 
-static int nvme_rdma_reinit_request(void *data, struct request *rq)
-{
-	struct nvme_rdma_ctrl *ctrl = data;
-	struct nvme_rdma_device *dev = ctrl->device;
-	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
-	int ret = 0;
-
-	if (WARN_ON_ONCE(!req->mr))
-		return 0;
-
-	ib_dereg_mr(req->mr);
-
-	req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG,
-			ctrl->max_fr_pages);
-	if (IS_ERR(req->mr)) {
-		ret = PTR_ERR(req->mr);
-		req->mr = NULL;
-		goto out;
-	}
-
-	req->mr->need_inval = false;
-
-out:
-	return ret;
-}
-
 static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
 		struct request *rq, unsigned int hctx_idx)
 {
@@ -293,9 +271,6 @@ static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
 	struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];
 	struct nvme_rdma_device *dev = queue->device;
 
-	if (req->mr)
-		ib_dereg_mr(req->mr);
-
 	nvme_rdma_free_qe(dev->dev, &req->sqe, sizeof(struct nvme_command),
 			DMA_TO_DEVICE);
 }
@@ -317,21 +292,9 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
 	if (ret)
 		return ret;
 
-	req->mr = ib_alloc_mr(dev->pd, IB_MR_TYPE_MEM_REG,
-			ctrl->max_fr_pages);
-	if (IS_ERR(req->mr)) {
-		ret = PTR_ERR(req->mr);
-		goto out_free_qe;
-	}
-
 	req->queue = queue;
 
 	return 0;
-
-out_free_qe:
-	nvme_rdma_free_qe(dev->dev, &req->sqe, sizeof(struct nvme_command),
-			DMA_TO_DEVICE);
-	return -ENOMEM;
 }
 
 static int nvme_rdma_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
@@ -428,10 +391,23 @@ out_err:
 
 static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
 {
-	struct nvme_rdma_device *dev = queue->device;
-	struct ib_device *ibdev = dev->dev;
+	struct nvme_rdma_device *dev;
+	struct ib_device *ibdev;
 
-	rdma_destroy_qp(queue->cm_id);
+	if (!test_and_clear_bit(NVME_RDMA_Q_TR_READY, &queue->flags))
+		return;
+
+	dev = queue->device;
+	ibdev = dev->dev;
+
+	ib_mr_pool_destroy(queue->qp, &queue->qp->rdma_mrs);
+
+	/*
+	 * The cm_id object might have been destroyed during RDMA connection
+	 * establishment error flow to avoid getting other cma events, thus
+	 * the destruction of the QP shouldn't use rdma_cm API.
+	 */
+	ib_destroy_qp(queue->qp);
 	ib_free_cq(queue->ib_cq);
 
 	nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
@@ -440,6 +416,12 @@ static void nvme_rdma_destroy_queue_ib(struct nvme_rdma_queue *queue)
 	nvme_rdma_dev_put(dev);
 }
 
+static int nvme_rdma_get_max_fr_pages(struct ib_device *ibdev)
+{
+	return min_t(u32, NVME_RDMA_MAX_SEGMENTS,
+		     ibdev->attrs.max_fast_reg_page_list_len);
+}
+
 static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 {
 	struct ib_device *ibdev;
@@ -482,8 +464,24 @@ static int nvme_rdma_create_queue_ib(struct nvme_rdma_queue *queue)
 		goto out_destroy_qp;
 	}
 
+	ret = ib_mr_pool_init(queue->qp, &queue->qp->rdma_mrs,
+			      queue->queue_size,
+			      IB_MR_TYPE_MEM_REG,
+			      nvme_rdma_get_max_fr_pages(ibdev));
+	if (ret) {
+		dev_err(queue->ctrl->ctrl.device,
+			"failed to initialize MR pool sized %d for QID %d\n",
+			queue->queue_size, idx);
+		goto out_destroy_ring;
+	}
+
+	set_bit(NVME_RDMA_Q_TR_READY, &queue->flags);
+
 	return 0;
 
+out_destroy_ring:
+	nvme_rdma_free_ring(ibdev, queue->rsp_ring, queue->queue_size,
+			    sizeof(struct nvme_completion), DMA_FROM_DEVICE);
 out_destroy_qp:
 	rdma_destroy_qp(queue->cm_id);
 out_destroy_ib_cq:
@@ -510,7 +508,6 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
 		queue->cmnd_capsule_len = sizeof(struct nvme_command);
 
 	queue->queue_size = queue_size;
-	atomic_set(&queue->sig_count, 0);
 
 	queue->cm_id = rdma_create_id(&init_net, nvme_rdma_cm_handler, queue,
 			RDMA_PS_TCP, IB_QPT_RC);
@@ -546,6 +543,7 @@ static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl,
 
 out_destroy_cm_id:
 	rdma_destroy_id(queue->cm_id);
+	nvme_rdma_destroy_queue_ib(queue);
 	return ret;
 }
 
@@ -756,8 +754,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 
 	ctrl->device = ctrl->queues[0].device;
 
-	ctrl->max_fr_pages = min_t(u32, NVME_RDMA_MAX_SEGMENTS,
-		ctrl->device->dev->attrs.max_fast_reg_page_list_len);
+	ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
 
 	if (new) {
 		ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true);
@@ -771,10 +768,6 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 			error = PTR_ERR(ctrl->ctrl.admin_q);
 			goto out_free_tagset;
 		}
-	} else {
-		error = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.admin_tagset);
-		if (error)
-			goto out_free_queue;
 	}
 
 	error = nvme_rdma_start_queue(ctrl, 0);
@@ -854,10 +847,6 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new)
 			goto out_free_tag_set;
 		}
 	} else {
-		ret = nvme_reinit_tagset(&ctrl->ctrl, ctrl->ctrl.tagset);
-		if (ret)
-			goto out_free_io_queues;
-
 		blk_mq_update_nr_hw_queues(&ctrl->tag_set,
 			ctrl->ctrl.queue_count - 1);
 	}
@@ -1018,8 +1007,18 @@ static void nvme_rdma_memreg_done(struct ib_cq *cq, struct ib_wc *wc)
 
 static void nvme_rdma_inv_rkey_done(struct ib_cq *cq, struct ib_wc *wc)
 {
-	if (unlikely(wc->status != IB_WC_SUCCESS))
+	struct nvme_rdma_request *req =
+		container_of(wc->wr_cqe, struct nvme_rdma_request, reg_cqe);
+	struct request *rq = blk_mq_rq_from_pdu(req);
+
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
 		nvme_rdma_wr_error(cq, wc, "LOCAL_INV");
+		return;
+	}
+
+	if (refcount_dec_and_test(&req->ref))
+		nvme_end_request(rq, req->status, req->result);
+
 }
 
 static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue,
@@ -1030,7 +1029,7 @@ static int nvme_rdma_inv_rkey(struct nvme_rdma_queue *queue,
 		.opcode		    = IB_WR_LOCAL_INV,
 		.next		    = NULL,
 		.num_sge	    = 0,
-		.send_flags	    = 0,
+		.send_flags	    = IB_SEND_SIGNALED,
 		.ex.invalidate_rkey = req->mr->rkey,
 	};
 
@@ -1044,22 +1043,15 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue,
 		struct request *rq)
 {
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
-	struct nvme_rdma_ctrl *ctrl = queue->ctrl;
 	struct nvme_rdma_device *dev = queue->device;
 	struct ib_device *ibdev = dev->dev;
-	int res;
 
 	if (!blk_rq_bytes(rq))
 		return;
 
-	if (req->mr->need_inval && test_bit(NVME_RDMA_Q_LIVE, &req->queue->flags)) {
-		res = nvme_rdma_inv_rkey(queue, req);
-		if (unlikely(res < 0)) {
-			dev_err(ctrl->ctrl.device,
-				"Queueing INV WR for rkey %#x failed (%d)\n",
-				req->mr->rkey, res);
-			nvme_rdma_error_recovery(queue->ctrl);
-		}
+	if (req->mr) {
+		ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
+		req->mr = NULL;
 	}
 
 	ib_dma_unmap_sg(ibdev, req->sg_table.sgl,
@@ -1118,12 +1110,18 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
 	struct nvme_keyed_sgl_desc *sg = &c->common.dptr.ksgl;
 	int nr;
 
+	req->mr = ib_mr_pool_get(queue->qp, &queue->qp->rdma_mrs);
+	if (WARN_ON_ONCE(!req->mr))
+		return -EAGAIN;
+
 	/*
 	 * Align the MR to a 4K page size to match the ctrl page size and
 	 * the block virtual boundary.
 	 */
 	nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, SZ_4K);
 	if (unlikely(nr < count)) {
+		ib_mr_pool_put(queue->qp, &queue->qp->rdma_mrs, req->mr);
+		req->mr = NULL;
 		if (nr < 0)
 			return nr;
 		return -EINVAL;
@@ -1142,8 +1140,6 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue,
 			IB_ACCESS_REMOTE_READ |
 			IB_ACCESS_REMOTE_WRITE;
 
-	req->mr->need_inval = true;
-
 	sg->addr = cpu_to_le64(req->mr->iova);
 	put_unaligned_le24(req->mr->length, sg->length);
 	put_unaligned_le32(req->mr->rkey, sg->key);
@@ -1163,7 +1159,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 
 	req->num_sge = 1;
 	req->inline_data = false;
-	req->mr->need_inval = false;
+	refcount_set(&req->ref, 2); /* send and recv completions */
 
 	c->common.flags |= NVME_CMD_SGL_METABUF;
 
@@ -1200,25 +1196,24 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
 
 static void nvme_rdma_send_done(struct ib_cq *cq, struct ib_wc *wc)
 {
-	if (unlikely(wc->status != IB_WC_SUCCESS))
-		nvme_rdma_wr_error(cq, wc, "SEND");
-}
+	struct nvme_rdma_qe *qe =
+		container_of(wc->wr_cqe, struct nvme_rdma_qe, cqe);
+	struct nvme_rdma_request *req =
+		container_of(qe, struct nvme_rdma_request, sqe);
+	struct request *rq = blk_mq_rq_from_pdu(req);
 
-/*
- * We want to signal completion at least every queue depth/2. This returns the
- * largest power of two that is not above half of (queue size + 1) to optimize
- * (avoid divisions).
- */
-static inline bool nvme_rdma_queue_sig_limit(struct nvme_rdma_queue *queue)
-{
-	int limit = 1 << ilog2((queue->queue_size + 1) / 2);
+	if (unlikely(wc->status != IB_WC_SUCCESS)) {
+		nvme_rdma_wr_error(cq, wc, "SEND");
+		return;
+	}
 
-	return (atomic_inc_return(&queue->sig_count) & (limit - 1)) == 0;
+	if (refcount_dec_and_test(&req->ref))
+		nvme_end_request(rq, req->status, req->result);
 }
 
 static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
 		struct nvme_rdma_qe *qe, struct ib_sge *sge, u32 num_sge,
-		struct ib_send_wr *first, bool flush)
+		struct ib_send_wr *first)
 {
 	struct ib_send_wr wr, *bad_wr;
 	int ret;
@@ -1227,31 +1222,12 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue,
 	sge->length = sizeof(struct nvme_command),
 	sge->lkey   = queue->device->pd->local_dma_lkey;
 
-	qe->cqe.done = nvme_rdma_send_done;
-
 	wr.next       = NULL;
 	wr.wr_cqe     = &qe->cqe;
 	wr.sg_list    = sge;
 	wr.num_sge    = num_sge;
 	wr.opcode     = IB_WR_SEND;
-	wr.send_flags = 0;
-
-	/*
-	 * Unsignalled send completions are another giant desaster in the
-	 * IB Verbs spec: If we don't regularly post signalled sends
-	 * the send queue will fill up and only a QP reset will rescue us.
-	 * Would have been way to obvious to handle this in hardware or
-	 * at least the RDMA stack..
-	 *
-	 * Always signal the flushes. The magic request used for the flush
-	 * sequencer is not allocated in our driver's tagset and it's
-	 * triggered to be freed by blk_cleanup_queue(). So we need to
-	 * always mark it as signaled to ensure that the "wr_cqe", which is
-	 * embedded in request's payload, is not freed when __ib_process_cq()
-	 * calls wr_cqe->done().
-	 */
-	if (nvme_rdma_queue_sig_limit(queue) || flush)
-		wr.send_flags |= IB_SEND_SIGNALED;
+	wr.send_flags = IB_SEND_SIGNALED;
 
 	if (first)
 		first->next = &wr;
@@ -1301,6 +1277,12 @@ static struct blk_mq_tags *nvme_rdma_tagset(struct nvme_rdma_queue *queue)
 	return queue->ctrl->tag_set.tags[queue_idx - 1];
 }
 
+static void nvme_rdma_async_done(struct ib_cq *cq, struct ib_wc *wc)
+{
+	if (unlikely(wc->status != IB_WC_SUCCESS))
+		nvme_rdma_wr_error(cq, wc, "ASYNC");
+}
+
 static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg)
 {
 	struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(arg);
@@ -1319,10 +1301,12 @@ static void nvme_rdma_submit_async_event(struct nvme_ctrl *arg)
 	cmd->common.flags |= NVME_CMD_SGL_METABUF;
 	nvme_rdma_set_sg_null(cmd);
 
+	sqe->cqe.done = nvme_rdma_async_done;
+
 	ib_dma_sync_single_for_device(dev, sqe->dma, sizeof(*cmd),
 			DMA_TO_DEVICE);
 
-	ret = nvme_rdma_post_send(queue, sqe, &sge, 1, NULL, false);
+	ret = nvme_rdma_post_send(queue, sqe, &sge, 1, NULL);
 	WARN_ON_ONCE(ret);
 }
 
@@ -1343,14 +1327,34 @@ static int nvme_rdma_process_nvme_rsp(struct nvme_rdma_queue *queue,
 	}
 	req = blk_mq_rq_to_pdu(rq);
 
-	if (rq->tag == tag)
-		ret = 1;
+	req->status = cqe->status;
+	req->result = cqe->result;
 
-	if ((wc->wc_flags & IB_WC_WITH_INVALIDATE) &&
-	    wc->ex.invalidate_rkey == req->mr->rkey)
-		req->mr->need_inval = false;
+	if (wc->wc_flags & IB_WC_WITH_INVALIDATE) {
+		if (unlikely(wc->ex.invalidate_rkey != req->mr->rkey)) {
+			dev_err(queue->ctrl->ctrl.device,
+				"Bogus remote invalidation for rkey %#x\n",
+				req->mr->rkey);
+			nvme_rdma_error_recovery(queue->ctrl);
+		}
+	} else if (req->mr) {
+		ret = nvme_rdma_inv_rkey(queue, req);
+		if (unlikely(ret < 0)) {
+			dev_err(queue->ctrl->ctrl.device,
+				"Queueing INV WR for rkey %#x failed (%d)\n",
+				req->mr->rkey, ret);
+			nvme_rdma_error_recovery(queue->ctrl);
+		}
+		/* the local invalidation completion will end the request */
+		return 0;
+	}
+
+	if (refcount_dec_and_test(&req->ref)) {
+		if (rq->tag == tag)
+			ret = 1;
+		nvme_end_request(rq, req->status, req->result);
+	}
 
-	nvme_end_request(rq, cqe->status, cqe->result);
 	return ret;
 }
 
@@ -1591,31 +1595,11 @@ nvme_rdma_timeout(struct request *rq, bool reserved)
  * We cannot accept any other command until the Connect command has completed.
  */
 static inline blk_status_t
-nvme_rdma_queue_is_ready(struct nvme_rdma_queue *queue, struct request *rq)
+nvme_rdma_is_ready(struct nvme_rdma_queue *queue, struct request *rq)
 {
-	if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags))) {
-		struct nvme_command *cmd = nvme_req(rq)->cmd;
-
-		if (!blk_rq_is_passthrough(rq) ||
-		    cmd->common.opcode != nvme_fabrics_command ||
-		    cmd->fabrics.fctype != nvme_fabrics_type_connect) {
-			/*
-			 * reconnecting state means transport disruption, which
-			 * can take a long time and even might fail permanently,
-			 * fail fast to give upper layers a chance to failover.
-			 * deleting state means that the ctrl will never accept
-			 * commands again, fail it permanently.
-			 */
-			if (queue->ctrl->ctrl.state == NVME_CTRL_RECONNECTING ||
-			    queue->ctrl->ctrl.state == NVME_CTRL_DELETING) {
-				nvme_req(rq)->status = NVME_SC_ABORT_REQ;
-				return BLK_STS_IOERR;
-			}
-			return BLK_STS_RESOURCE; /* try again later */
-		}
-	}
-
-	return 0;
+	if (unlikely(!test_bit(NVME_RDMA_Q_LIVE, &queue->flags)))
+		return nvmf_check_init_req(&queue->ctrl->ctrl, rq);
+	return BLK_STS_OK;
 }
 
 static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
@@ -1627,14 +1611,13 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
 	struct nvme_rdma_qe *sqe = &req->sqe;
 	struct nvme_command *c = sqe->data;
-	bool flush = false;
 	struct ib_device *dev;
 	blk_status_t ret;
 	int err;
 
 	WARN_ON_ONCE(rq->tag < 0);
 
-	ret = nvme_rdma_queue_is_ready(queue, rq);
+	ret = nvme_rdma_is_ready(queue, rq);
 	if (unlikely(ret))
 		return ret;
 
@@ -1656,13 +1639,13 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 		goto err;
 	}
 
+	sqe->cqe.done = nvme_rdma_send_done;
+
 	ib_dma_sync_single_for_device(dev, sqe->dma,
 			sizeof(struct nvme_command), DMA_TO_DEVICE);
 
-	if (req_op(rq) == REQ_OP_FLUSH)
-		flush = true;
 	err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge,
-			req->mr->need_inval ? &req->reg_wr.wr : NULL, flush);
+			req->mr ? &req->reg_wr.wr : NULL);
 	if (unlikely(err)) {
 		nvme_rdma_unmap_data(queue, rq);
 		goto err;
@@ -1810,7 +1793,6 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
 	.submit_async_event	= nvme_rdma_submit_async_event,
 	.delete_ctrl		= nvme_rdma_delete_ctrl,
 	.get_address		= nvmf_get_address,
-	.reinit_request		= nvme_rdma_reinit_request,
 };
 
 static inline bool
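Taken together, the rdma.c hunks above drop the old selective send-signalling and move to always-signalled sends plus a two-stage completion: refcount_set(&req->ref, 2) accounts for the send completion and for the receive path (or the local-invalidate completion when the target did not invalidate the rkey remotely), and only the final drop ends the block request. A compressed sketch of that ordering, pulled from the functions above (fragments for illustration, not a standalone build unit):

	/* nvme_rdma_map_data(): arm the two-stage completion */
	refcount_set(&req->ref, 2);	/* send and recv completions */

	/* nvme_rdma_send_done(): drop one reference when the SEND WR completes */
	if (refcount_dec_and_test(&req->ref))
		nvme_end_request(rq, req->status, req->result);

	/*
	 * nvme_rdma_process_nvme_rsp(): stash the CQE first, then either rely
	 * on remote invalidation (and drop the second reference here) or post
	 * a LOCAL_INV WR, whose completion handler performs the final drop.
	 */
	req->status = cqe->status;
	req->result = cqe->result;
	if (refcount_dec_and_test(&req->ref))
		nvme_end_request(rq, req->status, req->result);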
diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c
index 664d3013f68f..5fd86039e353 100644
--- a/drivers/nvme/target/fc.c
+++ b/drivers/nvme/target/fc.c
@@ -533,15 +533,15 @@ nvmet_fc_free_fcp_iod(struct nvmet_fc_tgt_queue *queue,
 
 	tgtport->ops->fcp_req_release(&tgtport->fc_target_port, fcpreq);
 
+	/* release the queue lookup reference on the completed IO */
+	nvmet_fc_tgt_q_put(queue);
+
 	spin_lock_irqsave(&queue->qlock, flags);
 	deferfcp = list_first_entry_or_null(&queue->pending_cmd_list,
 				struct nvmet_fc_defer_fcp_req, req_list);
 	if (!deferfcp) {
 		list_add_tail(&fod->fcp_list, &fod->queue->fod_list);
 		spin_unlock_irqrestore(&queue->qlock, flags);
-
-		/* Release reference taken at queue lookup and fod allocation */
-		nvmet_fc_tgt_q_put(queue);
 		return;
 	}
 
@@ -760,6 +760,9 @@ nvmet_fc_delete_target_queue(struct nvmet_fc_tgt_queue *queue)
 		tgtport->ops->fcp_req_release(&tgtport->fc_target_port,
 				deferfcp->fcp_req);
 
+		/* release the queue lookup reference */
+		nvmet_fc_tgt_q_put(queue);
+
 		kfree(deferfcp);
 
 		spin_lock_irqsave(&queue->qlock, flags);
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 96d390416789..1e21b286f299 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -52,10 +52,15 @@ static inline struct nvme_loop_ctrl *to_loop_ctrl(struct nvme_ctrl *ctrl)
 	return container_of(ctrl, struct nvme_loop_ctrl, ctrl);
 }
 
+enum nvme_loop_queue_flags {
+	NVME_LOOP_Q_LIVE	= 0,
+};
+
 struct nvme_loop_queue {
 	struct nvmet_cq		nvme_cq;
 	struct nvmet_sq		nvme_sq;
 	struct nvme_loop_ctrl	*ctrl;
+	unsigned long		flags;
 };
 
 static struct nvmet_port *nvmet_loop_port;
@@ -144,6 +149,14 @@ nvme_loop_timeout(struct request *rq, bool reserved)
 	return BLK_EH_HANDLED;
 }
 
+static inline blk_status_t nvme_loop_is_ready(struct nvme_loop_queue *queue,
+		struct request *rq)
+{
+	if (unlikely(!test_bit(NVME_LOOP_Q_LIVE, &queue->flags)))
+		return nvmf_check_init_req(&queue->ctrl->ctrl, rq);
+	return BLK_STS_OK;
+}
+
 static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 		const struct blk_mq_queue_data *bd)
 {
@@ -153,6 +166,10 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
 	struct nvme_loop_iod *iod = blk_mq_rq_to_pdu(req);
 	blk_status_t ret;
 
+	ret = nvme_loop_is_ready(queue, req);
+	if (unlikely(ret))
+		return ret;
+
 	ret = nvme_setup_cmd(ns, req, &iod->cmd);
 	if (ret)
 		return ret;
@@ -267,6 +284,7 @@ static const struct blk_mq_ops nvme_loop_admin_mq_ops = {
 
 static void nvme_loop_destroy_admin_queue(struct nvme_loop_ctrl *ctrl)
 {
+	clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags);
 	nvmet_sq_destroy(&ctrl->queues[0].nvme_sq);
 	blk_cleanup_queue(ctrl->ctrl.admin_q);
 	blk_mq_free_tag_set(&ctrl->admin_tag_set);
@@ -297,8 +315,10 @@ static void nvme_loop_destroy_io_queues(struct nvme_loop_ctrl *ctrl)
 {
 	int i;
 
-	for (i = 1; i < ctrl->ctrl.queue_count; i++)
+	for (i = 1; i < ctrl->ctrl.queue_count; i++) {
+		clear_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[i].flags);
 		nvmet_sq_destroy(&ctrl->queues[i].nvme_sq);
+	}
 }
 
 static int nvme_loop_init_io_queues(struct nvme_loop_ctrl *ctrl)
@@ -338,6 +358,7 @@ static int nvme_loop_connect_io_queues(struct nvme_loop_ctrl *ctrl)
 		ret = nvmf_connect_io_queue(&ctrl->ctrl, i);
 		if (ret)
 			return ret;
+		set_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[i].flags);
 	}
 
 	return 0;
@@ -380,6 +401,8 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
 	if (error)
 		goto out_cleanup_queue;
 
+	set_bit(NVME_LOOP_Q_LIVE, &ctrl->queues[0].flags);
+
 	error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap);
 	if (error) {
 		dev_err(ctrl->ctrl.device,
diff --git a/include/uapi/linux/bcache.h b/include/uapi/linux/bcache.h
index 90fc490f973f..821f71a2e48f 100644
--- a/include/uapi/linux/bcache.h
+++ b/include/uapi/linux/bcache.h
@@ -91,7 +91,7 @@ PTR_FIELD(PTR_GEN, 0, 8)
 
 #define PTR_CHECK_DEV		((1 << PTR_DEV_BITS) - 1)
 
-#define PTR(gen, offset, dev)						\
+#define MAKE_PTR(gen, offset, dev)					\
 	((((__u64) dev) << 51) | ((__u64) offset) << 8 | gen)
 
 /* Bkey utility code */
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 206e0e2ace53..987d9a9ae283 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -591,7 +591,7 @@ static int __blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
 		return ret;
 
 	if (copy_to_user(arg, &buts, sizeof(buts))) {
-		blk_trace_remove(q);
+		__blk_trace_remove(q);
 		return -EFAULT;
 	}
 	return 0;
@@ -637,7 +637,7 @@ static int compat_blk_trace_setup(struct request_queue *q, char *name,
 		return ret;
 
 	if (copy_to_user(arg, &buts.name, ARRAY_SIZE(buts.name))) {
-		blk_trace_remove(q);
+		__blk_trace_remove(q);
 		return -EFAULT;
 	}
 
@@ -872,7 +872,7 @@ static void blk_add_trace_rq_complete(void *ignore, struct request *rq,
  *
  **/
 static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
-			      u32 what, int error, union kernfs_node_id *cgid)
+			      u32 what, int error)
 {
 	struct blk_trace *bt = q->blk_trace;
 
@@ -880,22 +880,21 @@ static void blk_add_trace_bio(struct request_queue *q, struct bio *bio,
 		return;
 
 	__blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
-			bio_op(bio), bio->bi_opf, what, error, 0, NULL, cgid);
+			bio_op(bio), bio->bi_opf, what, error, 0, NULL,
+			blk_trace_bio_get_cgid(q, bio));
 }
 
 static void blk_add_trace_bio_bounce(void *ignore,
 				     struct request_queue *q, struct bio *bio)
 {
-	blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0,
-			  blk_trace_bio_get_cgid(q, bio));
+	blk_add_trace_bio(q, bio, BLK_TA_BOUNCE, 0);
 }
 
 static void blk_add_trace_bio_complete(void *ignore,
 				       struct request_queue *q, struct bio *bio,
 				       int error)
 {
-	blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error,
-			  blk_trace_bio_get_cgid(q, bio));
+	blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error);
 }
 
 static void blk_add_trace_bio_backmerge(void *ignore,
@@ -903,8 +902,7 @@ static void blk_add_trace_bio_backmerge(void *ignore,
 					struct request *rq,
 					struct bio *bio)
 {
-	blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0,
-			  blk_trace_bio_get_cgid(q, bio));
+	blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE, 0);
 }
 
 static void blk_add_trace_bio_frontmerge(void *ignore,
@@ -912,15 +910,13 @@ static void blk_add_trace_bio_frontmerge(void *ignore,
 					 struct request *rq,
 					 struct bio *bio)
 {
-	blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0,
-			  blk_trace_bio_get_cgid(q, bio));
+	blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE, 0);
 }
 
 static void blk_add_trace_bio_queue(void *ignore,
 				    struct request_queue *q, struct bio *bio)
 {
-	blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0,
-			  blk_trace_bio_get_cgid(q, bio));
+	blk_add_trace_bio(q, bio, BLK_TA_QUEUE, 0);
 }
 
 static void blk_add_trace_getrq(void *ignore,
@@ -928,8 +924,7 @@ static void blk_add_trace_getrq(void *ignore,
 				struct bio *bio, int rw)
 {
 	if (bio)
-		blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0,
-				  blk_trace_bio_get_cgid(q, bio));
+		blk_add_trace_bio(q, bio, BLK_TA_GETRQ, 0);
 	else {
 		struct blk_trace *bt = q->blk_trace;
 
@@ -945,8 +940,7 @@ static void blk_add_trace_sleeprq(void *ignore,
 				  struct bio *bio, int rw)
 {
 	if (bio)
-		blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0,
-				  blk_trace_bio_get_cgid(q, bio));
+		blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ, 0);
 	else {
 		struct blk_trace *bt = q->blk_trace;
 
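The two one-line changes at the top of this file (blk_trace_remove() to __blk_trace_remove() in the setup error paths) are the "blktrace: fix trace mutex deadlock" item from the shortlog: the setup helpers already run with q->blk_trace_mutex held, so they must call the unlocked teardown worker rather than the locking wrapper. A simplified sketch of the wrapper/worker split being relied on, assuming the blk_trace_mutex-based locking these helpers use (bodies abbreviated; the real functions live in this same file):

	/* Unlocked worker: caller must already hold q->blk_trace_mutex. */
	static int __blk_trace_remove(struct request_queue *q)
	{
		/* ... tear down and free q->blk_trace ... */
		return 0;
	}

	/* Locking wrapper for external callers. */
	int blk_trace_remove(struct request_queue *q)
	{
		int ret;

		mutex_lock(&q->blk_trace_mutex);
		ret = __blk_trace_remove(q);
		mutex_unlock(&q->blk_trace_mutex);

		return ret;
	}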
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 74b52dfd5852..84b2dc76f140 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -113,11 +113,23 @@ static const struct file_operations bdi_debug_stats_fops = {
 	.release	= single_release,
 };
 
-static void bdi_debug_register(struct backing_dev_info *bdi, const char *name)
+static int bdi_debug_register(struct backing_dev_info *bdi, const char *name)
 {
+	if (!bdi_debug_root)
+		return -ENOMEM;
+
 	bdi->debug_dir = debugfs_create_dir(name, bdi_debug_root);
+	if (!bdi->debug_dir)
+		return -ENOMEM;
+
 	bdi->debug_stats = debugfs_create_file("stats", 0444, bdi->debug_dir,
 					       bdi, &bdi_debug_stats_fops);
+	if (!bdi->debug_stats) {
+		debugfs_remove(bdi->debug_dir);
+		return -ENOMEM;
+	}
+
+	return 0;
 }
 
 static void bdi_debug_unregister(struct backing_dev_info *bdi)
@@ -129,9 +141,10 @@ static void bdi_debug_unregister(struct backing_dev_info *bdi)
 static inline void bdi_debug_init(void)
 {
 }
-static inline void bdi_debug_register(struct backing_dev_info *bdi,
+static inline int bdi_debug_register(struct backing_dev_info *bdi,
 				      const char *name)
 {
+	return 0;
 }
 static inline void bdi_debug_unregister(struct backing_dev_info *bdi)
 {
@@ -869,10 +882,13 @@ int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, va_list args)
 	if (IS_ERR(dev))
 		return PTR_ERR(dev);
 
+	if (bdi_debug_register(bdi, dev_name(dev))) {
+		device_destroy(bdi_class, dev->devt);
+		return -ENOMEM;
+	}
 	cgwb_bdi_register(bdi);
 	bdi->dev = dev;
 
-	bdi_debug_register(bdi, dev_name(dev));
 	set_bit(WB_registered, &bdi->wb.state);
 
 	spin_lock_bh(&bdi_lock);