summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Damien Le Moal <damien.lemoal@wdc.com> 2019-09-05 05:51:33 -0400
committer: Jens Axboe <axboe@kernel.dk> 2019-09-05 21:52:34 -0400
commit: 737eb78e82d52d35df166d29af32bf61992de71d (patch)
tree: 88571fe3b3a82b62192a895d1e81b71af99bfa74
parent: a0958ba7fcdc316e3900f8d2afda519850d60985 (diff)
block: Delay default elevator initialization
When elevator_init_mq() is called from blk_mq_init_allocated_queue(), the only information known about the device is the number of hardware queues, because for most drivers the block device scan by the device driver has not completed yet. The device type and elevator required features are not set yet, which prevents the correct selection of the default elevator most suitable for the device. This currently affects all multi-queue zoned block devices, which default to the "none" elevator instead of the required "mq-deadline" elevator. These devices currently include host-managed SMR disks connected to a smartpqi HBA and null_blk block devices with zoned mode enabled. Upcoming NVMe Zoned Namespace devices will also be affected. Fix this by adding the boolean elevator_init argument to blk_mq_init_allocated_queue() to control the execution of elevator_init_mq(). Two cases exist: 1) elevator_init = false is used for calls to blk_mq_init_allocated_queue() within blk_mq_init_queue(). In this case, a call to elevator_init_mq() is added to __device_add_disk(), resulting in the delayed initialization of the queue elevator after the device driver has finished probing the device information. This effectively gives elevator_init_mq() access to more information about the device. 2) elevator_init = true preserves the current behavior of initializing the elevator directly from blk_mq_init_allocated_queue(). This case is used for the special request-based DM devices, where the device gendisk is created before the queue initialization and device information (e.g. queue limits) is already known when the queue initialization is executed. Additionally, to make sure that the elevator initialization is never done while requests are in flight (there should be none when the device driver calls device_add_disk()), freeze and quiesce the device request queue before calling blk_mq_init_sched() in elevator_init_mq().
Reviewed-by: Ming Lei <ming.lei@redhat.com> Signed-off-by: Damien Le Moal <damien.lemoal@wdc.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r-- block/blk-mq.c 12
-rw-r--r-- block/elevator.c 7
-rw-r--r-- block/genhd.c 9
-rw-r--r-- drivers/md/dm-rq.c 2
-rw-r--r-- include/linux/blk-mq.h 3
5 files changed, 28 insertions, 5 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c
index d10a7ab4207a..3647776a0f6e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2695,7 +2695,11 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
2695 if (!uninit_q) 2695 if (!uninit_q)
2696 return ERR_PTR(-ENOMEM); 2696 return ERR_PTR(-ENOMEM);
2697 2697
2698 q = blk_mq_init_allocated_queue(set, uninit_q); 2698 /*
2699 * Initialize the queue without an elevator. device_add_disk() will do
2700 * the initialization.
2701 */
2702 q = blk_mq_init_allocated_queue(set, uninit_q, false);
2699 if (IS_ERR(q)) 2703 if (IS_ERR(q))
2700 blk_cleanup_queue(uninit_q); 2704 blk_cleanup_queue(uninit_q);
2701 2705
@@ -2846,7 +2850,8 @@ static unsigned int nr_hw_queues(struct blk_mq_tag_set *set)
2846} 2850}
2847 2851
2848struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, 2852struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
2849 struct request_queue *q) 2853 struct request_queue *q,
2854 bool elevator_init)
2850{ 2855{
2851 /* mark the queue as mq asap */ 2856 /* mark the queue as mq asap */
2852 q->mq_ops = set->ops; 2857 q->mq_ops = set->ops;
@@ -2908,7 +2913,8 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
2908 blk_mq_add_queue_tag_set(set, q); 2913 blk_mq_add_queue_tag_set(set, q);
2909 blk_mq_map_swqueue(q); 2914 blk_mq_map_swqueue(q);
2910 2915
2911 elevator_init_mq(q); 2916 if (elevator_init)
2917 elevator_init_mq(q);
2912 2918
2913 return q; 2919 return q;
2914 2920
diff --git a/block/elevator.c b/block/elevator.c
index 520d6b224b74..096a670d22d7 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -712,7 +712,14 @@ void elevator_init_mq(struct request_queue *q)
712 if (!e) 712 if (!e)
713 return; 713 return;
714 714
715 blk_mq_freeze_queue(q);
716 blk_mq_quiesce_queue(q);
717
715 err = blk_mq_init_sched(q, e); 718 err = blk_mq_init_sched(q, e);
719
720 blk_mq_unquiesce_queue(q);
721 blk_mq_unfreeze_queue(q);
722
716 if (err) { 723 if (err) {
717 pr_warn("\"%s\" elevator initialization failed, " 724 pr_warn("\"%s\" elevator initialization failed, "
718 "falling back to \"none\"\n", e->elevator_name); 725 "falling back to \"none\"\n", e->elevator_name);
diff --git a/block/genhd.c b/block/genhd.c
index 54f1f0d381f4..26b31fcae217 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -695,6 +695,15 @@ static void __device_add_disk(struct device *parent, struct gendisk *disk,
695 dev_t devt; 695 dev_t devt;
696 int retval; 696 int retval;
697 697
698 /*
699 * The disk queue should now be all set with enough information about
700 * the device for the elevator code to pick an adequate default
701 * elevator if one is needed, that is, for devices requesting queue
702 * registration.
703 */
704 if (register_queue)
705 elevator_init_mq(disk->queue);
706
698 /* minors == 0 indicates to use ext devt from part0 and should 707 /* minors == 0 indicates to use ext devt from part0 and should
699 * be accompanied with EXT_DEVT flag. Make sure all 708 * be accompanied with EXT_DEVT flag. Make sure all
700 * parameters make sense. 709 * parameters make sense.
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 21d5c1784d0c..3f8577e2c13b 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -563,7 +563,7 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
563 if (err) 563 if (err)
564 goto out_kfree_tag_set; 564 goto out_kfree_tag_set;
565 565
566 q = blk_mq_init_allocated_queue(md->tag_set, md->queue); 566 q = blk_mq_init_allocated_queue(md->tag_set, md->queue, true);
567 if (IS_ERR(q)) { 567 if (IS_ERR(q)) {
568 err = PTR_ERR(q); 568 err = PTR_ERR(q);
569 goto out_tag_set; 569 goto out_tag_set;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 62a3bb715899..0bf056de5cc3 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -248,7 +248,8 @@ enum {
248 248
249struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); 249struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *);
250struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, 250struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
251 struct request_queue *q); 251 struct request_queue *q,
252 bool elevator_init);
252struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set, 253struct request_queue *blk_mq_init_sq_queue(struct blk_mq_tag_set *set,
253 const struct blk_mq_ops *ops, 254 const struct blk_mq_ops *ops,
254 unsigned int queue_depth, 255 unsigned int queue_depth,