author		Ming Lei <ming.lei@redhat.com>	2018-06-25 07:31:48 -0400
committer	Jens Axboe <axboe@kernel.dk>	2018-07-09 11:07:52 -0400
commit		97889f9ac24f8d2fc8e703ea7f80c162bab10d4d (patch)
tree		f8280901685cfc59ae03e92ea91a29a567f3b7b6 /block/blk-mq-sched.c
parent		5815839b3ca16bb1d45939270871169f6803a121 (diff)
blk-mq: remove synchronize_rcu() from blk_mq_del_queue_tag_set()
We have to remove synchronize_rcu() from blk_queue_cleanup(); otherwise it can cause a long delay during LUN probe. To remove it, we have to avoid iterating set->tag_list in the I/O path, e.g. in blk_mq_sched_restart().

This patch reverts 5b79413946d (Revert "blk-mq: don't handle TAG_SHARED in restart"). We have fixed enough I/O hang issues, and there is no longer any reason to restart all queues sharing one tag set, for the following reasons:

1) The blk-mq core deals with the shared-tags case well via blk_mq_get_driver_tag(), which wakes up queues waiting for a driver tag.

2) SCSI is a bit special because it may return BLK_STS_RESOURCE when the queue, target or host is not ready, but SCSI's built-in restart covers all of these; see scsi_end_request(): the queue is rerun after any request initiated from this host/target completes.

In my test on scsi_debug (8 LUNs), this patch improves IOPS by 20% ~ 30% when running I/O on these 8 LUNs concurrently.

Fixes: 705cda97ee3a ("blk-mq: Make it safe to use RCU to iterate over blk_mq_tag_set.tag_list")
Cc: Omar Sandoval <osandov@fb.com>
Cc: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: linux-scsi@vger.kernel.org
Reported-by: Andrew Jones <drjones@redhat.com>
Tested-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
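For reference, a condensed view of the two helpers as they read after this patch, reconstructed from the hunk below; the comments are added here for orientation only and are not part of the patch:

	/* Mark a hctx for restart: now a plain per-hctx bit, with no shared-tag accounting. */
	static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
	{
		if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
			return;

		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
	}

	/* Called after a driver tag is freed: rerun only this hctx if it was marked,
	 * instead of walking set->tag_list across all queues sharing the tag set. */
	void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
	{
		if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
			return;
		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);

		blk_mq_run_hw_queue(hctx, true);
	}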
Diffstat (limited to 'block/blk-mq-sched.c')
-rw-r--r--	block/blk-mq-sched.c	85
1 file changed, 5 insertions, 80 deletions
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 56c493c6cd90..4e027f6108ae 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -59,29 +59,16 @@ static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
 	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
 		return;
 
-	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
-		struct request_queue *q = hctx->queue;
-
-		if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-			atomic_inc(&q->shared_hctx_restart);
-	} else
-		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+	set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
 }
 
-static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
+void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
 {
 	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-		return false;
-
-	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
-		struct request_queue *q = hctx->queue;
-
-		if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-			atomic_dec(&q->shared_hctx_restart);
-	} else
-		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+		return;
+	clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
 
-	return blk_mq_run_hw_queue(hctx, true);
+	blk_mq_run_hw_queue(hctx, true);
 }
 
 /*
@@ -380,68 +367,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
 	return false;
 }
 
-/**
- * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
- * @pos:    loop cursor.
- * @skip:   the list element that will not be examined. Iteration starts at
- *          @skip->next.
- * @head:   head of the list to examine. This list must have at least one
- *          element, namely @skip.
- * @member: name of the list_head structure within typeof(*pos).
- */
-#define list_for_each_entry_rcu_rr(pos, skip, head, member)		\
-	for ((pos) = (skip);						\
-	     (pos = (pos)->member.next != (head) ? list_entry_rcu(	\
-			(pos)->member.next, typeof(*pos), member) :	\
-			list_entry_rcu((pos)->member.next->next, typeof(*pos), member)), \
-	     (pos) != (skip); )
-
-/*
- * Called after a driver tag has been freed to check whether a hctx needs to
- * be restarted. Restarts @hctx if its tag set is not shared. Restarts hardware
- * queues in a round-robin fashion if the tag set of @hctx is shared with other
- * hardware queues.
- */
-void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
-{
-	struct blk_mq_tags *const tags = hctx->tags;
-	struct blk_mq_tag_set *const set = hctx->queue->tag_set;
-	struct request_queue *const queue = hctx->queue, *q;
-	struct blk_mq_hw_ctx *hctx2;
-	unsigned int i, j;
-
-	if (set->flags & BLK_MQ_F_TAG_SHARED) {
-		/*
-		 * If this is 0, then we know that no hardware queues
-		 * have RESTART marked. We're done.
-		 */
-		if (!atomic_read(&queue->shared_hctx_restart))
-			return;
-
-		rcu_read_lock();
-		list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
-					   tag_set_list) {
-			queue_for_each_hw_ctx(q, hctx2, i)
-				if (hctx2->tags == tags &&
-				    blk_mq_sched_restart_hctx(hctx2))
-					goto done;
-		}
-		j = hctx->queue_num + 1;
-		for (i = 0; i < queue->nr_hw_queues; i++, j++) {
-			if (j == queue->nr_hw_queues)
-				j = 0;
-			hctx2 = queue->queue_hw_ctx[j];
-			if (hctx2->tags == tags &&
-			    blk_mq_sched_restart_hctx(hctx2))
-				break;
-		}
-done:
-		rcu_read_unlock();
-	} else {
-		blk_mq_sched_restart_hctx(hctx);
-	}
-}
-
 void blk_mq_sched_insert_request(struct request *rq, bool at_head,
 				 bool run_queue, bool async)
 {