author		Ming Lei <ming.lei@redhat.com>		2018-06-25 07:31:48 -0400
committer	Jens Axboe <axboe@kernel.dk>		2018-07-09 11:07:52 -0400
commit		97889f9ac24f8d2fc8e703ea7f80c162bab10d4d
tree		f8280901685cfc59ae03e92ea91a29a567f3b7b6	/block/blk-mq-sched.c
parent		5815839b3ca16bb1d45939270871169f6803a121
blk-mq: remove synchronize_rcu() from blk_mq_del_queue_tag_set()
We have to remove synchronize_rcu() from blk_cleanup_queue(), otherwise
long delays can be caused during LUN probe. To remove it, we have to
avoid iterating set->tag_list in the I/O path, e.g. in
blk_mq_sched_restart().
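For context, the constraint above pairs an RCU reader in the I/O path with
the tag-set updater in block/blk-mq.c. That updater hunk is outside this
diffstat, so the sketch below is only an approximation of the pre-patch
teardown pattern (function and field names follow the 4.18-era blk-mq code),
shown to make the dependency explicit:

static void blk_mq_del_queue_tag_set(struct request_queue *q)
{
	struct blk_mq_tag_set *set = q->tag_set;

	mutex_lock(&set->tag_list_lock);
	list_del_rcu(&q->tag_set_list);
	mutex_unlock(&set->tag_list_lock);

	/*
	 * Pre-patch: as long as blk_mq_sched_restart() may still be walking
	 * set->tag_list under rcu_read_lock(), the queue cannot be torn down
	 * until a grace period elapses.  This wait is what stalls LUN probe,
	 * and it can only go away once no I/O-path reader iterates tag_list.
	 */
	synchronize_rcu();
}

Once the I/O-path reader is removed, the grace-period wait is no longer
needed in the teardown path.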
This patch reverts 5b79413946d (Revert "blk-mq: don't handle
TAG_SHARED in restart"). We have fixed enough I/O hang issues by now,
and there is no reason to restart all queues sharing one tag set any
more, for the following reasons (the resulting simplified restart path
is reproduced after the list):
1) The blk-mq core handles the shared-tags case well via blk_mq_get_driver_tag(),
which can wake up queues waiting for a driver tag.
2) SCSI is a bit special because it may return BLK_STS_RESOURCE when the
queue, target or host is not ready, but SCSI's built-in restart covers all
of these cases well; see scsi_end_request(): the queue is rerun after any
request initiated from this host/target completes.
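With the round-robin restart gone, the restart path collapses to the
per-hctx form below; this is the new side of the first hunk in this diff,
reproduced without diff markers and with brief comments:

static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
{
	/* Only the owning hctx is marked; no per-queue shared counter. */
	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
		return;

	set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
}

void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
{
	/* Restart only this hctx instead of walking set->tag_list. */
	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
		return;
	clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);

	blk_mq_run_hw_queue(hctx, true);
}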
In my test on scsi_debug (8 LUNs), this patch improves IOPS by 20% ~ 30%
when running I/O on these 8 LUNs concurrently.
Fixes: 705cda97ee3a ("blk-mq: Make it safe to use RCU to iterate over blk_mq_tag_set.tag_list")
Cc: Omar Sandoval <osandov@fb.com>
Cc: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: linux-scsi@vger.kernel.org
Reported-by: Andrew Jones <drjones@redhat.com>
Tested-by: Andrew Jones <drjones@redhat.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block/blk-mq-sched.c')
-rw-r--r--	block/blk-mq-sched.c	85
1 file changed, 5 insertions, 80 deletions
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 56c493c6cd90..4e027f6108ae 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -59,29 +59,16 @@ static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
 	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
 		return;
 
-	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
-		struct request_queue *q = hctx->queue;
-
-		if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-			atomic_inc(&q->shared_hctx_restart);
-	} else
-		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+	set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
 }
 
-static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
+void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx)
 {
 	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-		return false;
-
-	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
-		struct request_queue *q = hctx->queue;
-
-		if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-			atomic_dec(&q->shared_hctx_restart);
-	} else
-		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+		return;
+	clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
 
-	return blk_mq_run_hw_queue(hctx, true);
+	blk_mq_run_hw_queue(hctx, true);
 }
 
 /*
@@ -380,68 +367,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
 	return false;
 }
 
-/**
- * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
- * @pos:    loop cursor.
- * @skip:   the list element that will not be examined. Iteration starts at
- *          @skip->next.
- * @head:   head of the list to examine. This list must have at least one
- *          element, namely @skip.
- * @member: name of the list_head structure within typeof(*pos).
- */
-#define list_for_each_entry_rcu_rr(pos, skip, head, member)		\
-	for ((pos) = (skip);						\
-	     (pos = (pos)->member.next != (head) ? list_entry_rcu(	\
-			(pos)->member.next, typeof(*pos), member) :	\
-			list_entry_rcu((pos)->member.next->next, typeof(*pos), member)), \
-	     (pos) != (skip); )
-
-/*
- * Called after a driver tag has been freed to check whether a hctx needs to
- * be restarted. Restarts @hctx if its tag set is not shared. Restarts hardware
- * queues in a round-robin fashion if the tag set of @hctx is shared with other
- * hardware queues.
- */
-void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
-{
-	struct blk_mq_tags *const tags = hctx->tags;
-	struct blk_mq_tag_set *const set = hctx->queue->tag_set;
-	struct request_queue *const queue = hctx->queue, *q;
-	struct blk_mq_hw_ctx *hctx2;
-	unsigned int i, j;
-
-	if (set->flags & BLK_MQ_F_TAG_SHARED) {
-		/*
-		 * If this is 0, then we know that no hardware queues
-		 * have RESTART marked. We're done.
-		 */
-		if (!atomic_read(&queue->shared_hctx_restart))
-			return;
-
-		rcu_read_lock();
-		list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
-					   tag_set_list) {
-			queue_for_each_hw_ctx(q, hctx2, i)
-				if (hctx2->tags == tags &&
-				    blk_mq_sched_restart_hctx(hctx2))
-					goto done;
-		}
-		j = hctx->queue_num + 1;
-		for (i = 0; i < queue->nr_hw_queues; i++, j++) {
-			if (j == queue->nr_hw_queues)
-				j = 0;
-			hctx2 = queue->queue_hw_ctx[j];
-			if (hctx2->tags == tags &&
-			    blk_mq_sched_restart_hctx(hctx2))
-				break;
-		}
-done:
-		rcu_read_unlock();
-	} else {
-		blk_mq_sched_restart_hctx(hctx);
-	}
-}
-
 void blk_mq_sched_insert_request(struct request *rq, bool at_head,
 				 bool run_queue, bool async)
 {