author	Paolo Valente <paolo.valente@linaro.org>	2018-02-07 16:19:20 -0500
committer	Jens Axboe <axboe@kernel.dk>	2018-02-07 17:17:46 -0500
commit	a7877390614770965a6925dfed79cbd3eeeb61e0 (patch)
tree	4e6acea38e6ad42d624db7954e2d573dffcc2aef
parent	73ac105be390c1de42a2f21643c9778a5e002930 (diff)
block, bfq: add requeue-request hook
Commit 'a6a252e64914 ("blk-mq-sched: decide how to handle flush rq via
RQF_FLUSH_SEQ")' makes all non-flush re-prepared requests for a device
be re-inserted into the active I/O scheduler for that device. As a
consequence, I/O schedulers may get the same request inserted again,
even several times, without a finish_request invoked on that request
before each re-insertion.

This fact is the cause of the failure reported in [1]. For an I/O
scheduler, every re-insertion of the same re-prepared request is
equivalent to the insertion of a new request. For schedulers like
mq-deadline or kyber, this fact causes no harm. In contrast, it
confuses a stateful scheduler like BFQ, which keeps state for an I/O
request, until the finish_request hook is invoked on the request. In
particular, BFQ may get stuck, waiting forever for the number of
request dispatches, of the same request, to be balanced by an equal
number of request completions (while there will be one completion for
that request). In this state, BFQ may refuse to serve I/O requests
from other bfq_queues. The hang reported in [1] then follows.

However, the above re-prepared requests undergo a requeue, thus the
requeue_request hook of the active elevator is invoked for these
requests, if set. This commit then addresses the above issue by
properly implementing the hook requeue_request in BFQ.

[1] https://marc.info/?l=linux-block&m=151211117608676

Reported-by: Ivan Kozik <ivan@ludios.org>
Reported-by: Alban Browaeys <alban.browaeys@gmail.com>
Tested-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Paolo Valente <paolo.valente@linaro.org>
Signed-off-by: Serena Ziviani <ziviani.serena@gmail.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
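The imbalance described above can be pictured with a minimal userspace
sketch (not part of this patch, and not kernel code; all names below are
made up for illustration): a stateful scheduler keeps per-request state
from insertion until its finish hook runs, so re-inserting a requeued
request without an intervening finish/requeue hook leaves state behind
that a single completion can never drain.

/* toy_requeue.c - build with: cc -Wall -o toy_requeue toy_requeue.c */
#include <stdio.h>

/* Per-request state a stateful scheduler keeps until its finish hook. */
struct toy_request {
	int refs;			/* insertions not yet finished */
};

static int scheduler_busy;		/* queue-wide outstanding state */

static void insert_request(struct toy_request *rq)
{
	rq->refs++;			/* scheduler starts tracking rq */
	scheduler_busy++;
}

static void finish_request(struct toy_request *rq)
{
	rq->refs--;			/* state dropped at completion */
	scheduler_busy--;
}

int main(void)
{
	struct toy_request rq = { 0 };

	/* Normal life cycle: one insertion balanced by one finish. */
	insert_request(&rq);
	finish_request(&rq);
	printf("normal cycle:  busy=%d\n", scheduler_busy);	/* 0 */

	/*
	 * Requeue path before this patch: blk-mq re-inserts the same
	 * re-prepared request, but the finish hook runs only once, at
	 * completion, so one unit of state is never dropped and the
	 * scheduler keeps waiting for it.
	 */
	insert_request(&rq);		/* first insertion */
	insert_request(&rq);		/* re-insertion after a requeue */
	finish_request(&rq);		/* single completion */
	printf("requeue cycle: busy=%d (never drains to 0)\n",
	       scheduler_busy);

	return 0;
}

The patch below drops that stale state by wiring the requeue_request
hook to the same teardown path as finish_request.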
 block/bfq-iosched.c | 107 ++++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 82 insertions(+), 25 deletions(-)
diff --git a/block/bfq-iosched.c b/block/bfq-iosched.c
index 47e6ec7427c4..aeca22d91101 100644
--- a/block/bfq-iosched.c
+++ b/block/bfq-iosched.c
@@ -3823,24 +3823,26 @@ static struct request *__bfq_dispatch_request(struct blk_mq_hw_ctx *hctx)
 		}
 
 		/*
-		 * We exploit the bfq_finish_request hook to decrement
-		 * rq_in_driver, but bfq_finish_request will not be
-		 * invoked on this request. So, to avoid unbalance,
-		 * just start this request, without incrementing
-		 * rq_in_driver. As a negative consequence,
-		 * rq_in_driver is deceptively lower than it should be
-		 * while this request is in service. This may cause
-		 * bfq_schedule_dispatch to be invoked uselessly.
+		 * We exploit the bfq_finish_requeue_request hook to
+		 * decrement rq_in_driver, but
+		 * bfq_finish_requeue_request will not be invoked on
+		 * this request. So, to avoid unbalance, just start
+		 * this request, without incrementing rq_in_driver. As
+		 * a negative consequence, rq_in_driver is deceptively
+		 * lower than it should be while this request is in
+		 * service. This may cause bfq_schedule_dispatch to be
+		 * invoked uselessly.
 		 *
 		 * As for implementing an exact solution, the
-		 * bfq_finish_request hook, if defined, is probably
-		 * invoked also on this request. So, by exploiting
-		 * this hook, we could 1) increment rq_in_driver here,
-		 * and 2) decrement it in bfq_finish_request. Such a
-		 * solution would let the value of the counter be
-		 * always accurate, but it would entail using an extra
-		 * interface function. This cost seems higher than the
-		 * benefit, being the frequency of non-elevator-private
+		 * bfq_finish_requeue_request hook, if defined, is
+		 * probably invoked also on this request. So, by
+		 * exploiting this hook, we could 1) increment
+		 * rq_in_driver here, and 2) decrement it in
+		 * bfq_finish_requeue_request. Such a solution would
+		 * let the value of the counter be always accurate,
+		 * but it would entail using an extra interface
+		 * function. This cost seems higher than the benefit,
+		 * being the frequency of non-elevator-private
 		 * requests very low.
 		 */
 		goto start_rq;
@@ -4515,6 +4517,8 @@ static inline void bfq_update_insert_stats(struct request_queue *q,
 					   unsigned int cmd_flags) {}
 #endif
 
+static void bfq_prepare_request(struct request *rq, struct bio *bio);
+
 static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 			       bool at_head)
 {
@@ -4541,6 +4545,18 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 		else
 			list_add_tail(&rq->queuelist, &bfqd->dispatch);
 	} else {
+		if (WARN_ON_ONCE(!bfqq)) {
+			/*
+			 * This should never happen. Most likely rq is
+			 * a requeued regular request, being
+			 * re-inserted without being first
+			 * re-prepared. Do a prepare, to avoid
+			 * failure.
+			 */
+			bfq_prepare_request(rq, rq->bio);
+			bfqq = RQ_BFQQ(rq);
+		}
+
 		idle_timer_disabled = __bfq_insert_request(bfqd, rq);
 		/*
 		 * Update bfqq, because, if a queue merge has occurred
@@ -4697,22 +4713,44 @@ static void bfq_completed_request(struct bfq_queue *bfqq, struct bfq_data *bfqd)
 		bfq_schedule_dispatch(bfqd);
 }
 
-static void bfq_finish_request_body(struct bfq_queue *bfqq)
+static void bfq_finish_requeue_request_body(struct bfq_queue *bfqq)
 {
 	bfqq->allocated--;
 
 	bfq_put_queue(bfqq);
 }
 
-static void bfq_finish_request(struct request *rq)
+/*
+ * Handle either a requeue or a finish for rq. The things to do are
+ * the same in both cases: all references to rq are to be dropped. In
+ * particular, rq is considered completed from the point of view of
+ * the scheduler.
+ */
+static void bfq_finish_requeue_request(struct request *rq)
 {
-	struct bfq_queue *bfqq;
+	struct bfq_queue *bfqq = RQ_BFQQ(rq);
 	struct bfq_data *bfqd;
 
-	if (!rq->elv.icq)
+	/*
+	 * Requeue and finish hooks are invoked in blk-mq without
+	 * checking whether the involved request is actually still
+	 * referenced in the scheduler. To handle this fact, the
+	 * following two checks make this function exit in case of
+	 * spurious invocations, for which there is nothing to do.
+	 *
+	 * First, check whether rq has nothing to do with an elevator.
+	 */
+	if (unlikely(!(rq->rq_flags & RQF_ELVPRIV)))
+		return;
+
+	/*
+	 * rq either is not associated with any icq, or is an already
+	 * requeued request that has not (yet) been re-inserted into
+	 * a bfq_queue.
+	 */
+	if (!rq->elv.icq || !bfqq)
 		return;
 
-	bfqq = RQ_BFQQ(rq);
 	bfqd = bfqq->bfqd;
 
 	if (rq->rq_flags & RQF_STARTED)
@@ -4727,13 +4765,14 @@ static void bfq_finish_request(struct request *rq)
 		spin_lock_irqsave(&bfqd->lock, flags);
 
 		bfq_completed_request(bfqq, bfqd);
-		bfq_finish_request_body(bfqq);
+		bfq_finish_requeue_request_body(bfqq);
 
 		spin_unlock_irqrestore(&bfqd->lock, flags);
 	} else {
 		/*
 		 * Request rq may be still/already in the scheduler,
-		 * in which case we need to remove it. And we cannot
+		 * in which case we need to remove it (this should
+		 * never happen in case of requeue). And we cannot
 		 * defer such a check and removal, to avoid
 		 * inconsistencies in the time interval from the end
 		 * of this function to the start of the deferred work.
@@ -4748,9 +4787,26 @@ static void bfq_finish_request(struct request *rq)
 			bfqg_stats_update_io_remove(bfqq_group(bfqq),
 						    rq->cmd_flags);
 		}
-		bfq_finish_request_body(bfqq);
+		bfq_finish_requeue_request_body(bfqq);
 	}
 
+	/*
+	 * Reset private fields. In case of a requeue, this allows
+	 * this function to correctly do nothing if it is spuriously
+	 * invoked again on this same request (see the check at the
+	 * beginning of the function). Probably, a better general
+	 * design would be to prevent blk-mq from invoking the requeue
+	 * or finish hooks of an elevator, for a request that is not
+	 * referred by that elevator.
+	 *
+	 * Resetting the following fields would break the
+	 * request-insertion logic if rq is re-inserted into a bfq
+	 * internal queue, without a re-preparation. Here we assume
+	 * that re-insertions of requeued requests, without
+	 * re-preparation, can happen only for pass_through or at_head
+	 * requests (which are not re-inserted into bfq internal
+	 * queues).
+	 */
 	rq->elv.priv[0] = NULL;
 	rq->elv.priv[1] = NULL;
 }
@@ -5426,7 +5482,8 @@ static struct elevator_type iosched_bfq_mq = {
 	.ops.mq = {
 		.limit_depth		= bfq_limit_depth,
 		.prepare_request	= bfq_prepare_request,
-		.finish_request		= bfq_finish_request,
+		.requeue_request	= bfq_finish_requeue_request,
+		.finish_request		= bfq_finish_requeue_request,
 		.exit_icq		= bfq_exit_icq,
 		.insert_requests	= bfq_insert_requests,
 		.dispatch_request	= bfq_dispatch_request,