diff options
author | Ming Lei <ming.lei@redhat.com> | 2017-11-02 11:24:38 -0400 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2017-11-04 14:40:13 -0400 |
commit | 923218f6166a84688973acdc39094f3bee1e9ad4 (patch) | |
tree | 1013b8c39764532780292633f7e4214c99480aaf | |
parent | 244c65a3ccaa06fd15cc940315606674d3108b2f (diff) |
blk-mq: don't allocate driver tag upfront for flush rq
The idea behind it is simple:
1) for the none scheduler, the driver tag has to be borrowed for the flush
rq, otherwise we may run out of tags and that causes an IO hang. And
get/put driver tag is actually a noop for none, so reordering tags
isn't necessary at all.
2) for a real I/O scheduler, we need not allocate a driver tag upfront
for the flush rq. It works just fine to follow the same approach as
normal requests: allocate the driver tag for each rq just before calling
->queue_rq().
One driver-visible change is that the driver tag isn't shared in the
flush request sequence. That won't be a problem, since we always do that
in the legacy path.
Then the flush rq need not be treated specially with respect to get/put driver tag.
This cleans up the code - for instance, reorder_tags_to_front() can be
removed, and we needn't worry about request ordering in dispatch list
for avoiding I/O deadlock.
Also we have to put the driver tag before requeueing.
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r-- | block/blk-flush.c | 35 | ||||
-rw-r--r-- | block/blk-mq-sched.c | 42 | ||||
-rw-r--r-- | block/blk-mq.c | 41 |
3 files changed, 37 insertions, 81 deletions
diff --git a/block/blk-flush.c b/block/blk-flush.c index a9773d2075ac..f17170675917 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c | |||
@@ -231,8 +231,13 @@ static void flush_end_io(struct request *flush_rq, blk_status_t error) | |||
231 | /* release the tag's ownership to the req cloned from */ | 231 | /* release the tag's ownership to the req cloned from */ |
232 | spin_lock_irqsave(&fq->mq_flush_lock, flags); | 232 | spin_lock_irqsave(&fq->mq_flush_lock, flags); |
233 | hctx = blk_mq_map_queue(q, flush_rq->mq_ctx->cpu); | 233 | hctx = blk_mq_map_queue(q, flush_rq->mq_ctx->cpu); |
234 | blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq); | 234 | if (!q->elevator) { |
235 | flush_rq->tag = -1; | 235 | blk_mq_tag_set_rq(hctx, flush_rq->tag, fq->orig_rq); |
236 | flush_rq->tag = -1; | ||
237 | } else { | ||
238 | blk_mq_put_driver_tag_hctx(hctx, flush_rq); | ||
239 | flush_rq->internal_tag = -1; | ||
240 | } | ||
236 | } | 241 | } |
237 | 242 | ||
238 | running = &fq->flush_queue[fq->flush_running_idx]; | 243 | running = &fq->flush_queue[fq->flush_running_idx]; |
@@ -318,19 +323,26 @@ static bool blk_kick_flush(struct request_queue *q, struct blk_flush_queue *fq) | |||
318 | blk_rq_init(q, flush_rq); | 323 | blk_rq_init(q, flush_rq); |
319 | 324 | ||
320 | /* | 325 | /* |
321 | * Borrow tag from the first request since they can't | 326 | * In case of none scheduler, borrow tag from the first request |
322 | * be in flight at the same time. And acquire the tag's | 327 | * since they can't be in flight at the same time. And acquire |
323 | * ownership for flush req. | 328 | * the tag's ownership for flush req. |
329 | * | ||
330 | * In case of IO scheduler, flush rq need to borrow scheduler tag | ||
331 | * just for cheating put/get driver tag. | ||
324 | */ | 332 | */ |
325 | if (q->mq_ops) { | 333 | if (q->mq_ops) { |
326 | struct blk_mq_hw_ctx *hctx; | 334 | struct blk_mq_hw_ctx *hctx; |
327 | 335 | ||
328 | flush_rq->mq_ctx = first_rq->mq_ctx; | 336 | flush_rq->mq_ctx = first_rq->mq_ctx; |
329 | flush_rq->tag = first_rq->tag; | ||
330 | fq->orig_rq = first_rq; | ||
331 | 337 | ||
332 | hctx = blk_mq_map_queue(q, first_rq->mq_ctx->cpu); | 338 | if (!q->elevator) { |
333 | blk_mq_tag_set_rq(hctx, first_rq->tag, flush_rq); | 339 | fq->orig_rq = first_rq; |
340 | flush_rq->tag = first_rq->tag; | ||
341 | hctx = blk_mq_map_queue(q, first_rq->mq_ctx->cpu); | ||
342 | blk_mq_tag_set_rq(hctx, first_rq->tag, flush_rq); | ||
343 | } else { | ||
344 | flush_rq->internal_tag = first_rq->internal_tag; | ||
345 | } | ||
334 | } | 346 | } |
335 | 347 | ||
336 | flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH; | 348 | flush_rq->cmd_flags = REQ_OP_FLUSH | REQ_PREFLUSH; |
@@ -394,6 +406,11 @@ static void mq_flush_data_end_io(struct request *rq, blk_status_t error) | |||
394 | 406 | ||
395 | hctx = blk_mq_map_queue(q, ctx->cpu); | 407 | hctx = blk_mq_map_queue(q, ctx->cpu); |
396 | 408 | ||
409 | if (q->elevator) { | ||
410 | WARN_ON(rq->tag < 0); | ||
411 | blk_mq_put_driver_tag_hctx(hctx, rq); | ||
412 | } | ||
413 | |||
397 | /* | 414 | /* |
398 | * After populating an empty queue, kick it to avoid stall. Read | 415 | * After populating an empty queue, kick it to avoid stall. Read |
399 | * the comment in flush_end_io(). | 416 | * the comment in flush_end_io(). |
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index e7094f44afaf..01a43fed6b8c 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c | |||
@@ -356,29 +356,12 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx, | |||
356 | return true; | 356 | return true; |
357 | } | 357 | } |
358 | 358 | ||
359 | if (has_sched) { | 359 | if (has_sched) |
360 | rq->rq_flags |= RQF_SORTED; | 360 | rq->rq_flags |= RQF_SORTED; |
361 | WARN_ON(rq->tag != -1); | ||
362 | } | ||
363 | 361 | ||
364 | return false; | 362 | return false; |
365 | } | 363 | } |
366 | 364 | ||
367 | /* | ||
368 | * Add flush/fua to the queue. If we fail getting a driver tag, then | ||
369 | * punt to the requeue list. Requeue will re-invoke us from a context | ||
370 | * that's safe to block from. | ||
371 | */ | ||
372 | static void blk_mq_sched_insert_flush(struct blk_mq_hw_ctx *hctx, | ||
373 | struct request *rq, bool can_block) | ||
374 | { | ||
375 | if (blk_mq_get_driver_tag(rq, &hctx, can_block)) { | ||
376 | blk_insert_flush(rq); | ||
377 | blk_mq_run_hw_queue(hctx, true); | ||
378 | } else | ||
379 | blk_mq_add_to_requeue_list(rq, false, true); | ||
380 | } | ||
381 | |||
382 | void blk_mq_sched_insert_request(struct request *rq, bool at_head, | 365 | void blk_mq_sched_insert_request(struct request *rq, bool at_head, |
383 | bool run_queue, bool async, bool can_block) | 366 | bool run_queue, bool async, bool can_block) |
384 | { | 367 | { |
@@ -389,10 +372,12 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head, | |||
389 | 372 | ||
390 | /* flush rq in flush machinery need to be dispatched directly */ | 373 | /* flush rq in flush machinery need to be dispatched directly */ |
391 | if (!(rq->rq_flags & RQF_FLUSH_SEQ) && op_is_flush(rq->cmd_flags)) { | 374 | if (!(rq->rq_flags & RQF_FLUSH_SEQ) && op_is_flush(rq->cmd_flags)) { |
392 | blk_mq_sched_insert_flush(hctx, rq, can_block); | 375 | blk_insert_flush(rq); |
393 | return; | 376 | goto run; |
394 | } | 377 | } |
395 | 378 | ||
379 | WARN_ON(e && (rq->tag != -1)); | ||
380 | |||
396 | if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) | 381 | if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) |
397 | goto run; | 382 | goto run; |
398 | 383 | ||
@@ -419,23 +404,6 @@ void blk_mq_sched_insert_requests(struct request_queue *q, | |||
419 | struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); | 404 | struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, ctx->cpu); |
420 | struct elevator_queue *e = hctx->queue->elevator; | 405 | struct elevator_queue *e = hctx->queue->elevator; |
421 | 406 | ||
422 | if (e) { | ||
423 | struct request *rq, *next; | ||
424 | |||
425 | /* | ||
426 | * We bypass requests that already have a driver tag assigned, | ||
427 | * which should only be flushes. Flushes are only ever inserted | ||
428 | * as single requests, so we shouldn't ever hit the | ||
429 | * WARN_ON_ONCE() below (but let's handle it just in case). | ||
430 | */ | ||
431 | list_for_each_entry_safe(rq, next, list, queuelist) { | ||
432 | if (WARN_ON_ONCE(rq->tag != -1)) { | ||
433 | list_del_init(&rq->queuelist); | ||
434 | blk_mq_sched_bypass_insert(hctx, true, rq); | ||
435 | } | ||
436 | } | ||
437 | } | ||
438 | |||
439 | if (e && e->type->ops.mq.insert_requests) | 407 | if (e && e->type->ops.mq.insert_requests) |
440 | e->type->ops.mq.insert_requests(hctx, list, false); | 408 | e->type->ops.mq.insert_requests(hctx, list, false); |
441 | else | 409 | else |
diff --git a/block/blk-mq.c b/block/blk-mq.c index 14f6886fbec8..c501cbd0de93 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c | |||
@@ -653,6 +653,8 @@ static void __blk_mq_requeue_request(struct request *rq) | |||
653 | { | 653 | { |
654 | struct request_queue *q = rq->q; | 654 | struct request_queue *q = rq->q; |
655 | 655 | ||
656 | blk_mq_put_driver_tag(rq); | ||
657 | |||
656 | trace_block_rq_requeue(q, rq); | 658 | trace_block_rq_requeue(q, rq); |
657 | wbt_requeue(q->rq_wb, &rq->issue_stat); | 659 | wbt_requeue(q->rq_wb, &rq->issue_stat); |
658 | blk_mq_sched_requeue_request(rq); | 660 | blk_mq_sched_requeue_request(rq); |
@@ -996,30 +998,6 @@ done: | |||
996 | return rq->tag != -1; | 998 | return rq->tag != -1; |
997 | } | 999 | } |
998 | 1000 | ||
999 | /* | ||
1000 | * If we fail getting a driver tag because all the driver tags are already | ||
1001 | * assigned and on the dispatch list, BUT the first entry does not have a | ||
1002 | * tag, then we could deadlock. For that case, move entries with assigned | ||
1003 | * driver tags to the front, leaving the set of tagged requests in the | ||
1004 | * same order, and the untagged set in the same order. | ||
1005 | */ | ||
1006 | static bool reorder_tags_to_front(struct list_head *list) | ||
1007 | { | ||
1008 | struct request *rq, *tmp, *first = NULL; | ||
1009 | |||
1010 | list_for_each_entry_safe_reverse(rq, tmp, list, queuelist) { | ||
1011 | if (rq == first) | ||
1012 | break; | ||
1013 | if (rq->tag != -1) { | ||
1014 | list_move(&rq->queuelist, list); | ||
1015 | if (!first) | ||
1016 | first = rq; | ||
1017 | } | ||
1018 | } | ||
1019 | |||
1020 | return first != NULL; | ||
1021 | } | ||
1022 | |||
1023 | static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, int flags, | 1001 | static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode, int flags, |
1024 | void *key) | 1002 | void *key) |
1025 | { | 1003 | { |
@@ -1080,9 +1058,6 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, | |||
1080 | 1058 | ||
1081 | rq = list_first_entry(list, struct request, queuelist); | 1059 | rq = list_first_entry(list, struct request, queuelist); |
1082 | if (!blk_mq_get_driver_tag(rq, &hctx, false)) { | 1060 | if (!blk_mq_get_driver_tag(rq, &hctx, false)) { |
1083 | if (!queued && reorder_tags_to_front(list)) | ||
1084 | continue; | ||
1085 | |||
1086 | /* | 1061 | /* |
1087 | * The initial allocation attempt failed, so we need to | 1062 | * The initial allocation attempt failed, so we need to |
1088 | * rerun the hardware queue when a tag is freed. | 1063 | * rerun the hardware queue when a tag is freed. |
@@ -1133,7 +1108,6 @@ bool blk_mq_dispatch_rq_list(struct request_queue *q, struct list_head *list, | |||
1133 | nxt = list_first_entry(list, struct request, queuelist); | 1108 | nxt = list_first_entry(list, struct request, queuelist); |
1134 | blk_mq_put_driver_tag(nxt); | 1109 | blk_mq_put_driver_tag(nxt); |
1135 | } | 1110 | } |
1136 | blk_mq_put_driver_tag_hctx(hctx, rq); | ||
1137 | list_add(&rq->queuelist, list); | 1111 | list_add(&rq->queuelist, list); |
1138 | __blk_mq_requeue_request(rq); | 1112 | __blk_mq_requeue_request(rq); |
1139 | break; | 1113 | break; |
@@ -1698,13 +1672,10 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio) | |||
1698 | if (unlikely(is_flush_fua)) { | 1672 | if (unlikely(is_flush_fua)) { |
1699 | blk_mq_put_ctx(data.ctx); | 1673 | blk_mq_put_ctx(data.ctx); |
1700 | blk_mq_bio_to_request(rq, bio); | 1674 | blk_mq_bio_to_request(rq, bio); |
1701 | if (q->elevator) { | 1675 | |
1702 | blk_mq_sched_insert_request(rq, false, true, true, | 1676 | /* bypass scheduler for flush rq */ |
1703 | true); | 1677 | blk_insert_flush(rq); |
1704 | } else { | 1678 | blk_mq_run_hw_queue(data.hctx, true); |
1705 | blk_insert_flush(rq); | ||
1706 | blk_mq_run_hw_queue(data.hctx, true); | ||
1707 | } | ||
1708 | } else if (plug && q->nr_hw_queues == 1) { | 1679 | } else if (plug && q->nr_hw_queues == 1) { |
1709 | struct request *last = NULL; | 1680 | struct request *last = NULL; |
1710 | 1681 | ||