Diffstat (limited to 'block/blk-mq.c')
-rw-r--r--	block/blk-mq.c	237
1 files changed, 102 insertions, 135 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 57039fcd9c93..883f72089015 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -73,8 +73,8 @@ static void blk_mq_hctx_mark_pending(struct blk_mq_hw_ctx *hctx,
 	set_bit(ctx->index_hw, hctx->ctx_map);
 }
 
-static struct request *blk_mq_alloc_rq(struct blk_mq_hw_ctx *hctx, gfp_t gfp,
-				       bool reserved)
+static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx,
+					      gfp_t gfp, bool reserved)
 {
 	struct request *rq;
 	unsigned int tag;
@@ -193,12 +193,6 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx,
 	ctx->rq_dispatched[rw_is_sync(rw_flags)]++;
 }
 
-static struct request *__blk_mq_alloc_request(struct blk_mq_hw_ctx *hctx,
-					      gfp_t gfp, bool reserved)
-{
-	return blk_mq_alloc_rq(hctx, gfp, reserved);
-}
-
 static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
 						   int rw, gfp_t gfp,
 						   bool reserved)
@@ -226,15 +220,14 @@ static struct request *blk_mq_alloc_request_pinned(struct request_queue *q,
 	return rq;
 }
 
-struct request *blk_mq_alloc_request(struct request_queue *q, int rw,
-				     gfp_t gfp, bool reserved)
+struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp)
 {
 	struct request *rq;
 
 	if (blk_mq_queue_enter(q))
 		return NULL;
 
-	rq = blk_mq_alloc_request_pinned(q, rw, gfp, reserved);
+	rq = blk_mq_alloc_request_pinned(q, rw, gfp, false);
 	if (rq)
 		blk_mq_put_ctx(rq->mq_ctx);
 	return rq;
@@ -258,7 +251,7 @@ EXPORT_SYMBOL(blk_mq_alloc_reserved_request);
 /*
  * Re-init and set pdu, if we have it
  */
-static void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq)
+void blk_mq_rq_init(struct blk_mq_hw_ctx *hctx, struct request *rq)
 {
 	blk_rq_init(hctx->queue, rq);
 
@@ -290,38 +283,10 @@ void blk_mq_free_request(struct request *rq)
 	__blk_mq_free_request(hctx, ctx, rq);
 }
 
-static void blk_mq_bio_endio(struct request *rq, struct bio *bio, int error)
-{
-	if (error)
-		clear_bit(BIO_UPTODATE, &bio->bi_flags);
-	else if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
-		error = -EIO;
-
-	if (unlikely(rq->cmd_flags & REQ_QUIET))
-		set_bit(BIO_QUIET, &bio->bi_flags);
-
-	/* don't actually finish bio if it's part of flush sequence */
-	if (!(rq->cmd_flags & REQ_FLUSH_SEQ))
-		bio_endio(bio, error);
-}
-
-void blk_mq_complete_request(struct request *rq, int error)
+bool blk_mq_end_io_partial(struct request *rq, int error, unsigned int nr_bytes)
 {
-	struct bio *bio = rq->bio;
-	unsigned int bytes = 0;
-
-	trace_block_rq_complete(rq->q, rq);
-
-	while (bio) {
-		struct bio *next = bio->bi_next;
-
-		bio->bi_next = NULL;
-		bytes += bio->bi_iter.bi_size;
-		blk_mq_bio_endio(rq, bio, error);
-		bio = next;
-	}
-
-	blk_account_io_completion(rq, bytes);
+	if (blk_update_request(rq, error, blk_rq_bytes(rq)))
+		return true;
 
 	blk_account_io_done(rq);
 
@@ -329,49 +294,57 @@ void blk_mq_complete_request(struct request *rq, int error)
 		rq->end_io(rq, error);
 	else
 		blk_mq_free_request(rq);
+	return false;
 }
+EXPORT_SYMBOL(blk_mq_end_io_partial);
 
-void __blk_mq_end_io(struct request *rq, int error)
-{
-	if (!blk_mark_rq_complete(rq))
-		blk_mq_complete_request(rq, error);
-}
-
-static void blk_mq_end_io_remote(void *data)
+static void __blk_mq_complete_request_remote(void *data)
 {
 	struct request *rq = data;
 
-	__blk_mq_end_io(rq, rq->errors);
+	rq->q->softirq_done_fn(rq);
 }
 
-/*
- * End IO on this request on a multiqueue enabled driver. We'll either do
- * it directly inline, or punt to a local IPI handler on the matching
- * remote CPU.
- */
-void blk_mq_end_io(struct request *rq, int error)
+void __blk_mq_complete_request(struct request *rq)
 {
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
 	int cpu;
 
-	if (!ctx->ipi_redirect)
-		return __blk_mq_end_io(rq, error);
+	if (!ctx->ipi_redirect) {
+		rq->q->softirq_done_fn(rq);
+		return;
+	}
 
 	cpu = get_cpu();
 	if (cpu != ctx->cpu && cpu_online(ctx->cpu)) {
-		rq->errors = error;
-		rq->csd.func = blk_mq_end_io_remote;
+		rq->csd.func = __blk_mq_complete_request_remote;
 		rq->csd.info = rq;
 		rq->csd.flags = 0;
 		__smp_call_function_single(ctx->cpu, &rq->csd, 0);
 	} else {
-		__blk_mq_end_io(rq, error);
+		rq->q->softirq_done_fn(rq);
 	}
 	put_cpu();
 }
-EXPORT_SYMBOL(blk_mq_end_io);
 
-static void blk_mq_start_request(struct request *rq)
+/**
+ * blk_mq_complete_request - end I/O on a request
+ * @rq:		the request being processed
+ *
+ * Description:
+ *	Ends all I/O on a request. It does not handle partial completions.
+ *	The actual completion happens out-of-order, through a IPI handler.
+ **/
+void blk_mq_complete_request(struct request *rq)
+{
+	if (unlikely(blk_should_fake_timeout(rq->q)))
+		return;
+	if (!blk_mark_rq_complete(rq))
+		__blk_mq_complete_request(rq);
+}
+EXPORT_SYMBOL(blk_mq_complete_request);
+
+static void blk_mq_start_request(struct request *rq, bool last)
 {
 	struct request_queue *q = rq->q;
 
@@ -384,6 +357,25 @@ static void blk_mq_start_request(struct request *rq)
 	 */
 	rq->deadline = jiffies + q->rq_timeout;
 	set_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
+
+	if (q->dma_drain_size && blk_rq_bytes(rq)) {
+		/*
+		 * Make sure space for the drain appears. We know we can do
+		 * this because max_hw_segments has been adjusted to be one
+		 * fewer than the device can handle.
+		 */
+		rq->nr_phys_segments++;
+	}
+
+	/*
+	 * Flag the last request in the series so that drivers know when IO
+	 * should be kicked off, if they don't do it on a per-request basis.
+	 *
+	 * Note: the flag isn't the only condition drivers should do kick off.
+	 * If drive is busy, the last request might not have the bit set.
+	 */
+	if (last)
+		rq->cmd_flags |= REQ_END;
 }
 
 static void blk_mq_requeue_request(struct request *rq)
@@ -392,6 +384,11 @@ static void blk_mq_requeue_request(struct request *rq)
 
 	trace_block_rq_requeue(q, rq);
 	clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
+
+	rq->cmd_flags &= ~REQ_END;
+
+	if (q->dma_drain_size && blk_rq_bytes(rq))
+		rq->nr_phys_segments--;
 }
 
 struct blk_mq_timeout_data {
@@ -559,19 +556,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 
 		rq = list_first_entry(&rq_list, struct request, queuelist);
 		list_del_init(&rq->queuelist);
-		blk_mq_start_request(rq);
 
-		/*
-		 * Last request in the series. Flag it as such, this
-		 * enables drivers to know when IO should be kicked off,
-		 * if they don't do it on a per-request basis.
-		 *
-		 * Note: the flag isn't the only condition drivers
-		 * should do kick off. If drive is busy, the last
-		 * request might not have the bit set.
-		 */
-		if (list_empty(&rq_list))
-			rq->cmd_flags |= REQ_END;
+		blk_mq_start_request(rq, list_empty(&rq_list));
 
 		ret = q->mq_ops->queue_rq(hctx, rq);
 		switch (ret) {
@@ -589,8 +575,8 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 			break;
 		default:
 			pr_err("blk-mq: bad return on queue: %d\n", ret);
-			rq->errors = -EIO;
 		case BLK_MQ_RQ_QUEUE_ERROR:
+			rq->errors = -EIO;
 			blk_mq_end_io(rq, rq->errors);
 			break;
 		}
@@ -693,13 +679,16 @@ static void blk_mq_work_fn(struct work_struct *work)
 }
 
 static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
-				    struct request *rq)
+				    struct request *rq, bool at_head)
 {
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
 
 	trace_block_rq_insert(hctx->queue, rq);
 
-	list_add_tail(&rq->queuelist, &ctx->rq_list);
+	if (at_head)
+		list_add(&rq->queuelist, &ctx->rq_list);
+	else
+		list_add_tail(&rq->queuelist, &ctx->rq_list);
 	blk_mq_hctx_mark_pending(hctx, ctx);
 
 	/*
@@ -708,61 +697,28 @@ static void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx,
 	blk_mq_add_timer(rq);
 }
 
-void blk_mq_insert_request(struct request_queue *q, struct request *rq,
-			   bool run_queue)
+void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue,
+		bool async)
 {
+	struct request_queue *q = rq->q;
 	struct blk_mq_hw_ctx *hctx;
-	struct blk_mq_ctx *ctx, *current_ctx;
+	struct blk_mq_ctx *ctx = rq->mq_ctx, *current_ctx;
+
+	current_ctx = blk_mq_get_ctx(q);
+	if (!cpu_online(ctx->cpu))
+		rq->mq_ctx = ctx = current_ctx;
 
-	ctx = rq->mq_ctx;
 	hctx = q->mq_ops->map_queue(q, ctx->cpu);
 
-	if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) {
+	if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA) &&
+	    !(rq->cmd_flags & (REQ_FLUSH_SEQ))) {
 		blk_insert_flush(rq);
 	} else {
-		current_ctx = blk_mq_get_ctx(q);
-
-		if (!cpu_online(ctx->cpu)) {
-			ctx = current_ctx;
-			hctx = q->mq_ops->map_queue(q, ctx->cpu);
-			rq->mq_ctx = ctx;
-		}
 		spin_lock(&ctx->lock);
-		__blk_mq_insert_request(hctx, rq);
+		__blk_mq_insert_request(hctx, rq, at_head);
 		spin_unlock(&ctx->lock);
-
-		blk_mq_put_ctx(current_ctx);
 	}
 
-	if (run_queue)
-		__blk_mq_run_hw_queue(hctx);
-}
-EXPORT_SYMBOL(blk_mq_insert_request);
-
-/*
- * This is a special version of blk_mq_insert_request to bypass FLUSH request
- * check. Should only be used internally.
- */
-void blk_mq_run_request(struct request *rq, bool run_queue, bool async)
-{
-	struct request_queue *q = rq->q;
-	struct blk_mq_hw_ctx *hctx;
-	struct blk_mq_ctx *ctx, *current_ctx;
-
-	current_ctx = blk_mq_get_ctx(q);
-
-	ctx = rq->mq_ctx;
-	if (!cpu_online(ctx->cpu)) {
-		ctx = current_ctx;
-		rq->mq_ctx = ctx;
-	}
-	hctx = q->mq_ops->map_queue(q, ctx->cpu);
-
-	/* ctx->cpu might be offline */
-	spin_lock(&ctx->lock);
-	__blk_mq_insert_request(hctx, rq);
-	spin_unlock(&ctx->lock);
-
 	blk_mq_put_ctx(current_ctx);
 
 	if (run_queue)
@@ -798,7 +754,7 @@ static void blk_mq_insert_requests(struct request_queue *q,
 		rq = list_first_entry(list, struct request, queuelist);
 		list_del_init(&rq->queuelist);
 		rq->mq_ctx = ctx;
-		__blk_mq_insert_request(hctx, rq);
+		__blk_mq_insert_request(hctx, rq, false);
 	}
 	spin_unlock(&ctx->lock);
 
@@ -888,6 +844,11 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 
 	blk_queue_bounce(q, &bio);
 
+	if (bio_integrity_enabled(bio) && bio_integrity_prep(bio)) {
+		bio_endio(bio, -EIO);
+		return;
+	}
+
 	if (use_plug && blk_attempt_plug_merge(q, bio, &request_count))
 		return;
 
@@ -899,6 +860,8 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	ctx = blk_mq_get_ctx(q);
 	hctx = q->mq_ops->map_queue(q, ctx->cpu);
 
+	if (is_sync)
+		rw |= REQ_SYNC;
 	trace_block_getrq(q, bio, rw);
 	rq = __blk_mq_alloc_request(hctx, GFP_ATOMIC, false);
 	if (likely(rq))
@@ -950,7 +913,7 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		__blk_mq_free_request(hctx, ctx, rq);
 	else {
 		blk_mq_bio_to_request(rq, bio);
-		__blk_mq_insert_request(hctx, rq);
+		__blk_mq_insert_request(hctx, rq, false);
 	}
 
 	spin_unlock(&ctx->lock);
@@ -1309,15 +1272,6 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
 		reg->queue_depth = BLK_MQ_MAX_DEPTH;
 	}
 
-	/*
-	 * Set aside a tag for flush requests. It will only be used while
-	 * another flush request is in progress but outside the driver.
-	 *
-	 * TODO: only allocate if flushes are supported
-	 */
-	reg->queue_depth++;
-	reg->reserved_tags++;
-
 	if (reg->queue_depth < (reg->reserved_tags + BLK_MQ_TAG_MIN))
 		return ERR_PTR(-EINVAL);
 
@@ -1360,17 +1314,27 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
 	q->mq_ops = reg->ops;
 	q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT;
 
+	q->sg_reserved_size = INT_MAX;
+
 	blk_queue_make_request(q, blk_mq_make_request);
 	blk_queue_rq_timed_out(q, reg->ops->timeout);
 	if (reg->timeout)
 		blk_queue_rq_timeout(q, reg->timeout);
 
+	if (reg->ops->complete)
+		blk_queue_softirq_done(q, reg->ops->complete);
+
 	blk_mq_init_flush(q);
 	blk_mq_init_cpu_queues(q, reg->nr_hw_queues);
 
-	if (blk_mq_init_hw_queues(q, reg, driver_data))
+	q->flush_rq = kzalloc(round_up(sizeof(struct request) + reg->cmd_size,
+				cache_line_size()), GFP_KERNEL);
+	if (!q->flush_rq)
 		goto err_hw;
 
+	if (blk_mq_init_hw_queues(q, reg, driver_data))
+		goto err_flush_rq;
+
 	blk_mq_map_swqueue(q);
 
 	mutex_lock(&all_q_mutex);
@@ -1378,6 +1342,9 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
 	mutex_unlock(&all_q_mutex);
 
 	return q;
+
+err_flush_rq:
+	kfree(q->flush_rq);
 err_hw:
 	kfree(q->mq_map);
 err_map: