path: root/block/blk-mq.c
author	Ming Lei <ming.lei@canonical.com>	2014-09-25 11:23:47 -0400
committer	Jens Axboe <axboe@fb.com>	2014-09-25 17:22:45 -0400
commit	f70ced09170761acb69840cafaace4abc72cba4b (patch)
tree	bc62f5926a5e8b74be30316196a41b25ece12368 /block/blk-mq.c
parent	e97c293cdf77263abdc021de280516e0017afc84 (diff)
blk-mq: support per-dispatch_queue flush machinery
This patch adds support for running one flush machinery per blk-mq
dispatch queue, so that:

- the existing init_request and exit_request callbacks can cover the
  flush request too, which fixes the buggy approach of initializing the
  flush request's pdu by copying it from the original request

- flushing performance improves in the multi hw-queue case

In an fio sync write test over virtio-blk (4 hw queues, ioengine=sync,
iodepth=64, numjobs=4, bs=4K), throughput increases significantly in my
test environment:

- throughput: +70% in case of virtio-blk over null_blk
- throughput: +30% in case of virtio-blk over SSD image

The multi virtqueue feature isn't merged into QEMU yet; patches for the
feature can be found in the tree below:

	git://kernel.ubuntu.com/ming/qemu.git  v2.1.0-mq.4

Simply passing 'num_queues=4 vectors=5' is enough to enable the multi
queue (quad queue) feature for QEMU virtio-blk.

Suggested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
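[Editor's sketch] The driver-visible effect of this patch is that each
hctx's preallocated flush request (hctx->fq->flush_rq) is now set up and
torn down through the same init_request/exit_request callbacks as regular
requests, handed in with the out-of-range tag set->queue_depth + hctx_idx.
Below is a minimal sketch of such a callback pair; the driver name, pdu
layout, and buffer size are hypothetical, but the callback signatures
match the call sites added in this patch:

	#include <linux/blk-mq.h>
	#include <linux/slab.h>

	#define MY_SG_BUF_SIZE 4096	/* hypothetical per-request buffer size */

	struct my_cmd {
		void *sg_buf;		/* hypothetical per-request driver state */
	};

	/*
	 * Runs for every regular request and, after this patch, also for each
	 * hctx's preallocated flush request, so the flush request's pdu no
	 * longer has to be initialized by memcpy()ing the original request.
	 */
	static int my_init_request(void *driver_data, struct request *rq,
				   unsigned int hctx_idx,
				   unsigned int request_idx,
				   unsigned int numa_node)
	{
		struct my_cmd *cmd = blk_mq_rq_to_pdu(rq);

		/* flush requests arrive with request_idx >= set->queue_depth */
		cmd->sg_buf = kzalloc_node(MY_SG_BUF_SIZE, GFP_KERNEL, numa_node);
		return cmd->sg_buf ? 0 : -ENOMEM;
	}

	static void my_exit_request(void *driver_data, struct request *rq,
				    unsigned int hctx_idx,
				    unsigned int request_idx)
	{
		struct my_cmd *cmd = blk_mq_rq_to_pdu(rq);

		kfree(cmd->sg_buf);
	}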
Diffstat (limited to 'block/blk-mq.c')
-rw-r--r--	block/blk-mq.c	50
1 file changed, 24 insertions(+), 26 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 53b6def12fc4..4e7a31466139 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -281,26 +281,6 @@ void blk_mq_free_request(struct request *rq)
 	__blk_mq_free_request(hctx, ctx, rq);
 }
 
-/*
- * Clone all relevant state from a request that has been put on hold in
- * the flush state machine into the preallocated flush request that hangs
- * off the request queue.
- *
- * For a driver the flush request should be invisible, that's why we are
- * impersonating the original request here.
- */
-void blk_mq_clone_flush_request(struct request *flush_rq,
-		struct request *orig_rq)
-{
-	struct blk_mq_hw_ctx *hctx =
-		orig_rq->q->mq_ops->map_queue(orig_rq->q, orig_rq->mq_ctx->cpu);
-
-	flush_rq->mq_ctx = orig_rq->mq_ctx;
-	flush_rq->tag = orig_rq->tag;
-	memcpy(blk_mq_rq_to_pdu(flush_rq), blk_mq_rq_to_pdu(orig_rq),
-		hctx->cmd_size);
-}
-
 inline void __blk_mq_end_request(struct request *rq, int error)
 {
 	blk_account_io_done(rq);
@@ -1516,12 +1496,20 @@ static void blk_mq_exit_hctx(struct request_queue *q,
 		struct blk_mq_tag_set *set,
 		struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
 {
+	unsigned flush_start_tag = set->queue_depth;
+
 	blk_mq_tag_idle(hctx);
 
+	if (set->ops->exit_request)
+		set->ops->exit_request(set->driver_data,
+				       hctx->fq->flush_rq, hctx_idx,
+				       flush_start_tag + hctx_idx);
+
 	if (set->ops->exit_hctx)
 		set->ops->exit_hctx(hctx, hctx_idx);
 
 	blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier);
+	blk_free_flush_queue(hctx->fq);
 	kfree(hctx->ctxs);
 	blk_mq_free_bitmap(&hctx->ctx_map);
 }
@@ -1556,6 +1544,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
 		struct blk_mq_hw_ctx *hctx, unsigned hctx_idx)
 {
 	int node;
+	unsigned flush_start_tag = set->queue_depth;
 
 	node = hctx->numa_node;
 	if (node == NUMA_NO_NODE)
@@ -1594,8 +1583,23 @@ static int blk_mq_init_hctx(struct request_queue *q,
 	    set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
 		goto free_bitmap;
 
+	hctx->fq = blk_alloc_flush_queue(q, hctx->numa_node, set->cmd_size);
+	if (!hctx->fq)
+		goto exit_hctx;
+
+	if (set->ops->init_request &&
+	    set->ops->init_request(set->driver_data,
+				   hctx->fq->flush_rq, hctx_idx,
+				   flush_start_tag + hctx_idx, node))
+		goto free_fq;
+
 	return 0;
 
+ free_fq:
+	kfree(hctx->fq);
+ exit_hctx:
+	if (set->ops->exit_hctx)
+		set->ops->exit_hctx(hctx, hctx_idx);
 free_bitmap:
 	blk_mq_free_bitmap(&hctx->ctx_map);
 free_ctxs:
@@ -1862,16 +1866,10 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 
 	blk_mq_add_queue_tag_set(set, q);
 
-	q->fq = blk_alloc_flush_queue(q);
-	if (!q->fq)
-		goto err_hw_queues;
-
 	blk_mq_map_swqueue(q);
 
 	return q;
 
-err_hw_queues:
-	blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
 err_hw:
 	blk_cleanup_queue(q);
 err_hctxs: