author      Chong Yuan <chong.yuan@memblaze.com>    2015-04-15 13:39:29 -0400
committer   Jens Axboe <axboe@fb.com>               2015-04-15 13:39:29 -0400
commit      889fa31f00b218a2cef96c32a6b3f57e6d3bf918
tree        12338bd403f6fd84d8e2b98624aceaa3a4b9998b /block
parent      dc48e56d761610da4ea1088d1bea0a030b8e3e43
blk-mq: reduce unnecessary software queue looping
In flush_busy_ctxs() and blk_mq_hctx_has_pending(), regardless of how many
ctxs are assigned to an hctx, the loop always runs hctx->ctx_map.map_size
times, where map_size is the constant ALIGN(nr_cpu_ids, 8) / 8. Since
flush_busy_ctxs() in particular sits in a hot code path, those extra
iterations are wasted work. Change ->map_size to cover only the software
queues that are actually mapped, so we loop for only as many iterations as
we have to.
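As a rough illustration of the loop being trimmed, here is a minimal
userspace model of the per-word bitmap scan. The ctxmap struct, field
names, and sizes below are simplified assumptions that only mirror
struct blk_mq_ctxmap, not the kernel definitions; the point is that the
scan visits exactly map_size words, so shrinking map_size directly
shrinks the loop:

```c
#include <stdio.h>

/* Simplified stand-ins for struct blk_mq_ctxmap; illustrative only. */
struct ctxmap_word {
	unsigned long word;        /* one bit per pending software queue */
};

struct ctxmap {
	unsigned int map_size;     /* words visited by the scan */
	struct ctxmap_word map[8]; /* ALIGN(nr_cpu_ids, 8) / 8 words total */
};

/* Scan the bitmap the way flush_busy_ctxs()/blk_mq_hctx_has_pending()
 * do: one iteration per word, map_size words in total. */
static unsigned int count_pending(const struct ctxmap *m)
{
	unsigned int i, pending = 0;

	for (i = 0; i < m->map_size; i++)
		pending += __builtin_popcountl(m->map[i].word);
	return pending;
}

int main(void)
{
	struct ctxmap m = { .map_size = 8 }; /* old: always scan all words */

	m.map[0].word = 0x3;                 /* only two ctxs are busy */
	printf("pending=%u (8 words scanned)\n", count_pending(&m));

	m.map_size = 1;                      /* new: scan only mapped words */
	printf("pending=%u (1 word scanned)\n", count_pending(&m));
	return 0;
}
```

Both calls find the same two pending ctxs; the second does it in one
iteration instead of eight.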
Also remove the cpumask setting and the nr_ctx count in
blk_mq_init_cpu_queues(), since both are redone in blk_mq_map_swqueue().
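To see why the init-time work is redundant, here is a toy userspace model
of the rebuild pass, not the kernel code: the CPU count, the even/odd
map_queue() stand-in, and the bitmask cpumask are all invented for the
example. Because the rebuild starts from scratch, anything set earlier is
simply overwritten:

```c
#include <stdio.h>
#include <string.h>

/* Toy model: 4 CPUs mapped to 2 hardware queues; cpumask is a bitmask. */
#define NR_CPUS 4

struct hctx {
	unsigned int cpumask; /* stand-in for hctx->cpumask */
	unsigned int nr_ctx;  /* software queues mapped to this hctx */
};

static struct hctx hctxs[2];

/* Stand-in for q->mq_ops->map_queue(): even CPUs -> hctx 0, odd -> 1. */
static struct hctx *map_queue(int cpu)
{
	return &hctxs[cpu & 1];
}

/* Rebuild in the spirit of blk_mq_map_swqueue(): it recomputes the
 * cpumask and nr_ctx for every hctx, so doing the same work at init
 * time (as the removed lines did) buys nothing. */
static void map_swqueue(void)
{
	memset(hctxs, 0, sizeof(hctxs));
	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		struct hctx *hctx = map_queue(cpu);

		hctx->cpumask |= 1U << cpu;
		hctx->nr_ctx++;
	}
}

int main(void)
{
	map_swqueue();
	for (int i = 0; i < 2; i++)
		printf("hctx%d: cpumask=0x%x nr_ctx=%u\n",
		       i, hctxs[i].cpumask, hctxs[i].nr_ctx);
	return 0;
}
```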
Signed-off-by: Chong Yuan <chong.yuan@memblaze.com>
Reviewed-by: Wenbo Wang <wenbo.wang@memblaze.com>
Updated by me for formatting and commenting.
Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'block')
-rw-r--r--    block/blk-mq.c    13
1 file changed, 9 insertions(+), 4 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1192f85e5ff3..0b49e42e5310 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1522,8 +1522,6 @@ static int blk_mq_alloc_bitmap(struct blk_mq_ctxmap *bitmap, int node)
 	if (!bitmap->map)
 		return -ENOMEM;
 
-	bitmap->map_size = num_maps;
-
 	total = nr_cpu_ids;
 	for (i = 0; i < num_maps; i++) {
 		bitmap->map[i].depth = min(total, bitmap->bits_per_word);
@@ -1764,8 +1762,6 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
 			continue;
 
 		hctx = q->mq_ops->map_queue(q, i);
-		cpumask_set_cpu(i, hctx->cpumask);
-		hctx->nr_ctx++;
 
 		/*
 		 * Set local node, IFF we have more than one hw queue. If
@@ -1802,6 +1798,8 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 	}
 
 	queue_for_each_hw_ctx(q, hctx, i) {
+		struct blk_mq_ctxmap *map = &hctx->ctx_map;
+
 		/*
 		 * If no software queues are mapped to this hardware queue,
 		 * disable it and free the request entries.
@@ -1818,6 +1816,13 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 		}
 
 		/*
+		 * Set the map size to the number of mapped software queues.
+		 * This is more accurate and more efficient than looping
+		 * over all possibly mapped software queues.
+		 */
+		map->map_size = hctx->nr_ctx / map->bits_per_word;
+
+		/*
 		 * Initialize batch roundrobin counts
 		 */
 		hctx->next_cpu = cpumask_first(hctx->cpumask);
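To put numbers on the new sizing, a small standalone calculation follows;
nr_cpu_ids = 64, bits_per_word = 8, and nr_ctx = 16 are assumed values
chosen for illustration, and ALIGN mirrors the kernel macro for
power-of-two alignment:

```c
#include <stdio.h>

#define ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	unsigned int nr_cpu_ids = 64;   /* assumed system size */
	unsigned int bits_per_word = 8; /* as in the hunk above */
	unsigned int nr_ctx = 16;       /* ctxs mapped to this hctx */

	/* Before the patch: every hctx scanned the full, constant map. */
	unsigned int old_size = ALIGN(nr_cpu_ids, 8) / 8;

	/* After the patch: only words covering mapped ctxs are scanned. */
	unsigned int new_size = nr_ctx / bits_per_word;

	printf("old map_size=%u, new map_size=%u\n", old_size, new_size);
	return 0;
}
```

For that configuration the old code scans 8 words on every pass, while
the patched code scans 2.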