about | summary | refs | log | tree | commit | diff | stats
path: root/block
diff options
context:
space:
mode:
author: Chong Yuan <chong.yuan@memblaze.com> 2015-04-15 13:39:29 -0400
committer: Jens Axboe <axboe@fb.com> 2015-04-15 13:39:29 -0400
commit: 889fa31f00b218a2cef96c32a6b3f57e6d3bf918 (patch)
tree: 12338bd403f6fd84d8e2b98624aceaa3a4b9998b /block
parent: dc48e56d761610da4ea1088d1bea0a030b8e3e43 (diff)
blk-mq: reduce unnecessary software queue looping
In flush_busy_ctxs() and blk_mq_hctx_has_pending(), regardless of how many ctxs assigned to one hctx, they will all loop hctx->ctx_map.map_size times. Here hctx->ctx_map.map_size is a const ALIGN(nr_cpu_ids, 8) / 8. Especially, flush_busy_ctxs() is in hot code path. And it's unnecessary. Change ->map_size to contain the actually mapped software queues, so we only loop for as many iterations as we have to. And remove cpumask setting and nr_ctx count in blk_mq_init_cpu_queues() since they are all re-done in blk_mq_map_swqueue(). Signed-off-by: Chong Yuan <chong.yuan@memblaze.com> Reviewed-by: Wenbo Wang <wenbo.wang@memblaze.com> Updated by me for formatting and commenting. Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'block')
-rw-r--r--block/blk-mq.c13
1 file changed, 9 insertions, 4 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 1192f85e5ff3..0b49e42e5310 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1522,8 +1522,6 @@ static int blk_mq_alloc_bitmap(struct blk_mq_ctxmap *bitmap, int node)
1522 if (!bitmap->map) 1522 if (!bitmap->map)
1523 return -ENOMEM; 1523 return -ENOMEM;
1524 1524
1525 bitmap->map_size = num_maps;
1526
1527 total = nr_cpu_ids; 1525 total = nr_cpu_ids;
1528 for (i = 0; i < num_maps; i++) { 1526 for (i = 0; i < num_maps; i++) {
1529 bitmap->map[i].depth = min(total, bitmap->bits_per_word); 1527 bitmap->map[i].depth = min(total, bitmap->bits_per_word);
@@ -1764,8 +1762,6 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
1764 continue; 1762 continue;
1765 1763
1766 hctx = q->mq_ops->map_queue(q, i); 1764 hctx = q->mq_ops->map_queue(q, i);
1767 cpumask_set_cpu(i, hctx->cpumask);
1768 hctx->nr_ctx++;
1769 1765
1770 /* 1766 /*
1771 * Set local node, IFF we have more than one hw queue. If 1767 * Set local node, IFF we have more than one hw queue. If
@@ -1802,6 +1798,8 @@ static void blk_mq_map_swqueue(struct request_queue *q)
1802 } 1798 }
1803 1799
1804 queue_for_each_hw_ctx(q, hctx, i) { 1800 queue_for_each_hw_ctx(q, hctx, i) {
1801 struct blk_mq_ctxmap *map = &hctx->ctx_map;
1802
1805 /* 1803 /*
1806 * If no software queues are mapped to this hardware queue, 1804 * If no software queues are mapped to this hardware queue,
1807 * disable it and free the request entries. 1805 * disable it and free the request entries.
@@ -1818,6 +1816,13 @@ static void blk_mq_map_swqueue(struct request_queue *q)
1818 } 1816 }
1819 1817
1820 /* 1818 /*
1819 * Set the map size to the number of mapped software queues.
1820 * This is more accurate and more efficient than looping
1821 * over all possibly mapped software queues.
1822 */
1823 map->map_size = hctx->nr_ctx / map->bits_per_word;
1824
1825 /*
1821 * Initialize batch roundrobin counts 1826 * Initialize batch roundrobin counts
1822 */ 1827 */
1823 hctx->next_cpu = cpumask_first(hctx->cpumask); 1828 hctx->next_cpu = cpumask_first(hctx->cpumask);