author     Jens Axboe <axboe@fb.com>   2014-05-07 12:26:44 -0400
committer  Jens Axboe <axboe@fb.com>   2014-05-07 12:26:44 -0400
commit     506e931f92defdc60c1dc4aa2ff4a19a5dcd8618 (patch)
tree       8c0fdc0c0c4186f927246b5164396da446fbc8e5 /block
parent     5cf8c2277576fcc48966b105bb42782d7929fc48 (diff)
blk-mq: add basic round-robin of what CPU to queue workqueue work on
Right now we just pick the first CPU in the mask, but that can easily overload that one. Add some basic batching and round-robin all the entries in the mask instead.

Signed-off-by: Jens Axboe <axboe@fb.com>
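As a rough illustration of the batching scheme described above (not part of the patch), here is a minimal userspace C sketch: it keeps handing out the current CPU for a fixed batch of submissions, then advances to the next CPU set in a plain bitmask, wrapping around at the end. The names (fake_hctx, next_cpu_from_mask, WORK_BATCH) and the bitmask stand-in for struct cpumask are made up for this example.

#include <stdio.h>

#define NR_CPUS     8
#define WORK_BATCH  8   /* stand-in for BLK_MQ_CPU_WORK_BATCH */

struct fake_hctx {
	unsigned long cpumask;   /* bit n set => CPU n may run the work */
	int next_cpu;
	int next_cpu_batch;
};

/* Return the lowest allowed CPU strictly greater than 'cpu', or -1 if none. */
static int next_set_cpu(unsigned long mask, int cpu)
{
	for (int c = cpu + 1; c < NR_CPUS; c++)
		if (mask & (1UL << c))
			return c;
	return -1;
}

/*
 * Same shape as the patch's blk_mq_hctx_next_cpu(): hand out the current
 * CPU until the batch is used up, then advance (with wrap-around).
 */
static int next_cpu_from_mask(struct fake_hctx *h)
{
	int cpu = h->next_cpu;

	if (--h->next_cpu_batch <= 0) {
		int next = next_set_cpu(h->cpumask, h->next_cpu);

		if (next < 0)                   /* past the last set bit: wrap */
			next = next_set_cpu(h->cpumask, -1);

		h->next_cpu = next;
		h->next_cpu_batch = WORK_BATCH;
	}
	return cpu;
}

int main(void)
{
	struct fake_hctx h = {
		.cpumask = 0x55,                /* CPUs 0, 2, 4 and 6 */
		.next_cpu = 0,
		.next_cpu_batch = WORK_BATCH,
	};

	for (int i = 0; i < 3 * WORK_BATCH; i++)
		printf("work item %2d -> cpu %d\n", i, next_cpu_from_mask(&h));
	return 0;
}

Running the sketch shows each allowed CPU receiving WORK_BATCH consecutive work items before the selection moves on, which is the behaviour the patch below adds via blk_mq_hctx_next_cpu().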
Diffstat (limited to 'block')
-rw-r--r--  block/blk-mq.c  |  45
1 file changed, 31 insertions(+), 14 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 0d379830a278..2410e0cb7aef 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -670,6 +670,30 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 	}
 }
 
+/*
+ * It'd be great if the workqueue API had a way to pass
+ * in a mask and had some smarts for more clever placement.
+ * For now we just round-robin here, switching for every
+ * BLK_MQ_CPU_WORK_BATCH queued items.
+ */
+static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx)
+{
+	int cpu = hctx->next_cpu;
+
+	if (--hctx->next_cpu_batch <= 0) {
+		int next_cpu;
+
+		next_cpu = cpumask_next(hctx->next_cpu, hctx->cpumask);
+		if (next_cpu >= nr_cpu_ids)
+			next_cpu = cpumask_first(hctx->cpumask);
+
+		hctx->next_cpu = next_cpu;
+		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
+	}
+
+	return cpu;
+}
+
 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 {
 	if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
@@ -682,13 +706,7 @@ void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async)
 	else {
 		unsigned int cpu;
 
-		/*
-		 * It'd be great if the workqueue API had a way to pass
-		 * in a mask and had some smarts for more clever placement
-		 * than the first CPU. Or we could round-robin here. For now,
-		 * just queue on the first CPU.
-		 */
-		cpu = cpumask_first(hctx->cpumask);
+		cpu = blk_mq_hctx_next_cpu(hctx);
 		kblockd_schedule_delayed_work_on(cpu, &hctx->run_work, 0);
 	}
 }
@@ -795,13 +813,7 @@ void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs)
 	else {
 		unsigned int cpu;
 
-		/*
-		 * It'd be great if the workqueue API had a way to pass
-		 * in a mask and had some smarts for more clever placement
-		 * than the first CPU. Or we could round-robin here. For now,
-		 * just queue on the first CPU.
-		 */
-		cpu = cpumask_first(hctx->cpumask);
+		cpu = blk_mq_hctx_next_cpu(hctx);
 		kblockd_schedule_delayed_work_on(cpu, &hctx->delay_work, tmo);
 	}
 }
@@ -1378,6 +1390,11 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 		ctx->index_hw = hctx->nr_ctx;
 		hctx->ctxs[hctx->nr_ctx++] = ctx;
 	}
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		hctx->next_cpu = cpumask_first(hctx->cpumask);
+		hctx->next_cpu_batch = BLK_MQ_CPU_WORK_BATCH;
+	}
 }
 
 struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)