author		Jens Axboe <axboe@kernel.dk>	2017-06-20 19:56:13 -0400
committer	Jens Axboe <axboe@kernel.dk>	2017-06-21 12:17:49 -0400
commit		8e8320c9315c47a6a090188720ccff32a6a6ba18 (patch)
tree		e407c7eb7afd9d09aea9355426a2942a295be946
parent		ec2f0fadde446e0ebe28c779ffcac655228b8f1e (diff)
blk-mq: fix performance regression with shared tags
If we have shared tags enabled, then every IO completion will trigger
a full loop of every queue belonging to a tag set, and every hardware
queue for each of those queues, even if nothing needs to be done. This
causes a massive performance regression if you have a lot of shared
devices.

Instead of doing this huge full scan on every IO, add an atomic counter
to the main queue that tracks how many hardware queues have been marked
as needing a restart. With that, we can avoid looking for restartable
queues, if we don't have to.

Max reports that this restores performance. Before this patch, 4K IOPS
was limited to 22-23K IOPS. With the patch, we are running at 950-970K
IOPS.

Fixes: 6d8c6c0f97ad ("blk-mq: Restart a single queue if tag sets are shared")
Reported-by: Max Gurtovoy <maxg@mellanox.com>
Tested-by: Max Gurtovoy <maxg@mellanox.com>
Reviewed-by: Bart Van Assche <bart.vanassche@sandisk.com>
Tested-by: Bart Van Assche <bart.vanassche@wdc.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
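The counting idea, sketched below as standalone C11 with illustrative names
(struct queue_set, mark_restart, restart_marked_queues and friends are not
kernel symbols; the real patch uses test_and_set_bit/test_and_clear_bit on
hctx->state and atomic_inc/atomic_dec on q->shared_hctx_restart): the counter
is only adjusted on a 0 -> 1 or 1 -> 0 edge of the per-queue flag, so it
always equals the number of marked queues, and the completion path can bail
out as soon as it reads zero.

/*
 * Illustrative sketch only, not the kernel implementation.
 */
#include <stdatomic.h>
#include <stdbool.h>

struct hw_queue {
	atomic_bool needs_restart;	/* per-queue RESTART flag */
};

struct queue_set {
	atomic_int restart_count;	/* # of queues with needs_restart set */
	struct hw_queue *queues;
	int nr_queues;
};

static void mark_restart(struct queue_set *set, struct hw_queue *hq)
{
	/* Bump the counter only on a false -> true transition of the flag. */
	if (!atomic_exchange(&hq->needs_restart, true))
		atomic_fetch_add(&set->restart_count, 1);
}

static void restart_marked_queues(struct queue_set *set)
{
	/* Fast path: nothing is marked, skip the full scan entirely. */
	if (atomic_load(&set->restart_count) == 0)
		return;

	for (int i = 0; i < set->nr_queues; i++) {
		struct hw_queue *hq = &set->queues[i];

		/* Drop the counter only on a true -> false transition. */
		if (atomic_exchange(&hq->needs_restart, false))
			atomic_fetch_sub(&set->restart_count, 1);
		/* ... re-run the queue here if it has pending work ... */
	}
}

The same pairing rule is why queue_set_hctx_shared in the patch below also
adjusts the counter when a queue's shared flag is toggled while its RESTART
bit is set.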
-rw-r--r--	block/blk-mq-sched.c	58
-rw-r--r--	block/blk-mq-sched.h	9
-rw-r--r--	block/blk-mq.c	16
-rw-r--r--	include/linux/blkdev.h	2
4 files changed, 61 insertions, 24 deletions
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 1f5b692526ae..0ded5e846335 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -68,6 +68,45 @@ static void blk_mq_sched_assign_ioc(struct request_queue *q,
 		__blk_mq_sched_assign_ioc(q, rq, bio, ioc);
 }
 
+/*
+ * Mark a hardware queue as needing a restart. For shared queues, maintain
+ * a count of how many hardware queues are marked for restart.
+ */
+static void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
+{
+	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+		return;
+
+	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
+		struct request_queue *q = hctx->queue;
+
+		if (!test_and_set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+			atomic_inc(&q->shared_hctx_restart);
+	} else
+		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+}
+
+static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
+{
+	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+		return false;
+
+	if (hctx->flags & BLK_MQ_F_TAG_SHARED) {
+		struct request_queue *q = hctx->queue;
+
+		if (test_and_clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+			atomic_dec(&q->shared_hctx_restart);
+	} else
+		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
+
+	if (blk_mq_hctx_has_pending(hctx)) {
+		blk_mq_run_hw_queue(hctx, true);
+		return true;
+	}
+
+	return false;
+}
+
 struct request *blk_mq_sched_get_request(struct request_queue *q,
 					 struct bio *bio,
 					 unsigned int op,
@@ -266,18 +305,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
 	return true;
 }
 
-static bool blk_mq_sched_restart_hctx(struct blk_mq_hw_ctx *hctx)
-{
-	if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state)) {
-		clear_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
-		if (blk_mq_hctx_has_pending(hctx)) {
-			blk_mq_run_hw_queue(hctx, true);
-			return true;
-		}
-	}
-	return false;
-}
-
 /**
  * list_for_each_entry_rcu_rr - iterate in a round-robin fashion over rcu list
  * @pos: loop cursor.
@@ -309,6 +336,13 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
 	unsigned int i, j;
 
 	if (set->flags & BLK_MQ_F_TAG_SHARED) {
+		/*
+		 * If this is 0, then we know that no hardware queues
+		 * have RESTART marked. We're done.
+		 */
+		if (!atomic_read(&queue->shared_hctx_restart))
+			return;
+
 		rcu_read_lock();
 		list_for_each_entry_rcu_rr(q, queue, &set->tag_list,
 					   tag_set_list) {
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index edafb5383b7b..5007edece51a 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -115,15 +115,6 @@ static inline bool blk_mq_sched_has_work(struct blk_mq_hw_ctx *hctx)
 	return false;
 }
 
-/*
- * Mark a hardware queue as needing a restart.
- */
-static inline void blk_mq_sched_mark_restart_hctx(struct blk_mq_hw_ctx *hctx)
-{
-	if (!test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
-		set_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
-}
-
 static inline bool blk_mq_sched_needs_restart(struct blk_mq_hw_ctx *hctx)
 {
 	return test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index bb66c96850b1..958cedaff8b8 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2103,20 +2103,30 @@ static void blk_mq_map_swqueue(struct request_queue *q,
 	}
 }
 
+/*
+ * Caller needs to ensure that we're either frozen/quiesced, or that
+ * the queue isn't live yet.
+ */
 static void queue_set_hctx_shared(struct request_queue *q, bool shared)
 {
 	struct blk_mq_hw_ctx *hctx;
 	int i;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
-		if (shared)
+		if (shared) {
+			if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+				atomic_inc(&q->shared_hctx_restart);
 			hctx->flags |= BLK_MQ_F_TAG_SHARED;
-		else
+		} else {
+			if (test_bit(BLK_MQ_S_SCHED_RESTART, &hctx->state))
+				atomic_dec(&q->shared_hctx_restart);
 			hctx->flags &= ~BLK_MQ_F_TAG_SHARED;
+		}
 	}
 }
 
-static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set, bool shared)
+static void blk_mq_update_tag_set_depth(struct blk_mq_tag_set *set,
+					bool shared)
 {
 	struct request_queue *q;
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index b74a3edcb3da..1ddd36bd2173 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -391,6 +391,8 @@ struct request_queue {
 	int			nr_rqs[2];	/* # allocated [a]sync rqs */
 	int			nr_rqs_elvpriv;	/* # allocated rqs w/ elvpriv */
 
+	atomic_t		shared_hctx_restart;
+
 	struct blk_queue_stats	*stats;
 	struct rq_wb		*rq_wb;
 