author		Christoph Hellwig <hch@lst.de>	2014-04-25 05:32:53 -0400
committer	Jens Axboe <axboe@fb.com>	2014-04-25 10:24:07 -0400
commit		38535201633077cbaf8b32886b5e3005b36c9024 (patch)
tree		ad94c4a3d3d7e1b47f2721ac36ab1698349e208d
parent		87ee7b112193bd081ba1a171fa5f6f39c429ef56 (diff)
blk-mq: respect rq_affinity
The blk-mq code is using its own version of the I/O completion affinity
tunables, which causes a few issues:

 - the rq_affinity sysfs file doesn't work for blk-mq devices, even if
   it still is present, thus breaking existing tuning setups.
 - the rq_affinity = 1 mode, which is the default for legacy request
   based drivers, isn't implemented at all.
 - blk-mq drivers don't implement any completion affinity with the
   default flag settings.

This patch removes the blk-mq ipi_redirect flag and sysfs file, as well
as the internal BLK_MQ_F_SHOULD_IPI flag, and replaces them with code
that respects the queue-wide rq_affinity flags and also implements the
rq_affinity = 1 mode.

This means I/O completion affinity can now only be tuned block-queue
wide instead of per context, which seems more sensible to me anyway.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
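The routing decision this patch adds to __blk_mq_complete_request() can
be illustrated outside the kernel. Below is a minimal user-space sketch
of just that decision; the function and parameter names here are
stand-ins chosen for clarity, only QUEUE_FLAG_SAME_COMP,
QUEUE_FLAG_SAME_FORCE and cpus_share_cache() correspond to real kernel
symbols, and the kernel additionally checks cpu_online(ctx->cpu) before
raising an IPI.

/*
 * User-space sketch of the completion-routing decision, mapping the
 * rq_affinity modes onto the two queue flags:
 *
 *   rq_affinity = 0: neither flag set, complete wherever the IRQ lands
 *   rq_affinity = 1: QUEUE_FLAG_SAME_COMP set, complete on the
 *                    submitting CPU unless the two CPUs share a cache
 *   rq_affinity = 2: QUEUE_FLAG_SAME_FORCE also set, always complete
 *                    on the submitting CPU
 */
#include <stdbool.h>
#include <stdio.h>

enum target { COMPLETE_LOCAL, COMPLETE_VIA_IPI };

static enum target route_completion(bool same_comp, bool same_force,
				    int cur_cpu, int submit_cpu,
				    bool caches_shared)
{
	bool shared = false;

	if (!same_comp)			/* rq_affinity = 0 */
		return COMPLETE_LOCAL;

	if (!same_force)		/* rq_affinity = 1: cache siblings are ok */
		shared = caches_shared;

	if (cur_cpu != submit_cpu && !shared)
		return COMPLETE_VIA_IPI;
	return COMPLETE_LOCAL;
}

int main(void)
{
	/* IRQ lands on CPU 3; the request was submitted on CPU 0. */
	printf("rq_affinity=1, shared cache: %s\n",
	       route_completion(true, false, 3, 0, true) == COMPLETE_LOCAL
	       ? "local" : "IPI");
	printf("rq_affinity=2, shared cache: %s\n",
	       route_completion(true, true, 3, 0, true) == COMPLETE_LOCAL
	       ? "local" : "IPI");
	return 0;
}

With rq_affinity = 1 a completion on a cache sibling stays local; with
rq_affinity = 2 it is always pushed back to the submitting CPU.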
-rw-r--r--  block/blk-mq-sysfs.c    |  42
-rw-r--r--  block/blk-mq.c          |   8
-rw-r--r--  block/blk-mq.h          |   1
-rw-r--r--  include/linux/blk-mq.h  |   1
4 files changed, 6 insertions(+), 46 deletions(-)
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c
index 9176a6984857..8145b5b25b4b 100644
--- a/block/blk-mq-sysfs.c
+++ b/block/blk-mq-sysfs.c
@@ -203,42 +203,6 @@ static ssize_t blk_mq_hw_sysfs_rq_list_show(struct blk_mq_hw_ctx *hctx,
 	return ret;
 }
 
-static ssize_t blk_mq_hw_sysfs_ipi_show(struct blk_mq_hw_ctx *hctx, char *page)
-{
-	ssize_t ret;
-
-	spin_lock(&hctx->lock);
-	ret = sprintf(page, "%u\n", !!(hctx->flags & BLK_MQ_F_SHOULD_IPI));
-	spin_unlock(&hctx->lock);
-
-	return ret;
-}
-
-static ssize_t blk_mq_hw_sysfs_ipi_store(struct blk_mq_hw_ctx *hctx,
-					 const char *page, size_t len)
-{
-	struct blk_mq_ctx *ctx;
-	unsigned long ret;
-	unsigned int i;
-
-	if (kstrtoul(page, 10, &ret)) {
-		pr_err("blk-mq-sysfs: invalid input '%s'\n", page);
-		return -EINVAL;
-	}
-
-	spin_lock(&hctx->lock);
-	if (ret)
-		hctx->flags |= BLK_MQ_F_SHOULD_IPI;
-	else
-		hctx->flags &= ~BLK_MQ_F_SHOULD_IPI;
-	spin_unlock(&hctx->lock);
-
-	hctx_for_each_ctx(hctx, ctx, i)
-		ctx->ipi_redirect = !!ret;
-
-	return len;
-}
-
 static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page)
 {
 	return blk_mq_tag_sysfs_show(hctx->tags, page);
@@ -307,11 +271,6 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_pending = {
 	.attr = {.name = "pending", .mode = S_IRUGO },
 	.show = blk_mq_hw_sysfs_rq_list_show,
 };
-static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_ipi = {
-	.attr = {.name = "ipi_redirect", .mode = S_IRUGO | S_IWUSR},
-	.show = blk_mq_hw_sysfs_ipi_show,
-	.store = blk_mq_hw_sysfs_ipi_store,
-};
 static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_tags = {
 	.attr = {.name = "tags", .mode = S_IRUGO },
 	.show = blk_mq_hw_sysfs_tags_show,
@@ -326,7 +285,6 @@ static struct attribute *default_hw_ctx_attrs[] = {
 	&blk_mq_hw_sysfs_run.attr,
 	&blk_mq_hw_sysfs_dispatched.attr,
 	&blk_mq_hw_sysfs_pending.attr,
-	&blk_mq_hw_sysfs_ipi.attr,
 	&blk_mq_hw_sysfs_tags.attr,
 	&blk_mq_hw_sysfs_cpus.attr,
 	NULL,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a84112c94e74..f2e92eb92803 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -326,15 +326,19 @@ static void __blk_mq_complete_request_remote(void *data)
 void __blk_mq_complete_request(struct request *rq)
 {
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
+	bool shared = false;
 	int cpu;
 
-	if (!ctx->ipi_redirect) {
+	if (!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) {
 		rq->q->softirq_done_fn(rq);
 		return;
 	}
 
 	cpu = get_cpu();
-	if (cpu != ctx->cpu && cpu_online(ctx->cpu)) {
+	if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags))
+		shared = cpus_share_cache(cpu, ctx->cpu);
+
+	if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) {
 		rq->csd.func = __blk_mq_complete_request_remote;
 		rq->csd.info = rq;
 		rq->csd.flags = 0;
diff --git a/block/blk-mq.h b/block/blk-mq.h
index b41a784de50d..1ae364ceaf8b 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -11,7 +11,6 @@ struct blk_mq_ctx {
 
 	unsigned int cpu;
 	unsigned int index_hw;
-	unsigned int ipi_redirect;
 
 	/* incremented at dispatch time */
 	unsigned long rq_dispatched[2];
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index ab469d525894..3b561d651a02 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -122,7 +122,6 @@ enum {
 
 	BLK_MQ_F_SHOULD_MERGE	= 1 << 0,
 	BLK_MQ_F_SHOULD_SORT	= 1 << 1,
-	BLK_MQ_F_SHOULD_IPI	= 1 << 2,
 
 	BLK_MQ_S_STOPPED	= 0,
 
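With the per-hctx ipi_redirect file gone, the queue-wide rq_affinity
attribute is the single remaining knob, and after this patch it applies
to blk-mq devices as well. A minimal user-space sketch of driving it,
assuming the standard /sys/block/<dev>/queue/rq_affinity path; the
device name is hypothetical and error handling is kept to a minimum.

#include <stdio.h>

/* Write an rq_affinity mode (0, 1 or 2) for a block device via sysfs. */
static int set_rq_affinity(const char *dev, int mode)
{
	char path[256];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/block/%s/queue/rq_affinity", dev);
	f = fopen(path, "w");
	if (!f)
		return -1;
	fprintf(f, "%d\n", mode);
	return fclose(f);
}

int main(void)
{
	/* Hypothetical device; force completions onto the submitting CPU. */
	return set_rq_affinity("nvme0n1", 2) ? 1 : 0;
}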