diff options
author | Christoph Hellwig <hch@lst.de> | 2014-04-25 05:32:53 -0400 |
---|---|---|
committer | Jens Axboe <axboe@fb.com> | 2014-04-25 10:24:07 -0400 |
commit | 38535201633077cbaf8b32886b5e3005b36c9024 (patch) | |
tree | ad94c4a3d3d7e1b47f2721ac36ab1698349e208d | |
parent | 87ee7b112193bd081ba1a171fa5f6f39c429ef56 (diff) |
blk-mq: respect rq_affinity
The blk-mq code is using its own version of the I/O completion affinity
tunables, which causes a few issues:
- the rq_affinity sysfs file doesn't work for blk-mq devices, even if it
still is present, thus breaking existing tuning setups.
- the rq_affinity = 1 mode, which is the default for legacy request-based
drivers, isn't implemented at all.
- blk-mq drivers don't implement any completion affinity with the default
flag settings.
This patch removes the blk-mq ipi_redirect flag and sysfs file, as well
as the internal BLK_MQ_F_SHOULD_IPI flag and replaces it with code that
respects the queue-wide rq_affinity flags and also implements the
rq_affinity = 1 mode.
This means I/O completion affinity can now only be tuned block-queue wide
instead of per context, which seems more sensible to me anyway.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
-rw-r--r-- | block/blk-mq-sysfs.c | 42 | ||||
-rw-r--r-- | block/blk-mq.c | 8 | ||||
-rw-r--r-- | block/blk-mq.h | 1 | ||||
-rw-r--r-- | include/linux/blk-mq.h | 1 |
4 files changed, 6 insertions, 46 deletions
diff --git a/block/blk-mq-sysfs.c b/block/blk-mq-sysfs.c index 9176a6984857..8145b5b25b4b 100644 --- a/block/blk-mq-sysfs.c +++ b/block/blk-mq-sysfs.c | |||
@@ -203,42 +203,6 @@ static ssize_t blk_mq_hw_sysfs_rq_list_show(struct blk_mq_hw_ctx *hctx, | |||
203 | return ret; | 203 | return ret; |
204 | } | 204 | } |
205 | 205 | ||
206 | static ssize_t blk_mq_hw_sysfs_ipi_show(struct blk_mq_hw_ctx *hctx, char *page) | ||
207 | { | ||
208 | ssize_t ret; | ||
209 | |||
210 | spin_lock(&hctx->lock); | ||
211 | ret = sprintf(page, "%u\n", !!(hctx->flags & BLK_MQ_F_SHOULD_IPI)); | ||
212 | spin_unlock(&hctx->lock); | ||
213 | |||
214 | return ret; | ||
215 | } | ||
216 | |||
217 | static ssize_t blk_mq_hw_sysfs_ipi_store(struct blk_mq_hw_ctx *hctx, | ||
218 | const char *page, size_t len) | ||
219 | { | ||
220 | struct blk_mq_ctx *ctx; | ||
221 | unsigned long ret; | ||
222 | unsigned int i; | ||
223 | |||
224 | if (kstrtoul(page, 10, &ret)) { | ||
225 | pr_err("blk-mq-sysfs: invalid input '%s'\n", page); | ||
226 | return -EINVAL; | ||
227 | } | ||
228 | |||
229 | spin_lock(&hctx->lock); | ||
230 | if (ret) | ||
231 | hctx->flags |= BLK_MQ_F_SHOULD_IPI; | ||
232 | else | ||
233 | hctx->flags &= ~BLK_MQ_F_SHOULD_IPI; | ||
234 | spin_unlock(&hctx->lock); | ||
235 | |||
236 | hctx_for_each_ctx(hctx, ctx, i) | ||
237 | ctx->ipi_redirect = !!ret; | ||
238 | |||
239 | return len; | ||
240 | } | ||
241 | |||
242 | static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page) | 206 | static ssize_t blk_mq_hw_sysfs_tags_show(struct blk_mq_hw_ctx *hctx, char *page) |
243 | { | 207 | { |
244 | return blk_mq_tag_sysfs_show(hctx->tags, page); | 208 | return blk_mq_tag_sysfs_show(hctx->tags, page); |
@@ -307,11 +271,6 @@ static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_pending = { | |||
307 | .attr = {.name = "pending", .mode = S_IRUGO }, | 271 | .attr = {.name = "pending", .mode = S_IRUGO }, |
308 | .show = blk_mq_hw_sysfs_rq_list_show, | 272 | .show = blk_mq_hw_sysfs_rq_list_show, |
309 | }; | 273 | }; |
310 | static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_ipi = { | ||
311 | .attr = {.name = "ipi_redirect", .mode = S_IRUGO | S_IWUSR}, | ||
312 | .show = blk_mq_hw_sysfs_ipi_show, | ||
313 | .store = blk_mq_hw_sysfs_ipi_store, | ||
314 | }; | ||
315 | static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_tags = { | 274 | static struct blk_mq_hw_ctx_sysfs_entry blk_mq_hw_sysfs_tags = { |
316 | .attr = {.name = "tags", .mode = S_IRUGO }, | 275 | .attr = {.name = "tags", .mode = S_IRUGO }, |
317 | .show = blk_mq_hw_sysfs_tags_show, | 276 | .show = blk_mq_hw_sysfs_tags_show, |
@@ -326,7 +285,6 @@ static struct attribute *default_hw_ctx_attrs[] = { | |||
326 | &blk_mq_hw_sysfs_run.attr, | 285 | &blk_mq_hw_sysfs_run.attr, |
327 | &blk_mq_hw_sysfs_dispatched.attr, | 286 | &blk_mq_hw_sysfs_dispatched.attr, |
328 | &blk_mq_hw_sysfs_pending.attr, | 287 | &blk_mq_hw_sysfs_pending.attr, |
329 | &blk_mq_hw_sysfs_ipi.attr, | ||
330 | &blk_mq_hw_sysfs_tags.attr, | 288 | &blk_mq_hw_sysfs_tags.attr, |
331 | &blk_mq_hw_sysfs_cpus.attr, | 289 | &blk_mq_hw_sysfs_cpus.attr, |
332 | NULL, | 290 | NULL, |
diff --git a/block/blk-mq.c b/block/blk-mq.c index a84112c94e74..f2e92eb92803 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c | |||
@@ -326,15 +326,19 @@ static void __blk_mq_complete_request_remote(void *data) | |||
326 | void __blk_mq_complete_request(struct request *rq) | 326 | void __blk_mq_complete_request(struct request *rq) |
327 | { | 327 | { |
328 | struct blk_mq_ctx *ctx = rq->mq_ctx; | 328 | struct blk_mq_ctx *ctx = rq->mq_ctx; |
329 | bool shared = false; | ||
329 | int cpu; | 330 | int cpu; |
330 | 331 | ||
331 | if (!ctx->ipi_redirect) { | 332 | if (!test_bit(QUEUE_FLAG_SAME_COMP, &rq->q->queue_flags)) { |
332 | rq->q->softirq_done_fn(rq); | 333 | rq->q->softirq_done_fn(rq); |
333 | return; | 334 | return; |
334 | } | 335 | } |
335 | 336 | ||
336 | cpu = get_cpu(); | 337 | cpu = get_cpu(); |
337 | if (cpu != ctx->cpu && cpu_online(ctx->cpu)) { | 338 | if (!test_bit(QUEUE_FLAG_SAME_FORCE, &rq->q->queue_flags)) |
339 | shared = cpus_share_cache(cpu, ctx->cpu); | ||
340 | |||
341 | if (cpu != ctx->cpu && !shared && cpu_online(ctx->cpu)) { | ||
338 | rq->csd.func = __blk_mq_complete_request_remote; | 342 | rq->csd.func = __blk_mq_complete_request_remote; |
339 | rq->csd.info = rq; | 343 | rq->csd.info = rq; |
340 | rq->csd.flags = 0; | 344 | rq->csd.flags = 0; |
diff --git a/block/blk-mq.h b/block/blk-mq.h index b41a784de50d..1ae364ceaf8b 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h | |||
@@ -11,7 +11,6 @@ struct blk_mq_ctx { | |||
11 | 11 | ||
12 | unsigned int cpu; | 12 | unsigned int cpu; |
13 | unsigned int index_hw; | 13 | unsigned int index_hw; |
14 | unsigned int ipi_redirect; | ||
15 | 14 | ||
16 | /* incremented at dispatch time */ | 15 | /* incremented at dispatch time */ |
17 | unsigned long rq_dispatched[2]; | 16 | unsigned long rq_dispatched[2]; |
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ab469d525894..3b561d651a02 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h | |||
@@ -122,7 +122,6 @@ enum { | |||
122 | 122 | ||
123 | BLK_MQ_F_SHOULD_MERGE = 1 << 0, | 123 | BLK_MQ_F_SHOULD_MERGE = 1 << 0, |
124 | BLK_MQ_F_SHOULD_SORT = 1 << 1, | 124 | BLK_MQ_F_SHOULD_SORT = 1 << 1, |
125 | BLK_MQ_F_SHOULD_IPI = 1 << 2, | ||
126 | 125 | ||
127 | BLK_MQ_S_STOPPED = 0, | 126 | BLK_MQ_S_STOPPED = 0, |
128 | 127 | ||