author		Ming Lei <ming.lei@redhat.com>		2018-12-17 10:44:05 -0500
committer	Jens Axboe <axboe@kernel.dk>		2018-12-17 13:19:54 -0500
commit		c16d6b5a9f47d0e581882269fca1d73be60208b2 (patch)
tree		778a7ca836078f6cca15630f25b0d652320f5cb8
parent		7211aef86f79583e59b88a0aba0bc830566f7e8e (diff)
blk-mq: fix dispatch from sw queue
When a request is added to the rq list of a sw queue (ctx), the rq may be from a
different type of hctx, especially after multiple queue mappings were introduced.
So when dispatching requests from the sw queue via blk_mq_flush_busy_ctxs() or
blk_mq_dequeue_from_ctx(), a request belonging to a different queue type of hctx
can be dispatched to the current hctx when the read queue or poll queue is
enabled.

This patch fixes the issue by introducing per-queue-type lists.

Cc: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>

Changed by me to not use separately cacheline-aligned lists, just place them all
in the same cacheline where we had just the one list and lock before.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
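To illustrate the idea outside the kernel: below is a minimal user-space C sketch
(not the kernel code itself; the struct and helper names are simplified
placeholders) showing why keeping one sw-queue list per hctx type fixes the bug.
Insert and dispatch both index the array with the hardware queue's type, so a
poll or read hctx can no longer pick up requests that were queued for a
different queue type.

#include <stdio.h>

enum hctx_type { HCTX_TYPE_DEFAULT, HCTX_TYPE_READ, HCTX_TYPE_POLL, HCTX_MAX_TYPES };

struct request { const char *name; struct request *next; };

struct blk_mq_ctx_sketch {
        /* one sw-queue list per queue type, replacing the single rq_list */
        struct request *rq_lists[HCTX_MAX_TYPES];
};

/* insert: pick the list that matches the hardware queue's type */
static void insert_rq(struct blk_mq_ctx_sketch *ctx, enum hctx_type type,
                      struct request *rq)
{
        rq->next = ctx->rq_lists[type];
        ctx->rq_lists[type] = rq;
}

/* dispatch: only drain the list of the dispatching hctx's type */
static struct request *dispatch_rq(struct blk_mq_ctx_sketch *ctx,
                                   enum hctx_type type)
{
        struct request *rq = ctx->rq_lists[type];

        if (rq)
                ctx->rq_lists[type] = rq->next;
        return rq;
}

int main(void)
{
        struct blk_mq_ctx_sketch ctx = { { NULL } };
        struct request r = { "read-rq", NULL };
        struct request w = { "write-rq", NULL };

        insert_rq(&ctx, HCTX_TYPE_READ, &r);
        insert_rq(&ctx, HCTX_TYPE_DEFAULT, &w);

        /* a poll hctx finds nothing queued for it; a read hctx sees only r */
        printf("poll: %p\n", (void *)dispatch_rq(&ctx, HCTX_TYPE_POLL));
        printf("read: %s\n", dispatch_rq(&ctx, HCTX_TYPE_READ)->name);
        return 0;
}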
-rw-r--r--	block/blk-mq-debugfs.c	69
-rw-r--r--	block/blk-mq-sched.c	11
-rw-r--r--	block/blk-mq.c		29
-rw-r--r--	block/blk-mq.h		4
4 files changed, 68 insertions, 45 deletions
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 1e12033be9ea..90d68760af08 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -652,36 +652,43 @@ static int hctx_dispatch_busy_show(void *data, struct seq_file *m)
 	return 0;
 }
 
-static void *ctx_rq_list_start(struct seq_file *m, loff_t *pos)
-	__acquires(&ctx->lock)
-{
-	struct blk_mq_ctx *ctx = m->private;
-
-	spin_lock(&ctx->lock);
-	return seq_list_start(&ctx->rq_list, *pos);
-}
-
-static void *ctx_rq_list_next(struct seq_file *m, void *v, loff_t *pos)
-{
-	struct blk_mq_ctx *ctx = m->private;
-
-	return seq_list_next(v, &ctx->rq_list, pos);
-}
+#define CTX_RQ_SEQ_OPS(name, type) \
+static void *ctx_##name##_rq_list_start(struct seq_file *m, loff_t *pos) \
+	__acquires(&ctx->lock) \
+{ \
+	struct blk_mq_ctx *ctx = m->private; \
+ \
+	spin_lock(&ctx->lock); \
+	return seq_list_start(&ctx->rq_lists[type], *pos); \
+} \
+ \
+static void *ctx_##name##_rq_list_next(struct seq_file *m, void *v, \
+				     loff_t *pos) \
+{ \
+	struct blk_mq_ctx *ctx = m->private; \
+ \
+	return seq_list_next(v, &ctx->rq_lists[type], pos); \
+} \
+ \
+static void ctx_##name##_rq_list_stop(struct seq_file *m, void *v) \
+	__releases(&ctx->lock) \
+{ \
+	struct blk_mq_ctx *ctx = m->private; \
+ \
+	spin_unlock(&ctx->lock); \
+} \
+ \
+static const struct seq_operations ctx_##name##_rq_list_seq_ops = { \
+	.start	= ctx_##name##_rq_list_start, \
+	.next	= ctx_##name##_rq_list_next, \
+	.stop	= ctx_##name##_rq_list_stop, \
+	.show	= blk_mq_debugfs_rq_show, \
+}
+
+CTX_RQ_SEQ_OPS(default, HCTX_TYPE_DEFAULT);
+CTX_RQ_SEQ_OPS(read, HCTX_TYPE_READ);
+CTX_RQ_SEQ_OPS(poll, HCTX_TYPE_POLL);
 
-static void ctx_rq_list_stop(struct seq_file *m, void *v)
-	__releases(&ctx->lock)
-{
-	struct blk_mq_ctx *ctx = m->private;
-
-	spin_unlock(&ctx->lock);
-}
-
-static const struct seq_operations ctx_rq_list_seq_ops = {
-	.start	= ctx_rq_list_start,
-	.next	= ctx_rq_list_next,
-	.stop	= ctx_rq_list_stop,
-	.show	= blk_mq_debugfs_rq_show,
-};
 static int ctx_dispatched_show(void *data, struct seq_file *m)
 {
 	struct blk_mq_ctx *ctx = data;
@@ -819,7 +826,9 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_hctx_attrs[] = {
 };
 
 static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = {
-	{"rq_list", 0400, .seq_ops = &ctx_rq_list_seq_ops},
+	{"default_rq_list", 0400, .seq_ops = &ctx_default_rq_list_seq_ops},
+	{"read_rq_list", 0400, .seq_ops = &ctx_read_rq_list_seq_ops},
+	{"poll_rq_list", 0400, .seq_ops = &ctx_poll_rq_list_seq_ops},
 	{"dispatched", 0600, ctx_dispatched_show, ctx_dispatched_write},
 	{"merged", 0600, ctx_merged_show, ctx_merged_write},
 	{"completed", 0600, ctx_completed_show, ctx_completed_write},
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 056fa9baf44e..140933e4a7d1 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -302,11 +302,14 @@ EXPORT_SYMBOL_GPL(blk_mq_bio_list_merge);
  * too much time checking for merges.
  */
 static bool blk_mq_attempt_merge(struct request_queue *q,
+				 struct blk_mq_hw_ctx *hctx,
 				 struct blk_mq_ctx *ctx, struct bio *bio)
 {
+	enum hctx_type type = hctx->type;
+
 	lockdep_assert_held(&ctx->lock);
 
-	if (blk_mq_bio_list_merge(q, &ctx->rq_list, bio)) {
+	if (blk_mq_bio_list_merge(q, &ctx->rq_lists[type], bio)) {
 		ctx->rq_merged++;
 		return true;
 	}
@@ -320,17 +323,19 @@ bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio)
 	struct blk_mq_ctx *ctx = blk_mq_get_ctx(q);
 	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(q, bio->bi_opf, ctx->cpu);
 	bool ret = false;
+	enum hctx_type type;
 
 	if (e && e->type->ops.bio_merge) {
 		blk_mq_put_ctx(ctx);
 		return e->type->ops.bio_merge(hctx, bio);
 	}
 
+	type = hctx->type;
 	if ((hctx->flags & BLK_MQ_F_SHOULD_MERGE) &&
-	    !list_empty_careful(&ctx->rq_list)) {
+	    !list_empty_careful(&ctx->rq_lists[type])) {
 		/* default per sw-queue merge */
 		spin_lock(&ctx->lock);
-		ret = blk_mq_attempt_merge(q, ctx, bio);
+		ret = blk_mq_attempt_merge(q, hctx, ctx, bio);
 		spin_unlock(&ctx->lock);
 	}
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 313f28b2d079..9c1c1544bac3 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -958,9 +958,10 @@ static bool flush_busy_ctx(struct sbitmap *sb, unsigned int bitnr, void *data)
 	struct flush_busy_ctx_data *flush_data = data;
 	struct blk_mq_hw_ctx *hctx = flush_data->hctx;
 	struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
+	enum hctx_type type = hctx->type;
 
 	spin_lock(&ctx->lock);
-	list_splice_tail_init(&ctx->rq_list, flush_data->list);
+	list_splice_tail_init(&ctx->rq_lists[type], flush_data->list);
 	sbitmap_clear_bit(sb, bitnr);
 	spin_unlock(&ctx->lock);
 	return true;
@@ -992,12 +993,13 @@ static bool dispatch_rq_from_ctx(struct sbitmap *sb, unsigned int bitnr,
 	struct dispatch_rq_data *dispatch_data = data;
 	struct blk_mq_hw_ctx *hctx = dispatch_data->hctx;
 	struct blk_mq_ctx *ctx = hctx->ctxs[bitnr];
+	enum hctx_type type = hctx->type;
 
 	spin_lock(&ctx->lock);
-	if (!list_empty(&ctx->rq_list)) {
-		dispatch_data->rq = list_entry_rq(ctx->rq_list.next);
+	if (!list_empty(&ctx->rq_lists[type])) {
+		dispatch_data->rq = list_entry_rq(ctx->rq_lists[type].next);
 		list_del_init(&dispatch_data->rq->queuelist);
-		if (list_empty(&ctx->rq_list))
+		if (list_empty(&ctx->rq_lists[type]))
 			sbitmap_clear_bit(sb, bitnr);
 	}
 	spin_unlock(&ctx->lock);
@@ -1608,15 +1610,16 @@ static inline void __blk_mq_insert_req_list(struct blk_mq_hw_ctx *hctx,
 					    bool at_head)
 {
 	struct blk_mq_ctx *ctx = rq->mq_ctx;
+	enum hctx_type type = hctx->type;
 
 	lockdep_assert_held(&ctx->lock);
 
 	trace_block_rq_insert(hctx->queue, rq);
 
 	if (at_head)
-		list_add(&rq->queuelist, &ctx->rq_list);
+		list_add(&rq->queuelist, &ctx->rq_lists[type]);
 	else
-		list_add_tail(&rq->queuelist, &ctx->rq_list);
+		list_add_tail(&rq->queuelist, &ctx->rq_lists[type]);
 }
 
 void __blk_mq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
@@ -1651,6 +1654,7 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
 
 {
 	struct request *rq;
+	enum hctx_type type = hctx->type;
 
 	/*
 	 * preemption doesn't flush plug list, so it's possible ctx->cpu is
@@ -1662,7 +1666,7 @@ void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
 	}
 
 	spin_lock(&ctx->lock);
-	list_splice_tail_init(list, &ctx->rq_list);
+	list_splice_tail_init(list, &ctx->rq_lists[type]);
 	blk_mq_hctx_mark_pending(hctx, ctx);
 	spin_unlock(&ctx->lock);
 }
@@ -2200,13 +2204,15 @@ static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node)
 	struct blk_mq_hw_ctx *hctx;
 	struct blk_mq_ctx *ctx;
 	LIST_HEAD(tmp);
+	enum hctx_type type;
 
 	hctx = hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_dead);
 	ctx = __blk_mq_get_ctx(hctx->queue, cpu);
+	type = hctx->type;
 
 	spin_lock(&ctx->lock);
-	if (!list_empty(&ctx->rq_list)) {
-		list_splice_init(&ctx->rq_list, &tmp);
+	if (!list_empty(&ctx->rq_lists[type])) {
+		list_splice_init(&ctx->rq_lists[type], &tmp);
 		blk_mq_hctx_clear_pending(hctx, ctx);
 	}
 	spin_unlock(&ctx->lock);
@@ -2343,10 +2349,13 @@ static void blk_mq_init_cpu_queues(struct request_queue *q,
 	for_each_possible_cpu(i) {
 		struct blk_mq_ctx *__ctx = per_cpu_ptr(q->queue_ctx, i);
 		struct blk_mq_hw_ctx *hctx;
+		int k;
 
 		__ctx->cpu = i;
 		spin_lock_init(&__ctx->lock);
-		INIT_LIST_HEAD(&__ctx->rq_list);
+		for (k = HCTX_TYPE_DEFAULT; k < HCTX_MAX_TYPES; k++)
+			INIT_LIST_HEAD(&__ctx->rq_lists[k]);
+
 		__ctx->queue = q;
 
 		/*
diff --git a/block/blk-mq.h b/block/blk-mq.h
index d1ed096723fb..d943d46b0785 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -18,8 +18,8 @@ struct blk_mq_ctxs {
 struct blk_mq_ctx {
 	struct {
 		spinlock_t		lock;
-		struct list_head	rq_list;
+		struct list_head	rq_lists[HCTX_MAX_TYPES];
 	} ____cacheline_aligned_in_smp;
 
 	unsigned int		cpu;
 	unsigned short		index_hw[HCTX_MAX_TYPES];