path: root/block
author		Tejun Heo <tj@kernel.org>	2014-07-01 12:31:13 -0400
committer	Jens Axboe <axboe@fb.com>	2014-07-01 12:31:13 -0400
commit		780db2071ac4d167ee4154ad9c96088f1bba044b (patch)
tree		87d9cee361861470e3f7a7845c97b2d03cb40411 /block
parent		776687bce42bb22cce48b5da950e48ebbb9a948f (diff)
blk-mq: decouple blk-mq freezing from generic bypassing
blk_mq freezing is entangled with generic bypassing, which bypasses blkcg and the io scheduler and lets IO requests fall through the block layer to the drivers in FIFO order. This allows forward progress on IOs with the advanced features disabled, so that those features can be configured or altered without worrying about stalling IO, which may lead to deadlock through memory allocation.

However, generic bypassing doesn't quite fit blk-mq. blk-mq currently doesn't make use of blkcg or ioscheds, and it maps bypassing to freezing, which blocks request processing and drains all the in-flight requests. This causes problems because bypassing assumes that request processing stays online. blk-mq works around this by conditionally allowing request processing for the problem case - during queue initialization.

Another oddity is that, except during queue cleanup, bypassing started on the generic side prevents blk-mq from processing new requests but doesn't drain the in-flight ones. This shouldn't break anything, but again highlights that something isn't quite right here.

The root cause is conflating blk-mq freezing with generic bypassing, which are two different mechanisms. The only place their purposes intersect is queue cleanup. Let's properly separate blk-mq freezing from generic bypassing and simply use freezing where necessary.

* request_queue->mq_freeze_depth is added, and blk_mq_[un]freeze_queue() now operate on this counter instead of ->bypass_depth. The replacement for QUEUE_FLAG_BYPASS isn't added; the counter is tested directly. This will be further updated by later changes.

* blk_mq_drain_queue() is dropped and the "__" prefix is dropped from blk_mq_freeze_queue(). The queue cleanup path now calls blk_mq_freeze_queue() directly.

* blk_mq_queue_enter()'s fast path condition is simplified to a direct check of @q->mq_freeze_depth. Previously, the condition was

    !blk_queue_dying(q) &&
        (!blk_queue_bypass(q) || !blk_queue_init_done(q))

  mq_freeze_depth is incremented right after dying is set, and the blk_queue_init_done() exception isn't necessary because blk-mq doesn't start frozen. That leaves only the blk_queue_bypass() test, which can be replaced by the @q->mq_freeze_depth test.

This change simplifies the code and reduces confusion in the area.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Nicholas A. Bellinger <nab@linux-iscsi.org>
Signed-off-by: Jens Axboe <axboe@fb.com>
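To make the resulting scheme concrete, the sketch below is a minimal userspace model of the freeze-depth mechanism described above, not the kernel code. It collapses the per-cpu mq_usage_counter, queue_lock and mq_freeze_wq into a single mutex/condvar pair, omits dying-queue handling, and uses invented names (mock_queue, queue_enter, freeze_queue, etc.); only the control flow mirrors the patch: submitters check just the freeze depth on the fast path, freezing bumps the depth and drains in-flight requests, and unfreezing wakes blocked submitters once the depth drops back to zero.

/* Illustrative sketch only; names mirror the patch, not a real kernel API. */
/* Compiles as a standalone translation unit with -pthread.               */
#include <pthread.h>

struct mock_queue {
	pthread_mutex_t lock;            /* stands in for q->queue_lock */
	pthread_cond_t  freeze_wq;       /* stands in for q->mq_freeze_wq */
	int             mq_freeze_depth; /* queue is frozen while > 0 */
	int             usage;           /* stands in for q->mq_usage_counter */
};

#define MOCK_QUEUE_INIT \
	{ PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0, 0 }

/* Submission side, like blk_mq_queue_enter(): fast path is a depth check. */
static void queue_enter(struct mock_queue *q)
{
	pthread_mutex_lock(&q->lock);
	while (q->mq_freeze_depth)              /* frozen: wait to be thawed */
		pthread_cond_wait(&q->freeze_wq, &q->lock);
	q->usage++;                             /* request is now in flight */
	pthread_mutex_unlock(&q->lock);
}

static void queue_exit(struct mock_queue *q)
{
	pthread_mutex_lock(&q->lock);
	if (!--q->usage)                        /* last in-flight request done */
		pthread_cond_broadcast(&q->freeze_wq);
	pthread_mutex_unlock(&q->lock);
}

/* Like blk_mq_freeze_queue(): bump the depth, then drain in-flight requests. */
static void freeze_queue(struct mock_queue *q)
{
	pthread_mutex_lock(&q->lock);
	q->mq_freeze_depth++;
	while (q->usage)                        /* drain, like blk_mq_drain_queue() */
		pthread_cond_wait(&q->freeze_wq, &q->lock);
	pthread_mutex_unlock(&q->lock);
}

/* Like blk_mq_unfreeze_queue(): drop the depth, wake submitters at zero. */
static void unfreeze_queue(struct mock_queue *q)
{
	pthread_mutex_lock(&q->lock);
	if (!--q->mq_freeze_depth)
		pthread_cond_broadcast(&q->freeze_wq);
	pthread_mutex_unlock(&q->lock);
}

Because the depth is a counter rather than a flag, freezes from independent callers nest naturally: the queue only thaws when the last unfreeze brings the depth back to zero, which is what wake = !--q->mq_freeze_depth expresses in the patch below.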
Diffstat (limited to 'block')
-rw-r--r--	block/blk-core.c	 2
-rw-r--r--	block/blk-mq.c	17
-rw-r--r--	block/blk-mq.h	 2
3 files changed, 8 insertions, 13 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 0d0bdd65b2d7..c359d72e9d76 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -514,7 +514,7 @@ void blk_cleanup_queue(struct request_queue *q)
 	 * prevent that q->request_fn() gets invoked after draining finished.
 	 */
 	if (q->mq_ops) {
-		blk_mq_drain_queue(q);
+		blk_mq_freeze_queue(q);
 		spin_lock_irq(lock);
 	} else {
 		spin_lock_irq(lock);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f4bdddd7ed99..1e324a123d40 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -84,15 +84,14 @@ static int blk_mq_queue_enter(struct request_queue *q)
 	smp_mb();
 
 	/* we have problems freezing the queue if it's initializing */
-	if (!blk_queue_dying(q) &&
-	    (!blk_queue_bypass(q) || !blk_queue_init_done(q)))
+	if (!q->mq_freeze_depth)
 		return 0;
 
 	__percpu_counter_add(&q->mq_usage_counter, -1, 1000000);
 
 	spin_lock_irq(q->queue_lock);
 	ret = wait_event_interruptible_lock_irq(q->mq_freeze_wq,
-		!blk_queue_bypass(q) || blk_queue_dying(q),
+		!q->mq_freeze_depth || blk_queue_dying(q),
 		*q->queue_lock);
 	/* inc usage with lock hold to avoid freeze_queue runs here */
 	if (!ret && !blk_queue_dying(q))
@@ -129,11 +128,10 @@ void blk_mq_drain_queue(struct request_queue *q)
  * Guarantee no request is in use, so we can change any data structure of
  * the queue afterward.
  */
-static void blk_mq_freeze_queue(struct request_queue *q)
+void blk_mq_freeze_queue(struct request_queue *q)
 {
 	spin_lock_irq(q->queue_lock);
-	q->bypass_depth++;
-	queue_flag_set(QUEUE_FLAG_BYPASS, q);
+	q->mq_freeze_depth++;
 	spin_unlock_irq(q->queue_lock);
 
 	blk_mq_drain_queue(q);
@@ -144,11 +142,8 @@ static void blk_mq_unfreeze_queue(struct request_queue *q)
 	bool wake = false;
 
 	spin_lock_irq(q->queue_lock);
-	if (!--q->bypass_depth) {
-		queue_flag_clear(QUEUE_FLAG_BYPASS, q);
-		wake = true;
-	}
-	WARN_ON_ONCE(q->bypass_depth < 0);
+	wake = !--q->mq_freeze_depth;
+	WARN_ON_ONCE(q->mq_freeze_depth < 0);
 	spin_unlock_irq(q->queue_lock);
 	if (wake)
 		wake_up_all(&q->mq_freeze_wq);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 26460884c6cd..ca4964a6295d 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -28,7 +28,7 @@ struct blk_mq_ctx {
 void __blk_mq_complete_request(struct request *rq);
 void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
 void blk_mq_init_flush(struct request_queue *q);
-void blk_mq_drain_queue(struct request_queue *q);
+void blk_mq_freeze_queue(struct request_queue *q);
 void blk_mq_free_queue(struct request_queue *q);
 void blk_mq_clone_flush_request(struct request *flush_rq,
 		struct request *orig_rq);