diff options
author | Tejun Heo <tj@kernel.org> | 2014-09-23 15:24:32 -0400 |
---|---|---|
committer | Jens Axboe <axboe@fb.com> | 2014-09-24 10:29:36 -0400 |
commit | 0a30288da1aec914e158c2d7a3482a85f632750f (patch) | |
tree | 748c41c772a551dd7c94d46770aadd8a06ec8013 | |
parent | 452b6361c4d9baf6940adb7b1316e0f386c39799 (diff) |
blk-mq, percpu_ref: implement a kludge for SCSI blk-mq stall during probe
blk-mq uses percpu_ref for its usage counter which tracks the number
of in-flight commands and is used to synchronously drain the queue on
freeze. percpu_ref shutdown takes measurable wallclock time as it
involves a sched RCU grace period. This means that draining a blk-mq
queue takes measurable wallclock time. One would think that this shouldn't
matter as queue shutdown should be a rare event which takes place
asynchronously w.r.t. userland.
Unfortunately, SCSI probing involves synchronously setting up and then
tearing down a lot of request_queues back-to-back for non-existent
LUNs. This means that SCSI probing may take more than ten seconds
when scsi-mq is used.
This will be properly fixed by implementing a mechanism to keep
q->mq_usage_counter in atomic mode till genhd registration; however,
that involves rather big updates to percpu_ref which are difficult to
apply late in the devel cycle (v3.17-rc6 at the moment). As a
stop-gap measure till the proper fix can be implemented in the next
cycle, this patch introduces __percpu_ref_kill_expedited() and makes
blk_mq_freeze_queue() use it. This is heavy-handed but should work
for testing the experimental SCSI blk-mq implementation.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Christoph Hellwig <hch@infradead.org>
Link: http://lkml.kernel.org/g/20140919113815.GA10791@lst.de
Fixes: add703fda981 ("blk-mq: use percpu_ref for mq usage count")
Cc: Kent Overstreet <kmo@daterainc.com>
Cc: Jens Axboe <axboe@kernel.dk>
Tested-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
-rw-r--r-- | block/blk-mq.c | 11 | ||||
-rw-r--r-- | include/linux/percpu-refcount.h | 1 | ||||
-rw-r--r-- | lib/percpu-refcount.c | 16 |
3 files changed, 27 insertions, 1 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c index c88e6089746d..df8e1e09dd17 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c | |||
@@ -119,7 +119,16 @@ void blk_mq_freeze_queue(struct request_queue *q) | |||
119 | spin_unlock_irq(q->queue_lock); | 119 | spin_unlock_irq(q->queue_lock); |
120 | 120 | ||
121 | if (freeze) { | 121 | if (freeze) { |
122 | percpu_ref_kill(&q->mq_usage_counter); | 122 | /* |
123 | * XXX: Temporary kludge to work around SCSI blk-mq stall. | ||
124 | * SCSI synchronously creates and destroys many queues | ||
125 | * back-to-back during probe leading to lengthy stalls. | ||
126 | * This will be fixed by keeping ->mq_usage_counter in | ||
127 | * atomic mode until genhd registration, but, for now, | ||
128 | * let's work around using expedited synchronization. | ||
129 | */ | ||
130 | __percpu_ref_kill_expedited(&q->mq_usage_counter); | ||
131 | |||
123 | blk_mq_run_queues(q, false); | 132 | blk_mq_run_queues(q, false); |
124 | } | 133 | } |
125 | wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter)); | 134 | wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter)); |
diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 3dfbf237cd8f..ef5894ca8e50 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h | |||
@@ -71,6 +71,7 @@ void percpu_ref_reinit(struct percpu_ref *ref); | |||
71 | void percpu_ref_exit(struct percpu_ref *ref); | 71 | void percpu_ref_exit(struct percpu_ref *ref); |
72 | void percpu_ref_kill_and_confirm(struct percpu_ref *ref, | 72 | void percpu_ref_kill_and_confirm(struct percpu_ref *ref, |
73 | percpu_ref_func_t *confirm_kill); | 73 | percpu_ref_func_t *confirm_kill); |
74 | void __percpu_ref_kill_expedited(struct percpu_ref *ref); | ||
74 | 75 | ||
75 | /** | 76 | /** |
76 | * percpu_ref_kill - drop the initial ref | 77 | * percpu_ref_kill - drop the initial ref |
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index fe5a3342e960..a89cf09a8268 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c | |||
@@ -184,3 +184,19 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref, | |||
184 | call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu); | 184 | call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu); |
185 | } | 185 | } |
186 | EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm); | 186 | EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm); |
187 | |||
188 | /* | ||
189 | * XXX: Temporary kludge to work around SCSI blk-mq stall. Used only by | ||
190 | * block/blk-mq.c::blk_mq_freeze_queue(). Will be removed during v3.18 | ||
191 | * devel cycle. Do not use anywhere else. | ||
192 | */ | ||
193 | void __percpu_ref_kill_expedited(struct percpu_ref *ref) | ||
194 | { | ||
195 | WARN_ONCE(ref->pcpu_count_ptr & PCPU_REF_DEAD, | ||
196 | "percpu_ref_kill() called more than once on %pf!", | ||
197 | ref->release); | ||
198 | |||
199 | ref->pcpu_count_ptr |= PCPU_REF_DEAD; | ||
200 | synchronize_sched_expedited(); | ||
201 | percpu_ref_kill_rcu(&ref->rcu); | ||
202 | } | ||