aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2014-09-23 15:24:32 -0400
committerJens Axboe <axboe@fb.com>2014-09-24 10:29:36 -0400
commit0a30288da1aec914e158c2d7a3482a85f632750f (patch)
tree748c41c772a551dd7c94d46770aadd8a06ec8013
parent452b6361c4d9baf6940adb7b1316e0f386c39799 (diff)
blk-mq, percpu_ref: implement a kludge for SCSI blk-mq stall during probe
blk-mq uses percpu_ref for its usage counter which tracks the number of in-flight commands and used to synchronously drain the queue on freeze. percpu_ref shutdown takes measureable wallclock time as it involves a sched RCU grace period. This means that draining a blk-mq takes measureable wallclock time. One would think that this shouldn't matter as queue shutdown should be a rare event which takes place asynchronously w.r.t. userland. Unfortunately, SCSI probing involves synchronously setting up and then tearing down a lot of request_queues back-to-back for non-existent LUNs. This means that SCSI probing may take more than ten seconds when scsi-mq is used. This will be properly fixed by implementing a mechanism to keep q->mq_usage_counter in atomic mode till genhd registration; however, that involves rather big updates to percpu_ref which is difficult to apply late in the devel cycle (v3.17-rc6 at the moment). As a stop-gap measure till the proper fix can be implemented in the next cycle, this patch introduces __percpu_ref_kill_expedited() and makes blk_mq_freeze_queue() use it. This is heavy-handed but should work for testing the experimental SCSI blk-mq implementation. Signed-off-by: Tejun Heo <tj@kernel.org> Reported-by: Christoph Hellwig <hch@infradead.org> Link: http://lkml.kernel.org/g/20140919113815.GA10791@lst.de Fixes: add703fda981 ("blk-mq: use percpu_ref for mq usage count") Cc: Kent Overstreet <kmo@daterainc.com> Cc: Jens Axboe <axboe@kernel.dk> Tested-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Jens Axboe <axboe@fb.com>
-rw-r--r--block/blk-mq.c11
-rw-r--r--include/linux/percpu-refcount.h1
-rw-r--r--lib/percpu-refcount.c16
3 files changed, 27 insertions, 1 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c88e6089746d..df8e1e09dd17 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -119,7 +119,16 @@ void blk_mq_freeze_queue(struct request_queue *q)
119 spin_unlock_irq(q->queue_lock); 119 spin_unlock_irq(q->queue_lock);
120 120
121 if (freeze) { 121 if (freeze) {
122 percpu_ref_kill(&q->mq_usage_counter); 122 /*
123 * XXX: Temporary kludge to work around SCSI blk-mq stall.
124 * SCSI synchronously creates and destroys many queues
125 * back-to-back during probe leading to lengthy stalls.
126 * This will be fixed by keeping ->mq_usage_counter in
127 * atomic mode until genhd registration, but, for now,
128 * let's work around using expedited synchronization.
129 */
130 __percpu_ref_kill_expedited(&q->mq_usage_counter);
131
123 blk_mq_run_queues(q, false); 132 blk_mq_run_queues(q, false);
124 } 133 }
125 wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter)); 134 wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter));
diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 3dfbf237cd8f..ef5894ca8e50 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -71,6 +71,7 @@ void percpu_ref_reinit(struct percpu_ref *ref);
71void percpu_ref_exit(struct percpu_ref *ref); 71void percpu_ref_exit(struct percpu_ref *ref);
72void percpu_ref_kill_and_confirm(struct percpu_ref *ref, 72void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
73 percpu_ref_func_t *confirm_kill); 73 percpu_ref_func_t *confirm_kill);
74void __percpu_ref_kill_expedited(struct percpu_ref *ref);
74 75
75/** 76/**
76 * percpu_ref_kill - drop the initial ref 77 * percpu_ref_kill - drop the initial ref
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index fe5a3342e960..a89cf09a8268 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -184,3 +184,19 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
184 call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu); 184 call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu);
185} 185}
186EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm); 186EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
187
188/*
189 * XXX: Temporary kludge to work around SCSI blk-mq stall. Used only by
190 * block/blk-mq.c::blk_mq_freeze_queue(). Will be removed during v3.18
191 * devel cycle. Do not use anywhere else.
192 */
193void __percpu_ref_kill_expedited(struct percpu_ref *ref)
194{
195 WARN_ONCE(ref->pcpu_count_ptr & PCPU_REF_DEAD,
196 "percpu_ref_kill() called more than once on %pf!",
197 ref->release);
198
199 ref->pcpu_count_ptr |= PCPU_REF_DEAD;
200 synchronize_sched_expedited();
201 percpu_ref_kill_rcu(&ref->rcu);
202}