author	Tejun Heo <tj@kernel.org>	2014-07-01 12:34:38 -0400
committer	Jens Axboe <axboe@fb.com>	2014-07-01 12:34:38 -0400
commit	add703fda981b9719d37f371498b9f129acbd997 (patch)
tree	905e2f2f3a7753536a83f9c4b047e44f039a4bfe
parent	72d6f02a8d4e0dda74de3a541b1c4ae82f5f7b45 (diff)
blk-mq: use percpu_ref for mq usage count
Currently, blk-mq uses a percpu_counter to keep track of how many usages are in flight. The percpu_counter is drained while freezing to ensure that no usage is left in flight after freezing is complete. blk_mq_queue_enter/exit() and blk_mq_[un]freeze_queue() implement this per-cpu gating mechanism.

This type of code has a relatively high chance of subtle bugs which are extremely difficult to trigger, and it's way too hairy to be open-coded in blk-mq. percpu_ref can serve the same purpose after the recent changes. This patch replaces the open-coded per-cpu usage counting and draining mechanism with percpu_ref.

blk_mq_queue_enter() performs tryget_live on the ref and exit() performs put. blk_mq_freeze_queue() kills the ref and waits until the reference count reaches zero. blk_mq_unfreeze_queue() revives the ref and wakes up the waiters.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Nicholas A. Bellinger <nab@linux-iscsi.org>
Cc: Kent Overstreet <kmo@daterainc.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
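[Editor's note: for readers unfamiliar with the pattern, below is a toy userspace analogue of the gate described above. A single C11 atomic counter plus a dead flag stand in for percpu_ref's per-CPU counters, and a spin loop stands in for sleeping on mq_freeze_wq. All names here (struct gate, gate_enter(), gate_exit(), gate_freeze(), gate_unfreeze()) are hypothetical; this only sketches the tryget_live/put/kill/reinit life cycle the patch relies on, not how percpu_ref is actually implemented.]

/*
 * Toy single-counter analogue of the percpu_ref usage gate.
 * percpu_ref itself keeps per-CPU counters with an atomic fallback,
 * and the kernel sleeps on mq_freeze_wq rather than spinning.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct gate {
	atomic_long users;	/* in-flight references */
	atomic_bool dead;	/* set while frozen; rejects new enters */
};

/* ~ percpu_ref_tryget_live(): fail once the gate has been killed */
static bool gate_enter(struct gate *g)
{
	atomic_fetch_add(&g->users, 1);
	if (atomic_load(&g->dead)) {
		atomic_fetch_sub(&g->users, 1);
		return false;
	}
	return true;
}

/* ~ percpu_ref_put() */
static void gate_exit(struct gate *g)
{
	atomic_fetch_sub(&g->users, 1);
}

/* ~ percpu_ref_kill(), then wait for the count to drain to zero */
static void gate_freeze(struct gate *g)
{
	atomic_store(&g->dead, true);
	while (atomic_load(&g->users) > 0)
		;	/* blk_mq_freeze_queue() sleeps here instead */
}

/* ~ percpu_ref_reinit() + wake_up_all() */
static void gate_unfreeze(struct gate *g)
{
	atomic_store(&g->dead, false);
}

int main(void)
{
	struct gate g = { 0 };

	if (gate_enter(&g)) {	/* a request enters the queue */
		/* ...issue the request... */
		gate_exit(&g);
	}

	gate_freeze(&g);	/* drained: no users can remain */
	printf("enter while frozen: %d\n", gate_enter(&g));	/* 0 */
	gate_unfreeze(&g);
	printf("enter after thaw:   %d\n", gate_enter(&g));	/* 1 */
	gate_exit(&g);
	return 0;
}

[A failing enter maps onto the retry loop in the new blk_mq_queue_enter() below: a tryget that fails during a freeze waits on mq_freeze_wq and tries again once the ref has been revived.]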
-rw-r--r--	block/blk-mq.c	68
-rw-r--r--	include/linux/blkdev.h	3
2 files changed, 31 insertions(+), 40 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 22682fb4be65..5189cb1e478a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -78,34 +78,32 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
 
 static int blk_mq_queue_enter(struct request_queue *q)
 {
-	int ret;
-
-	__percpu_counter_add(&q->mq_usage_counter, 1, 1000000);
-	smp_mb();
-
-	/* we have problems freezing the queue if it's initializing */
-	if (!q->mq_freeze_depth)
-		return 0;
-
-	__percpu_counter_add(&q->mq_usage_counter, -1, 1000000);
+	while (true) {
+		int ret;
 
-	spin_lock_irq(q->queue_lock);
-	ret = wait_event_interruptible_lock_irq(q->mq_freeze_wq,
-		!q->mq_freeze_depth || blk_queue_dying(q),
-		*q->queue_lock);
-	/* inc usage with lock hold to avoid freeze_queue runs here */
-	if (!ret && !blk_queue_dying(q))
-		__percpu_counter_add(&q->mq_usage_counter, 1, 1000000);
-	else if (blk_queue_dying(q))
-		ret = -ENODEV;
-	spin_unlock_irq(q->queue_lock);
+		if (percpu_ref_tryget_live(&q->mq_usage_counter))
+			return 0;
 
-	return ret;
+		ret = wait_event_interruptible(q->mq_freeze_wq,
+				!q->mq_freeze_depth || blk_queue_dying(q));
+		if (blk_queue_dying(q))
+			return -ENODEV;
+		if (ret)
+			return ret;
+	}
 }
 
 static void blk_mq_queue_exit(struct request_queue *q)
 {
-	__percpu_counter_add(&q->mq_usage_counter, -1, 1000000);
+	percpu_ref_put(&q->mq_usage_counter);
+}
+
+static void blk_mq_usage_counter_release(struct percpu_ref *ref)
+{
+	struct request_queue *q =
+		container_of(ref, struct request_queue, mq_usage_counter);
+
+	wake_up_all(&q->mq_freeze_wq);
 }
 
 /*
@@ -118,18 +116,9 @@ void blk_mq_freeze_queue(struct request_queue *q)
 	q->mq_freeze_depth++;
 	spin_unlock_irq(q->queue_lock);
 
-	while (true) {
-		s64 count;
-
-		spin_lock_irq(q->queue_lock);
-		count = percpu_counter_sum(&q->mq_usage_counter);
-		spin_unlock_irq(q->queue_lock);
-
-		if (count == 0)
-			break;
-		blk_mq_start_hw_queues(q);
-		msleep(10);
-	}
+	percpu_ref_kill(&q->mq_usage_counter);
+	blk_mq_run_queues(q, false);
+	wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter));
 }
 
 static void blk_mq_unfreeze_queue(struct request_queue *q)
@@ -140,8 +129,10 @@ static void blk_mq_unfreeze_queue(struct request_queue *q)
 	wake = !--q->mq_freeze_depth;
 	WARN_ON_ONCE(q->mq_freeze_depth < 0);
 	spin_unlock_irq(q->queue_lock);
-	if (wake)
+	if (wake) {
+		percpu_ref_reinit(&q->mq_usage_counter);
 		wake_up_all(&q->mq_freeze_wq);
+	}
 }
 
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
@@ -1785,7 +1776,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
 	if (!q)
 		goto err_hctxs;
 
-	if (percpu_counter_init(&q->mq_usage_counter, 0))
+	if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release))
 		goto err_map;
 
 	setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
@@ -1878,7 +1869,7 @@ void blk_mq_free_queue(struct request_queue *q)
 	blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
 	blk_mq_free_hw_queues(q, set);
 
-	percpu_counter_destroy(&q->mq_usage_counter);
+	percpu_ref_exit(&q->mq_usage_counter);
 
 	free_percpu(q->queue_ctx);
 	kfree(q->queue_hw_ctx);
@@ -2037,8 +2028,7 @@ static int __init blk_mq_init(void)
 {
 	blk_mq_cpu_init();
 
-	/* Must be called after percpu_counter_hotcpu_callback() */
-	hotcpu_notifier(blk_mq_queue_reinit_notify, -10);
+	hotcpu_notifier(blk_mq_queue_reinit_notify, 0);
 
 	return 0;
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c8f344ff74fe..518b46555b80 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -21,6 +21,7 @@
 #include <linux/bsg.h>
 #include <linux/smp.h>
 #include <linux/rcupdate.h>
+#include <linux/percpu-refcount.h>
 
 #include <asm/scatterlist.h>
 
@@ -484,7 +485,7 @@ struct request_queue {
 #endif
 	struct rcu_head rcu_head;
 	wait_queue_head_t mq_freeze_wq;
-	struct percpu_counter mq_usage_counter;
+	struct percpu_ref mq_usage_counter;
 	struct list_head all_q_node;
 
 	struct blk_mq_tag_set *tag_set;