author      Tejun Heo <tj@kernel.org>    2014-07-01 12:34:38 -0400
committer   Jens Axboe <axboe@fb.com>    2014-07-01 12:34:38 -0400
commit      add703fda981b9719d37f371498b9f129acbd997 (patch)
tree        905e2f2f3a7753536a83f9c4b047e44f039a4bfe
parent      72d6f02a8d4e0dda74de3a541b1c4ae82f5f7b45 (diff)
blk-mq: use percpu_ref for mq usage count
Currently, blk-mq uses a percpu_counter to keep track of how many
usages are in flight. The percpu_counter is drained while freezing to
ensure that no usage is left in-flight after freezing is complete.
blk_mq_queue_enter/exit() and blk_mq_[un]freeze_queue() implement this
per-cpu gating mechanism.
This type of code has a relatively high chance of subtle bugs that are
extremely difficult to trigger, and it is way too hairy to be open-coded
in blk-mq. percpu_ref can serve the same purpose after the recent
changes. This patch replaces the open-coded per-cpu usage counting
and draining mechanism with percpu_ref.
blk_mq_queue_enter() performs a tryget_live on the ref and
blk_mq_queue_exit() performs a put. blk_mq_freeze_queue() kills the ref
and waits until the
reference count reaches zero. blk_mq_unfreeze_queue() revives the ref
and wakes up the waiters.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Nicholas A. Bellinger <nab@linux-iscsi.org>
Cc: Kent Overstreet <kmo@daterainc.com>
Signed-off-by: Jens Axboe <axboe@fb.com>
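For reference, below is a minimal sketch of the gating pattern described above, written against the percpu_ref calls this patch uses. The struct and function names (gated_queue, gated_queue_*) are illustrative placeholders rather than actual blk-mq code, and the enter path omits the retry loop and dying check that the real blk_mq_queue_enter() has.

#include <linux/kernel.h>
#include <linux/percpu-refcount.h>
#include <linux/wait.h>

/* Illustrative stand-in for the usage-gating parts of struct request_queue. */
struct gated_queue {
        struct percpu_ref       usage;          /* replaces the old percpu_counter */
        wait_queue_head_t       freeze_wq;      /* freezer sleeps here until drained */
};

/* Called once the last reference is dropped after percpu_ref_kill(). */
static void gated_queue_usage_release(struct percpu_ref *ref)
{
        struct gated_queue *gq = container_of(ref, struct gated_queue, usage);

        wake_up_all(&gq->freeze_wq);
}

static int gated_queue_init(struct gated_queue *gq)
{
        init_waitqueue_head(&gq->freeze_wq);
        return percpu_ref_init(&gq->usage, gated_queue_usage_release);
}

/* Fast path: succeeds only while the ref is live, i.e. not frozen. */
static bool gated_queue_enter(struct gated_queue *gq)
{
        return percpu_ref_tryget_live(&gq->usage);
}

static void gated_queue_exit(struct gated_queue *gq)
{
        percpu_ref_put(&gq->usage);
}

/* Freeze: kill the ref, then wait for all in-flight users to drain. */
static void gated_queue_freeze(struct gated_queue *gq)
{
        percpu_ref_kill(&gq->usage);
        wait_event(gq->freeze_wq, percpu_ref_is_zero(&gq->usage));
}

/* Unfreeze: revive the ref and wake anyone blocked trying to enter. */
static void gated_queue_unfreeze(struct gated_queue *gq)
{
        percpu_ref_reinit(&gq->usage);
        wake_up_all(&gq->freeze_wq);
}

In the actual patch below, blk_mq_queue_enter() additionally loops: when tryget_live fails it sleeps on mq_freeze_wq until the queue is unfrozen or marked dying, then retries.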
-rw-r--r--    block/blk-mq.c            68
-rw-r--r--    include/linux/blkdev.h     3
2 files changed, 31 insertions, 40 deletions
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 22682fb4be65..5189cb1e478a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -78,34 +78,32 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
 
 static int blk_mq_queue_enter(struct request_queue *q)
 {
-        int ret;
-
-        __percpu_counter_add(&q->mq_usage_counter, 1, 1000000);
-        smp_mb();
-
-        /* we have problems freezing the queue if it's initializing */
-        if (!q->mq_freeze_depth)
-                return 0;
-
-        __percpu_counter_add(&q->mq_usage_counter, -1, 1000000);
+        while (true) {
+                int ret;
 
-        spin_lock_irq(q->queue_lock);
-        ret = wait_event_interruptible_lock_irq(q->mq_freeze_wq,
-                !q->mq_freeze_depth || blk_queue_dying(q),
-                *q->queue_lock);
-        /* inc usage with lock hold to avoid freeze_queue runs here */
-        if (!ret && !blk_queue_dying(q))
-                __percpu_counter_add(&q->mq_usage_counter, 1, 1000000);
-        else if (blk_queue_dying(q))
-                ret = -ENODEV;
-        spin_unlock_irq(q->queue_lock);
+                if (percpu_ref_tryget_live(&q->mq_usage_counter))
+                        return 0;
 
-        return ret;
+                ret = wait_event_interruptible(q->mq_freeze_wq,
+                                !q->mq_freeze_depth || blk_queue_dying(q));
+                if (blk_queue_dying(q))
+                        return -ENODEV;
+                if (ret)
+                        return ret;
+        }
 }
 
 static void blk_mq_queue_exit(struct request_queue *q)
 {
-        __percpu_counter_add(&q->mq_usage_counter, -1, 1000000);
+        percpu_ref_put(&q->mq_usage_counter);
+}
+
+static void blk_mq_usage_counter_release(struct percpu_ref *ref)
+{
+        struct request_queue *q =
+                container_of(ref, struct request_queue, mq_usage_counter);
+
+        wake_up_all(&q->mq_freeze_wq);
 }
 
 /*
@@ -118,18 +116,9 @@ void blk_mq_freeze_queue(struct request_queue *q)
         q->mq_freeze_depth++;
         spin_unlock_irq(q->queue_lock);
 
-        while (true) {
-                s64 count;
-
-                spin_lock_irq(q->queue_lock);
-                count = percpu_counter_sum(&q->mq_usage_counter);
-                spin_unlock_irq(q->queue_lock);
-
-                if (count == 0)
-                        break;
-                blk_mq_start_hw_queues(q);
-                msleep(10);
-        }
+        percpu_ref_kill(&q->mq_usage_counter);
+        blk_mq_run_queues(q, false);
+        wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter));
 }
 
 static void blk_mq_unfreeze_queue(struct request_queue *q)
@@ -140,8 +129,10 @@ static void blk_mq_unfreeze_queue(struct request_queue *q)
         wake = !--q->mq_freeze_depth;
         WARN_ON_ONCE(q->mq_freeze_depth < 0);
         spin_unlock_irq(q->queue_lock);
-        if (wake)
+        if (wake) {
+                percpu_ref_reinit(&q->mq_usage_counter);
                 wake_up_all(&q->mq_freeze_wq);
+        }
 }
 
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
@@ -1785,7 +1776,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set)
         if (!q)
                 goto err_hctxs;
 
-        if (percpu_counter_init(&q->mq_usage_counter, 0))
+        if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release))
                 goto err_map;
 
         setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q);
@@ -1878,7 +1869,7 @@ void blk_mq_free_queue(struct request_queue *q)
         blk_mq_exit_hw_queues(q, set, set->nr_hw_queues);
         blk_mq_free_hw_queues(q, set);
 
-        percpu_counter_destroy(&q->mq_usage_counter);
+        percpu_ref_exit(&q->mq_usage_counter);
 
         free_percpu(q->queue_ctx);
         kfree(q->queue_hw_ctx);
@@ -2037,8 +2028,7 @@ static int __init blk_mq_init(void)
 {
         blk_mq_cpu_init();
 
-        /* Must be called after percpu_counter_hotcpu_callback() */
-        hotcpu_notifier(blk_mq_queue_reinit_notify, -10);
+        hotcpu_notifier(blk_mq_queue_reinit_notify, 0);
 
         return 0;
 }
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c8f344ff74fe..518b46555b80 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -21,6 +21,7 @@
 #include <linux/bsg.h>
 #include <linux/smp.h>
 #include <linux/rcupdate.h>
+#include <linux/percpu-refcount.h>
 
 #include <asm/scatterlist.h>
 
@@ -484,7 +485,7 @@ struct request_queue {
 #endif
         struct rcu_head         rcu_head;
         wait_queue_head_t       mq_freeze_wq;
-        struct percpu_counter   mq_usage_counter;
+        struct percpu_ref       mq_usage_counter;
         struct list_head        all_q_node;
 
         struct blk_mq_tag_set   *tag_set;