path: root/block
author	Tejun Heo <tj@kernel.org>	2012-04-19 19:29:24 -0400
committer	Jens Axboe <axboe@kernel.dk>	2012-04-20 04:06:40 -0400
commit	a637120e49021d197e9578cba545bbaa459cbb51 (patch)
tree	0d502a8fcc55c89eb4d79a7578e46a9273d1f2c8 /block
parent	496fb7806d616185a46865a4f3a20ef19dc6c7e3 (diff)
blkcg: use radix tree to index blkgs from blkcg
blkg lookup is currently performed by traversing a linked list anchored at blkcg->blkg_list. This is very unscalable and, with blk-throttle enabled and enough request queues on the system, can get ugly quickly (blk-throttle performs a lookup on every bio submission).

This patch makes blkcg use a radix tree to index blkgs, combined with a simple last-looked-up hint. This is mostly identical to how icqs are indexed from ioc.

Note that because __blkg_lookup() may be invoked without holding queue lock, the hint is only updated from __blkg_lookup_create(). Due to cfq's cfqq caching, this makes hint updates overly lazy. This will be improved with the scheduled blkcg-aware request allocation.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
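In short, the lookup fast path first checks a per-blkcg hint under RCU and only falls back to the radix tree keyed by the request queue's id. A condensed sketch of that pattern, paraphrasing the __blkg_lookup() hunk below (locking annotations and surrounding declarations elided):

	/* Sketch of the hint-then-radix-tree lookup this patch introduces. */
	static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
					      struct request_queue *q)
	{
		struct blkcg_gq *blkg;

		/* last-looked-up hint, read under RCU */
		blkg = rcu_dereference(blkcg->blkg_hint);
		if (blkg && blkg->q == q)
			return blkg;

		/* hint miss: fall back to the radix tree indexed by q->id */
		blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id);
		if (blkg && blkg->q == q)
			return blkg;

		return NULL;
	}

The hint is written only from __blkg_lookup_create() and cleared in blkg_destroy(), both under queue_lock, matching the comment in the destroy hunk below.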
Diffstat (limited to 'block')
-rw-r--r--	block/blk-cgroup.c	52
-rw-r--r--	block/blk-cgroup.h	6
2 files changed, 50 insertions, 8 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 30a7a9c58b38..02cf6335e9bd 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -142,11 +142,21 @@ static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
 					struct request_queue *q)
 {
 	struct blkcg_gq *blkg;
-	struct hlist_node *n;
 
-	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node)
-		if (blkg->q == q)
-			return blkg;
+	blkg = rcu_dereference(blkcg->blkg_hint);
+	if (blkg && blkg->q == q)
+		return blkg;
+
+	/*
+	 * Hint didn't match.  Look up from the radix tree.  Note that we
+	 * may not be holding queue_lock and thus are not sure whether
+	 * @blkg from blkg_tree has already been removed or not, so we
+	 * can't update hint to the lookup result.  Leave it to the caller.
+	 */
+	blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id);
+	if (blkg && blkg->q == q)
+		return blkg;
+
 	return NULL;
 }
 
@@ -179,9 +189,12 @@ static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
 	WARN_ON_ONCE(!rcu_read_lock_held());
 	lockdep_assert_held(q->queue_lock);
 
+	/* lookup and update hint on success, see __blkg_lookup() for details */
 	blkg = __blkg_lookup(blkcg, q);
-	if (blkg)
+	if (blkg) {
+		rcu_assign_pointer(blkcg->blkg_hint, blkg);
 		return blkg;
+	}
 
 	/* blkg holds a reference to blkcg */
 	if (!css_tryget(&blkcg->css))
@@ -194,12 +207,24 @@ static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
 		goto err_put;
 
 	/* insert */
+	ret = radix_tree_preload(GFP_ATOMIC);
+	if (ret)
+		goto err_free;
+
 	spin_lock(&blkcg->lock);
-	hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
-	list_add(&blkg->q_node, &q->blkg_list);
+	ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
+	if (likely(!ret)) {
+		hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
+		list_add(&blkg->q_node, &q->blkg_list);
+	}
 	spin_unlock(&blkcg->lock);
-	return blkg;
 
+	radix_tree_preload_end();
+
+	if (!ret)
+		return blkg;
+err_free:
+	blkg_free(blkg);
 err_put:
 	css_put(&blkcg->css);
 	return ERR_PTR(ret);
@@ -229,10 +254,20 @@ static void blkg_destroy(struct blkcg_gq *blkg)
 	/* Something wrong if we are trying to remove same group twice */
 	WARN_ON_ONCE(list_empty(&blkg->q_node));
 	WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
+
+	radix_tree_delete(&blkcg->blkg_tree, blkg->q->id);
 	list_del_init(&blkg->q_node);
 	hlist_del_init_rcu(&blkg->blkcg_node);
 
 	/*
+	 * Both setting lookup hint to and clearing it from @blkg are done
+	 * under queue_lock.  If it's not pointing to @blkg now, it never
+	 * will.  Hint assignment itself can race safely.
+	 */
+	if (rcu_dereference_raw(blkcg->blkg_hint) == blkg)
+		rcu_assign_pointer(blkcg->blkg_hint, NULL);
+
+	/*
 	 * Put the reference taken at the time of creation so that when all
 	 * queues are gone, group can be destroyed.
 	 */
@@ -593,6 +628,7 @@ static struct cgroup_subsys_state *blkcg_create(struct cgroup *cgroup)
 	blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */
 done:
 	spin_lock_init(&blkcg->lock);
+	INIT_RADIX_TREE(&blkcg->blkg_tree, GFP_ATOMIC);
 	INIT_HLIST_HEAD(&blkcg->blkg_list);
 
 	return &blkcg->css;
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 44cb9086ed42..8ac457ce7783 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -16,6 +16,7 @@
 #include <linux/cgroup.h>
 #include <linux/u64_stats_sync.h>
 #include <linux/seq_file.h>
+#include <linux/radix-tree.h>
 
 /* Max limits for throttle policy */
 #define THROTL_IOPS_MAX		UINT_MAX
@@ -37,9 +38,14 @@ enum blkg_rwstat_type {
 	BLKG_RWSTAT_TOTAL = BLKG_RWSTAT_NR,
 };
 
+struct blkcg_gq;
+
 struct blkcg {
 	struct cgroup_subsys_state css;
 	spinlock_t lock;
+
+	struct radix_tree_root		blkg_tree;
+	struct blkcg_gq			*blkg_hint;
 	struct hlist_head blkg_list;
 
 	/* for policies to test whether associated blkcg has changed */