author	Linus Torvalds <torvalds@linux-foundation.org>	2013-07-11 16:03:24 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-07-11 16:03:24 -0400
commit	36805aaea5ae3cf1bb32f1643e0a800bb69f0d5b (patch)
tree	5565132549a0733772b3a2ac6b5cda516ea8cdce /block/blk-cgroup.c
parent	6d2fa9e141ea56a571ec842fd4f3a86bea44a203 (diff)
parent	d50235b7bc3ee0a0427984d763ea7534149531b4 (diff)
Merge branch 'for-3.11/core' of git://git.kernel.dk/linux-block
Pull core block IO updates from Jens Axboe:
 "Here are the core IO block bits for 3.11.  It contains:

   - A tweak to the reserved tag logic from Jan, for weirdo devices
     with just 3 free tags.  But for those it improves things
     substantially for random writes.

   - Periodic writeback fix from Jan.  Marked for stable as well.

   - Fix for a race condition in IO scheduler switching from Jianpeng.

   - The hierarchical blk-cgroup support from Tejun.  This is the grunt
     of the series.

   - blk-throttle fix from Vivek.

  Just a note that I'm in the middle of a relocation, whole family is
  flying out tomorrow.  Hence I will be AWOL the remainder of this
  week, but back at work again on Monday the 15th.

  CC'ing Tejun, since any potential "surprises" will most likely be
  from the blk-cgroup work.  But it's been brewing for a while and
  sitting in my tree and linux-next for a long time, so should be
  solid."

* 'for-3.11/core' of git://git.kernel.dk/linux-block: (36 commits)
  elevator: Fix a race in elevator switching
  block: Reserve only one queue tag for sync IO if only 3 tags are available
  writeback: Fix periodic writeback after fs mount
  blk-throttle: implement proper hierarchy support
  blk-throttle: implement throtl_grp->has_rules[]
  blk-throttle: Account for child group's start time in parent while bio climbs up
  blk-throttle: add throtl_qnode for dispatch fairness
  blk-throttle: make throtl_pending_timer_fn() ready for hierarchy
  blk-throttle: make tg_dispatch_one_bio() ready for hierarchy
  blk-throttle: make blk_throtl_bio() ready for hierarchy
  blk-throttle: make blk_throtl_drain() ready for hierarchy
  blk-throttle: dispatch from throtl_pending_timer_fn()
  blk-throttle: implement dispatch looping
  blk-throttle: separate out throtl_service_queue->pending_timer from throtl_data->dispatch_work
  blk-throttle: set REQ_THROTTLED from throtl_charge_bio() and gate stats update with it
  blk-throttle: implement sq_to_tg(), sq_to_td() and throtl_log()
  blk-throttle: add throtl_service_queue->parent_sq
  blk-throttle: generalize update_disptime optimization in blk_throtl_bio()
  blk-throttle: dispatch to throtl_data->service_queue.bio_lists[]
  blk-throttle: move bio_lists[] and friends to throtl_service_queue
  ...
Diffstat (limited to 'block/blk-cgroup.c')
-rw-r--r--  block/blk-cgroup.c  105
1 file changed, 40 insertions(+), 65 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index e8918ffaf96d..290792a13e3c 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -32,26 +32,6 @@ EXPORT_SYMBOL_GPL(blkcg_root);
 
 static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
 
-static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
-                                      struct request_queue *q, bool update_hint);
-
-/**
- * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
- * @d_blkg: loop cursor pointing to the current descendant
- * @pos_cgrp: used for iteration
- * @p_blkg: target blkg to walk descendants of
- *
- * Walk @c_blkg through the descendants of @p_blkg.  Must be used with RCU
- * read locked.  If called under either blkcg or queue lock, the iteration
- * is guaranteed to include all and only online blkgs.  The caller may
- * update @pos_cgrp by calling cgroup_rightmost_descendant() to skip
- * subtree.
- */
-#define blkg_for_each_descendant_pre(d_blkg, pos_cgrp, p_blkg)         \
-        cgroup_for_each_descendant_pre((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \
-                if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \
-                                              (p_blkg)->q, false)))
-
 static bool blkcg_policy_enabled(struct request_queue *q,
                                  const struct blkcg_policy *pol)
 {
@@ -71,18 +51,8 @@ static void blkg_free(struct blkcg_gq *blkg)
         if (!blkg)
                 return;
 
-        for (i = 0; i < BLKCG_MAX_POLS; i++) {
-                struct blkcg_policy *pol = blkcg_policy[i];
-                struct blkg_policy_data *pd = blkg->pd[i];
-
-                if (!pd)
-                        continue;
-
-                if (pol && pol->pd_exit_fn)
-                        pol->pd_exit_fn(blkg);
-
-                kfree(pd);
-        }
+        for (i = 0; i < BLKCG_MAX_POLS; i++)
+                kfree(blkg->pd[i]);
 
         blk_exit_rl(&blkg->rl);
         kfree(blkg);
@@ -134,10 +104,6 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
                 blkg->pd[i] = pd;
                 pd->blkg = blkg;
                 pd->plid = i;
-
-                /* invoke per-policy init */
-                if (pol->pd_init_fn)
-                        pol->pd_init_fn(blkg);
         }
 
         return blkg;
@@ -158,8 +124,8 @@ err_free:
  * @q's bypass state.  If @update_hint is %true, the caller should be
  * holding @q->queue_lock and lookup hint is updated on success.
  */
-static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
-                                      struct request_queue *q, bool update_hint)
+struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
+                               bool update_hint)
 {
         struct blkcg_gq *blkg;
 
@@ -234,16 +200,25 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
         }
         blkg = new_blkg;
 
-        /* link parent and insert */
+        /* link parent */
         if (blkcg_parent(blkcg)) {
                 blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false);
                 if (WARN_ON_ONCE(!blkg->parent)) {
-                        blkg = ERR_PTR(-EINVAL);
+                        ret = -EINVAL;
                         goto err_put_css;
                 }
                 blkg_get(blkg->parent);
         }
 
+        /* invoke per-policy init */
+        for (i = 0; i < BLKCG_MAX_POLS; i++) {
+                struct blkcg_policy *pol = blkcg_policy[i];
+
+                if (blkg->pd[i] && pol->pd_init_fn)
+                        pol->pd_init_fn(blkg);
+        }
+
+        /* insert */
         spin_lock(&blkcg->lock);
         ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
         if (likely(!ret)) {
@@ -394,30 +369,38 @@ static void blkg_destroy_all(struct request_queue *q)
         q->root_rl.blkg = NULL;
 }
 
-static void blkg_rcu_free(struct rcu_head *rcu_head)
+/*
+ * A group is RCU protected, but having an rcu lock does not mean that one
+ * can access all the fields of blkg and assume these are valid.  For
+ * example, don't try to follow throtl_data and request queue links.
+ *
+ * Having a reference to blkg under an rcu allows accesses to only values
+ * local to groups like group stats and group rate limits.
+ */
+void __blkg_release_rcu(struct rcu_head *rcu_head)
 {
-        blkg_free(container_of(rcu_head, struct blkcg_gq, rcu_head));
-}
+        struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);
+        int i;
+
+        /* tell policies that this one is being freed */
+        for (i = 0; i < BLKCG_MAX_POLS; i++) {
+                struct blkcg_policy *pol = blkcg_policy[i];
+
+                if (blkg->pd[i] && pol->pd_exit_fn)
+                        pol->pd_exit_fn(blkg);
+        }
 
-void __blkg_release(struct blkcg_gq *blkg)
-{
         /* release the blkcg and parent blkg refs this blkg has been holding */
         css_put(&blkg->blkcg->css);
-        if (blkg->parent)
+        if (blkg->parent) {
+                spin_lock_irq(blkg->q->queue_lock);
                 blkg_put(blkg->parent);
+                spin_unlock_irq(blkg->q->queue_lock);
+        }
 
-        /*
-         * A group is freed in rcu manner. But having an rcu lock does not
-         * mean that one can access all the fields of blkg and assume these
-         * are valid. For example, don't try to follow throtl_data and
-         * request queue links.
-         *
-         * Having a reference to blkg under an rcu allows acess to only
-         * values local to groups like group stats and group rate limits
-         */
-        call_rcu(&blkg->rcu_head, blkg_rcu_free);
+        blkg_free(blkg);
 }
-EXPORT_SYMBOL_GPL(__blkg_release);
+EXPORT_SYMBOL_GPL(__blkg_release_rcu);
 
 /*
  * The next function used by blk_queue_for_each_rl().  It's a bit tricky
@@ -928,14 +911,6 @@ struct cgroup_subsys blkio_subsys = {
         .subsys_id = blkio_subsys_id,
         .base_cftypes = blkcg_files,
         .module = THIS_MODULE,
-
-        /*
-         * blkio subsystem is utterly broken in terms of hierarchy support.
-         * It treats all cgroups equally regardless of where they're
-         * located in the hierarchy - all cgroups are treated as if they're
-         * right below the root.  Fix it and remove the following.
-         */
-        .broken_hierarchy = true,
 };
 EXPORT_SYMBOL_GPL(blkio_subsys);
 
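
Editor's note, not part of the patch: the functional core of this diff is a reordering of the blkg lifecycle. Per-policy pd_init_fn() now runs in blkg_create() only after the parent blkg has been linked, and pd_exit_fn() has moved out of blkg_free() into the RCU release callback __blkg_release_rcu(), which runs before the group's policy data is freed. The userspace sketch below is a minimal illustration of that ordering only: every toy_* name is invented here, allocation and creation are collapsed into one step, and none of it is kernel code.

/* Illustrative sketch only -- NOT kernel code. */
#include <stdio.h>
#include <stdlib.h>

#define TOY_MAX_POLS 2

struct toy_blkg;

struct toy_policy {
        const char *name;
        void (*pd_init_fn)(struct toy_blkg *blkg);
        void (*pd_exit_fn)(struct toy_blkg *blkg);
};

struct toy_blkg {
        struct toy_blkg *parent;
        void *pd[TOY_MAX_POLS];         /* per-policy data, may be NULL */
};

static struct toy_policy *toy_policies[TOY_MAX_POLS];

static void throtl_init(struct toy_blkg *blkg)
{
        (void)blkg;
        puts("  pd_init_fn: parent is already linked at this point");
}

static void throtl_exit(struct toy_blkg *blkg)
{
        (void)blkg;
        puts("  pd_exit_fn: called in the release path, before freeing");
}

static struct toy_policy throtl_pol = { "throtl", throtl_init, throtl_exit };

/* Models the new blkg_create() order: link parent first, then init policies. */
static struct toy_blkg *toy_blkg_create(struct toy_blkg *parent)
{
        struct toy_blkg *blkg = calloc(1, sizeof(*blkg));
        int i;

        if (!blkg)
                return NULL;

        blkg->parent = parent;                  /* link parent */
        for (i = 0; i < TOY_MAX_POLS; i++) {    /* then per-policy init */
                struct toy_policy *pol = toy_policies[i];

                blkg->pd[i] = pol ? malloc(16) : NULL;
                if (blkg->pd[i] && pol->pd_init_fn)
                        pol->pd_init_fn(blkg);
        }
        return blkg;
}

/* Models __blkg_release_rcu(): tell policies first, then free everything. */
static void toy_blkg_release(struct toy_blkg *blkg)
{
        int i;

        for (i = 0; i < TOY_MAX_POLS; i++) {
                struct toy_policy *pol = toy_policies[i];

                if (blkg->pd[i] && pol->pd_exit_fn)
                        pol->pd_exit_fn(blkg);
                free(blkg->pd[i]);
        }
        free(blkg);
}

int main(void)
{
        struct toy_blkg *root, *child;

        toy_policies[0] = &throtl_pol;

        puts("create root:");
        root = toy_blkg_create(NULL);
        puts("create child:");
        child = toy_blkg_create(root);

        puts("release child:");
        toy_blkg_release(child);
        puts("release root:");
        toy_blkg_release(root);
        return 0;
}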