author    Linus Torvalds <torvalds@linux-foundation.org>  2013-07-11 16:03:24 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>  2013-07-11 16:03:24 -0400
commit    36805aaea5ae3cf1bb32f1643e0a800bb69f0d5b (patch)
tree      5565132549a0733772b3a2ac6b5cda516ea8cdce /block/blk-cgroup.c
parent    6d2fa9e141ea56a571ec842fd4f3a86bea44a203 (diff)
parent    d50235b7bc3ee0a0427984d763ea7534149531b4 (diff)
Merge branch 'for-3.11/core' of git://git.kernel.dk/linux-block
Pull core block IO updates from Jens Axboe:
"Here are the core IO block bits for 3.11. It contains:
- A tweak to the reserved tag logic from Jan, for weirdo devices with
just 3 free tags. But for those it improves things substantially
for random writes.
- Periodic writeback fix from Jan. Marked for stable as well.
- Fix for a race condition in IO scheduler switching from Jianpeng.
- The hierarchical blk-cgroup support from Tejun. This is the grunt
of the series.
- blk-throttle fix from Vivek.
Just a note that I'm in the middle of a relocation, whole family is
flying out tomorrow. Hence I will be AWOL for the remainder of this week,
but back at work again on Monday the 15th. CC'ing Tejun, since any
potential "surprises" will most likely be from the blk-cgroup work.
But it's been brewing for a while and sitting in my tree and
linux-next for a long time, so should be solid."
* 'for-3.11/core' of git://git.kernel.dk/linux-block: (36 commits)
elevator: Fix a race in elevator switching
block: Reserve only one queue tag for sync IO if only 3 tags are available
writeback: Fix periodic writeback after fs mount
blk-throttle: implement proper hierarchy support
blk-throttle: implement throtl_grp->has_rules[]
blk-throttle: Account for child group's start time in parent while bio climbs up
blk-throttle: add throtl_qnode for dispatch fairness
blk-throttle: make throtl_pending_timer_fn() ready for hierarchy
blk-throttle: make tg_dispatch_one_bio() ready for hierarchy
blk-throttle: make blk_throtl_bio() ready for hierarchy
blk-throttle: make blk_throtl_drain() ready for hierarchy
blk-throttle: dispatch from throtl_pending_timer_fn()
blk-throttle: implement dispatch looping
blk-throttle: separate out throtl_service_queue->pending_timer from throtl_data->dispatch_work
blk-throttle: set REQ_THROTTLED from throtl_charge_bio() and gate stats update with it
blk-throttle: implement sq_to_tg(), sq_to_td() and throtl_log()
blk-throttle: add throtl_service_queue->parent_sq
blk-throttle: generalize update_disptime optimization in blk_throtl_bio()
blk-throttle: dispatch to throtl_data->service_queue.bio_lists[]
blk-throttle: move bio_lists[] and friends to throtl_service_queue
...
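The hierarchical blk-cgroup work described above changes the model from every cgroup behaving as if it sat directly below the root to limits being enforced level by level: a throttled bio must fit within its own group's budget and within every ancestor's budget as it climbs toward the root. The following is a minimal userspace sketch of that budget composition; it is a toy model with invented names (tg, tg_may_dispatch, tg_charge), not the kernel implementation:

#include <stdbool.h>
#include <stdio.h>

/* toy throttle group: bytes dispatched in the current window vs. its limit */
struct tg {
        struct tg *parent;
        unsigned long long dispatched;
        unsigned long long limit;
};

/* a bio of @bytes may go only if every level up to the root has budget */
static bool tg_may_dispatch(struct tg *tg, unsigned long long bytes)
{
        for (; tg; tg = tg->parent)
                if (tg->dispatched + bytes > tg->limit)
                        return false;
        return true;
}

/* charge the bio at every level as it climbs toward the root */
static void tg_charge(struct tg *tg, unsigned long long bytes)
{
        for (; tg; tg = tg->parent)
                tg->dispatched += bytes;
}

int main(void)
{
        struct tg root = { .limit = 100 };
        struct tg child = { .parent = &root, .limit = 80 };

        if (tg_may_dispatch(&child, 60)) {
                tg_charge(&child, 60);
                printf("dispatched 60 bytes from child\n");
        }
        /* neither child (20 left) nor root (40 left) can absorb another 60 */
        printf("second 60-byte bio allowed: %s\n",
               tg_may_dispatch(&child, 60) ? "yes" : "no");
        return 0;
}

In the merged series the climb is mediated by per-level service queues and throtl_qnode entries so that dispatch stays fair between a parent's own bios and those forwarded from its children, but the composed-budget rule above is the core behavioral change.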
Diffstat (limited to 'block/blk-cgroup.c')
-rw-r--r--   block/blk-cgroup.c   105
1 file changed, 40 insertions(+), 65 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index e8918ffaf96d..290792a13e3c 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -32,26 +32,6 @@ EXPORT_SYMBOL_GPL(blkcg_root);
 
 static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
 
-static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
-                                      struct request_queue *q, bool update_hint);
-
-/**
- * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
- * @d_blkg: loop cursor pointing to the current descendant
- * @pos_cgrp: used for iteration
- * @p_blkg: target blkg to walk descendants of
- *
- * Walk @c_blkg through the descendants of @p_blkg.  Must be used with RCU
- * read locked.  If called under either blkcg or queue lock, the iteration
- * is guaranteed to include all and only online blkgs.  The caller may
- * update @pos_cgrp by calling cgroup_rightmost_descendant() to skip
- * subtree.
- */
-#define blkg_for_each_descendant_pre(d_blkg, pos_cgrp, p_blkg)         \
-        cgroup_for_each_descendant_pre((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \
-                if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp), \
-                                              (p_blkg)->q, false)))
-
 static bool blkcg_policy_enabled(struct request_queue *q,
                                  const struct blkcg_policy *pol)
 {
@@ -71,18 +51,8 @@ static void blkg_free(struct blkcg_gq *blkg)
         if (!blkg)
                 return;
 
-        for (i = 0; i < BLKCG_MAX_POLS; i++) {
-                struct blkcg_policy *pol = blkcg_policy[i];
-                struct blkg_policy_data *pd = blkg->pd[i];
-
-                if (!pd)
-                        continue;
-
-                if (pol && pol->pd_exit_fn)
-                        pol->pd_exit_fn(blkg);
-
-                kfree(pd);
-        }
+        for (i = 0; i < BLKCG_MAX_POLS; i++)
+                kfree(blkg->pd[i]);
 
         blk_exit_rl(&blkg->rl);
         kfree(blkg);
@@ -134,10 +104,6 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
                 blkg->pd[i] = pd;
                 pd->blkg = blkg;
                 pd->plid = i;
-
-                /* invoke per-policy init */
-                if (pol->pd_init_fn)
-                        pol->pd_init_fn(blkg);
         }
 
         return blkg;
@@ -158,8 +124,8 @@ err_free:
  * @q's bypass state.  If @update_hint is %true, the caller should be
  * holding @q->queue_lock and lookup hint is updated on success.
  */
-static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
-                                      struct request_queue *q, bool update_hint)
+struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, struct request_queue *q,
+                               bool update_hint)
 {
         struct blkcg_gq *blkg;
 
@@ -234,16 +200,25 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
         }
         blkg = new_blkg;
 
-        /* link parent and insert */
+        /* link parent */
         if (blkcg_parent(blkcg)) {
                 blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false);
                 if (WARN_ON_ONCE(!blkg->parent)) {
-                        blkg = ERR_PTR(-EINVAL);
+                        ret = -EINVAL;
                         goto err_put_css;
                 }
                 blkg_get(blkg->parent);
         }
 
+        /* invoke per-policy init */
+        for (i = 0; i < BLKCG_MAX_POLS; i++) {
+                struct blkcg_policy *pol = blkcg_policy[i];
+
+                if (blkg->pd[i] && pol->pd_init_fn)
+                        pol->pd_init_fn(blkg);
+        }
+
+        /* insert */
         spin_lock(&blkcg->lock);
         ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
         if (likely(!ret)) {
@@ -394,30 +369,38 @@ static void blkg_destroy_all(struct request_queue *q)
         q->root_rl.blkg = NULL;
 }
 
-static void blkg_rcu_free(struct rcu_head *rcu_head)
+/*
+ * A group is RCU protected, but having an rcu lock does not mean that one
+ * can access all the fields of blkg and assume these are valid.  For
+ * example, don't try to follow throtl_data and request queue links.
+ *
+ * Having a reference to blkg under an rcu allows accesses to only values
+ * local to groups like group stats and group rate limits.
+ */
+void __blkg_release_rcu(struct rcu_head *rcu_head)
 {
-        blkg_free(container_of(rcu_head, struct blkcg_gq, rcu_head));
-}
+        struct blkcg_gq *blkg = container_of(rcu_head, struct blkcg_gq, rcu_head);
+        int i;
+
+        /* tell policies that this one is being freed */
+        for (i = 0; i < BLKCG_MAX_POLS; i++) {
+                struct blkcg_policy *pol = blkcg_policy[i];
+
+                if (blkg->pd[i] && pol->pd_exit_fn)
+                        pol->pd_exit_fn(blkg);
+        }
 
-void __blkg_release(struct blkcg_gq *blkg)
-{
         /* release the blkcg and parent blkg refs this blkg has been holding */
         css_put(&blkg->blkcg->css);
-        if (blkg->parent)
+        if (blkg->parent) {
+                spin_lock_irq(blkg->q->queue_lock);
                 blkg_put(blkg->parent);
+                spin_unlock_irq(blkg->q->queue_lock);
+        }
 
-        /*
-         * A group is freed in rcu manner. But having an rcu lock does not
-         * mean that one can access all the fields of blkg and assume these
-         * are valid. For example, don't try to follow throtl_data and
-         * request queue links.
-         *
-         * Having a reference to blkg under an rcu allows acess to only
-         * values local to groups like group stats and group rate limits
-         */
-        call_rcu(&blkg->rcu_head, blkg_rcu_free);
+        blkg_free(blkg);
 }
-EXPORT_SYMBOL_GPL(__blkg_release);
+EXPORT_SYMBOL_GPL(__blkg_release_rcu);
 
 /*
  * The next function used by blk_queue_for_each_rl().  It's a bit tricky
@@ -928,14 +911,6 @@ struct cgroup_subsys blkio_subsys = {
         .subsys_id = blkio_subsys_id,
         .base_cftypes = blkcg_files,
         .module = THIS_MODULE,
-
-        /*
-         * blkio subsystem is utterly broken in terms of hierarchy support.
-         * It treats all cgroups equally regardless of where they're
-         * located in the hierarchy - all cgroups are treated as if they're
-         * right below the root.  Fix it and remove the following.
-         */
-        .broken_hierarchy = true,
 };
 EXPORT_SYMBOL_GPL(blkio_subsys);
 
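Two structural points in the diff above are worth noting. First, the per-policy pd_init_fn() calls moved out of blkg_alloc() and into blkg_create() after the parent blkg is linked, while pd_exit_fn() moved from blkg_free() into the RCU release path, so a policy now sees a fully linked group at init time and is notified before the group's memory is returned. Second, __blkg_release_rcu() uses the kernel's usual embedded-rcu_head idiom: the callback receives a pointer to the rcu_head member and recovers the enclosing blkcg_gq with container_of(). Below is a minimal userspace sketch of that idiom; the toy_* names are invented, and a direct call stands in for the grace period that call_rcu() would provide:

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

/* stand-in for the kernel's struct rcu_head */
struct rcu_head {
        void (*func)(struct rcu_head *head);
};

/* toy object with an embedded rcu_head, like struct blkcg_gq */
struct toy_gq {
        int id;
        struct rcu_head rcu_head;
};

/* recover the enclosing structure from a pointer to one of its members */
#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

/* release callback: map the rcu_head back to the outer object, then free */
static void toy_release_rcu(struct rcu_head *head)
{
        struct toy_gq *gq = container_of(head, struct toy_gq, rcu_head);

        printf("freeing toy_gq %d\n", gq->id);
        free(gq);
}

/* toy call_rcu(): the kernel would defer @func past an RCU grace period */
static void toy_call_rcu(struct rcu_head *head,
                         void (*func)(struct rcu_head *head))
{
        head->func = func;
        head->func(head);       /* invoked immediately in this sketch */
}

int main(void)
{
        struct toy_gq *gq = calloc(1, sizeof(*gq));

        if (!gq)
                return 1;
        gq->id = 42;
        toy_call_rcu(&gq->rcu_head, toy_release_rcu);
        return 0;
}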