author     Linus Torvalds <torvalds@linux-foundation.org>   2013-02-28 15:52:24 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2013-02-28 15:52:24 -0500
commit     ee89f81252179dcbf6cd65bd48299f5e52292d88 (patch)
tree       805846cd12821f84cfe619d44c9e3e36e0b0f9e6 /block/blk-cgroup.c
parent     21f3b24da9328415792efc780f50b9f434c12465 (diff)
parent     de33127d8d3f1d570aad8c2223cd81b206636bc1 (diff)
Merge branch 'for-3.9/core' of git://git.kernel.dk/linux-block
Pull block IO core bits from Jens Axboe:
 "Below are the core block IO bits for 3.9.  It was delayed a few days
  since my workstation kept crashing every 2-8h after pulling it into
  current -git, but turns out it is a bug in the new pstate code (divide
  by zero, will report separately).  In any case, it contains:

   - The big cfq/blkcg update from Tejun and Vivek.

   - Additional block and writeback tracepoints from Tejun.

   - Improvement of the should sort (based on queues) logic in the plug
     flushing.

   - _io() variants of the wait_for_completion() interface, using
     io_schedule() instead of schedule() to contribute to io wait
     properly (a usage sketch follows the commit message).

   - Various little fixes.

  You'll get two trivial merge conflicts, which should be easy enough to
  fix up"
Fix up the trivial conflicts due to hlist traversal cleanups (commit
b67bfe0d42ca: "hlist: drop the node parameter from iterators").
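For context on the `_io()` completion variants called out above, here is a minimal usage sketch. `wait_for_completion_io()` and `DECLARE_COMPLETION_ONSTACK()` are real interfaces (the former is added by this very series); the device type and queueing helper are illustrative:

```c
#include <linux/completion.h>

/* Hypothetical submit-and-wait path: same shape as the classic
 * wait_for_completion() pattern, but the sleep is charged to iowait
 * because the _io variant sleeps via io_schedule(). */
static int my_submit_and_wait(struct my_device *dev)	/* illustrative type */
{
	DECLARE_COMPLETION_ONSTACK(done);

	my_queue_request(dev, &done);	/* hypothetical helper */
	wait_for_completion_io(&done);	/* accounted as iowait, not plain sleep */
	return 0;
}
```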
* 'for-3.9/core' of git://git.kernel.dk/linux-block: (39 commits)
block: remove redundant check to bd_openers()
block: use i_size_write() in bd_set_size()
cfq: fix lock imbalance with failed allocations
drivers/block/swim3.c: fix null pointer dereference
block: don't select PERCPU_RWSEM
block: account iowait time when waiting for completion of IO request
sched: add wait_for_completion_io[_timeout]
writeback: add more tracepoints
block: add block_{touch|dirty}_buffer tracepoint
buffer: make touch_buffer() an exported function
block: add @req to bio_{front|back}_merge tracepoints
block: add missing block_bio_complete() tracepoint
block: Remove should_sort judgement when flush blk_plug
block,elevator: use new hashtable implementation
cfq-iosched: add hierarchical cfq_group statistics
cfq-iosched: collect stats from dead cfqgs
cfq-iosched: separate out cfqg_stats_reset() from cfq_pd_reset_stats()
blkcg: make blkcg_print_blkgs() grab q locks instead of blkcg lock
block: RCU free request_queue
blkcg: implement blkg_[rw]stat_recursive_sum() and blkg_[rw]stat_merge()
...
Diffstat (limited to 'block/blk-cgroup.c')

 -rw-r--r--  block/blk-cgroup.c  277
 1 file changed, 236 insertions, 41 deletions
```diff
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 8bdebb6781e1..b2b9837f9dd3 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -26,11 +26,32 @@
 
 static DEFINE_MUTEX(blkcg_pol_mutex);
 
-struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT };
+struct blkcg blkcg_root = { .cfq_weight = 2 * CFQ_WEIGHT_DEFAULT,
+			    .cfq_leaf_weight = 2 * CFQ_WEIGHT_DEFAULT, };
 EXPORT_SYMBOL_GPL(blkcg_root);
 
 static struct blkcg_policy *blkcg_policy[BLKCG_MAX_POLS];
 
+static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
+				      struct request_queue *q, bool update_hint);
+
+/**
+ * blkg_for_each_descendant_pre - pre-order walk of a blkg's descendants
+ * @d_blkg: loop cursor pointing to the current descendant
+ * @pos_cgrp: used for iteration
+ * @p_blkg: target blkg to walk descendants of
+ *
+ * Walk @d_blkg through the descendants of @p_blkg.  Must be used with RCU
+ * read locked.  If called under either blkcg or queue lock, the iteration
+ * is guaranteed to include all and only online blkgs.  The caller may
+ * update @pos_cgrp by calling cgroup_rightmost_descendant() to skip a
+ * subtree.
+ */
+#define blkg_for_each_descendant_pre(d_blkg, pos_cgrp, p_blkg)		\
+	cgroup_for_each_descendant_pre((pos_cgrp), (p_blkg)->blkcg->css.cgroup) \
+		if (((d_blkg) = __blkg_lookup(cgroup_to_blkcg(pos_cgrp),	\
+					      (p_blkg)->q, false)))
+
 static bool blkcg_policy_enabled(struct request_queue *q,
 				 const struct blkcg_policy *pol)
 {
```
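A hedged sketch of how the new iterator is meant to be consumed, mirroring the recursive-stats helpers added later in this diff (`do_something()` is illustrative; `blkg` is some starting blkg the caller already holds):

```c
struct blkcg_gq *pos_blkg;
struct cgroup *pos_cgrp;

rcu_read_lock();
blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, blkg) {
	/* only online blkgs are guaranteed to be fully visible here */
	if (pos_blkg->online)
		do_something(pos_blkg);		/* hypothetical per-blkg work */
}
rcu_read_unlock();
```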
```diff
@@ -112,9 +133,10 @@ static struct blkcg_gq *blkg_alloc(struct blkcg *blkcg, struct request_queue *q,
 
 		blkg->pd[i] = pd;
 		pd->blkg = blkg;
+		pd->plid = i;
 
 		/* invoke per-policy init */
-		if (blkcg_policy_enabled(blkg->q, pol))
+		if (pol->pd_init_fn)
 			pol->pd_init_fn(blkg);
 	}
 
@@ -125,8 +147,19 @@ err_free:
 	return NULL;
 }
 
+/**
+ * __blkg_lookup - internal version of blkg_lookup()
+ * @blkcg: blkcg of interest
+ * @q: request_queue of interest
+ * @update_hint: whether to update lookup hint with the result or not
+ *
+ * This is the internal version and shouldn't be used by policy
+ * implementations.  Looks up blkgs for the @blkcg - @q pair regardless of
+ * @q's bypass state.  If @update_hint is %true, the caller should be
+ * holding @q->queue_lock and lookup hint is updated on success.
+ */
 static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
-				      struct request_queue *q)
+				      struct request_queue *q, bool update_hint)
 {
 	struct blkcg_gq *blkg;
 
@@ -135,14 +168,19 @@ static struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg,
 		return blkg;
 
 	/*
-	 * Hint didn't match.  Look up from the radix tree.  Note that we
-	 * may not be holding queue_lock and thus are not sure whether
-	 * @blkg from blkg_tree has already been removed or not, so we
-	 * can't update hint to the lookup result.  Leave it to the caller.
+	 * Hint didn't match.  Look up from the radix tree.  Note that the
+	 * hint can only be updated under queue_lock as otherwise @blkg
+	 * could have already been removed from blkg_tree.  The caller is
+	 * responsible for grabbing queue_lock if @update_hint.
 	 */
 	blkg = radix_tree_lookup(&blkcg->blkg_tree, q->id);
-	if (blkg && blkg->q == q)
+	if (blkg && blkg->q == q) {
+		if (update_hint) {
+			lockdep_assert_held(q->queue_lock);
+			rcu_assign_pointer(blkcg->blkg_hint, blkg);
+		}
 		return blkg;
+	}
 
 	return NULL;
 }
```
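The hint plus radix-tree split above serves the hot path. A hedged sketch of the typical caller pattern, modeled on in-tree users such as blk-throttle (`bio_blkcg()` exists in this tree; the surrounding logic is simplified):

```c
rcu_read_lock();
blkcg = bio_blkcg(bio);
blkg = blkg_lookup(blkcg, q);		/* hint hit avoids the radix tree */
if (!blkg) {
	spin_lock_irq(q->queue_lock);
	blkg = blkg_lookup_create(blkcg, q);	/* may build the whole parent chain */
	if (IS_ERR(blkg))
		blkg = NULL;			/* fall back to root-group behavior */
	spin_unlock_irq(q->queue_lock);
}
rcu_read_unlock();
```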
```diff
@@ -162,7 +200,7 @@ struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q)
 
 	if (unlikely(blk_queue_bypass(q)))
 		return NULL;
-	return __blkg_lookup(blkcg, q);
+	return __blkg_lookup(blkcg, q, false);
 }
 EXPORT_SYMBOL_GPL(blkg_lookup);
 
@@ -170,75 +208,129 @@ EXPORT_SYMBOL_GPL(blkg_lookup);
  * If @new_blkg is %NULL, this function tries to allocate a new one as
  * necessary using %GFP_ATOMIC.  @new_blkg is always consumed on return.
  */
-static struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg,
-					     struct request_queue *q,
-					     struct blkcg_gq *new_blkg)
+static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
+				    struct request_queue *q,
+				    struct blkcg_gq *new_blkg)
 {
 	struct blkcg_gq *blkg;
-	int ret;
+	int i, ret;
 
 	WARN_ON_ONCE(!rcu_read_lock_held());
 	lockdep_assert_held(q->queue_lock);
 
-	/* lookup and update hint on success, see __blkg_lookup() for details */
-	blkg = __blkg_lookup(blkcg, q);
-	if (blkg) {
-		rcu_assign_pointer(blkcg->blkg_hint, blkg);
-		goto out_free;
-	}
-
 	/* blkg holds a reference to blkcg */
 	if (!css_tryget(&blkcg->css)) {
-		blkg = ERR_PTR(-EINVAL);
-		goto out_free;
+		ret = -EINVAL;
+		goto err_free_blkg;
 	}
 
 	/* allocate */
 	if (!new_blkg) {
 		new_blkg = blkg_alloc(blkcg, q, GFP_ATOMIC);
 		if (unlikely(!new_blkg)) {
-			blkg = ERR_PTR(-ENOMEM);
-			goto out_put;
+			ret = -ENOMEM;
+			goto err_put_css;
 		}
 	}
 	blkg = new_blkg;
 
-	/* insert */
+	/* link parent and insert */
+	if (blkcg_parent(blkcg)) {
+		blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false);
+		if (WARN_ON_ONCE(!blkg->parent)) {
+			blkg = ERR_PTR(-EINVAL);
+			goto err_put_css;
+		}
+		blkg_get(blkg->parent);
+	}
+
 	spin_lock(&blkcg->lock);
 	ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
 	if (likely(!ret)) {
 		hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
 		list_add(&blkg->q_node, &q->blkg_list);
+
+		for (i = 0; i < BLKCG_MAX_POLS; i++) {
+			struct blkcg_policy *pol = blkcg_policy[i];
+
+			if (blkg->pd[i] && pol->pd_online_fn)
+				pol->pd_online_fn(blkg);
+		}
 	}
+	blkg->online = true;
 	spin_unlock(&blkcg->lock);
 
 	if (!ret)
 		return blkg;
 
-	blkg = ERR_PTR(ret);
-out_put:
+	/* @blkg failed to be fully initialized, use the usual release path */
+	blkg_put(blkg);
+	return ERR_PTR(ret);
+
+err_put_css:
 	css_put(&blkcg->css);
-out_free:
+err_free_blkg:
 	blkg_free(new_blkg);
-	return blkg;
+	return ERR_PTR(ret);
 }
 
+/**
+ * blkg_lookup_create - lookup blkg, try to create one if not there
+ * @blkcg: blkcg of interest
+ * @q: request_queue of interest
+ *
+ * Lookup blkg for the @blkcg - @q pair.  If it doesn't exist, try to
+ * create one.  blkg creation is performed recursively from blkcg_root such
+ * that all non-root blkg's have access to the parent blkg.  This function
+ * should be called under RCU read lock and @q->queue_lock.
+ *
+ * Returns pointer to the looked up or created blkg on success, ERR_PTR()
+ * value on error.  If @q is dead, returns ERR_PTR(-EINVAL).  If @q is not
+ * dead and bypassing, returns ERR_PTR(-EBUSY).
+ */
 struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
 				    struct request_queue *q)
 {
+	struct blkcg_gq *blkg;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	lockdep_assert_held(q->queue_lock);
+
 	/*
 	 * This could be the first entry point of blkcg implementation and
 	 * we shouldn't allow anything to go through for a bypassing queue.
 	 */
 	if (unlikely(blk_queue_bypass(q)))
 		return ERR_PTR(blk_queue_dying(q) ? -EINVAL : -EBUSY);
-	return __blkg_lookup_create(blkcg, q, NULL);
+
+	blkg = __blkg_lookup(blkcg, q, true);
+	if (blkg)
+		return blkg;
+
+	/*
+	 * Create blkgs walking down from blkcg_root to @blkcg, so that all
+	 * non-root blkgs have access to their parents.
+	 */
+	while (true) {
+		struct blkcg *pos = blkcg;
+		struct blkcg *parent = blkcg_parent(blkcg);
+
+		while (parent && !__blkg_lookup(parent, q, false)) {
+			pos = parent;
+			parent = blkcg_parent(parent);
+		}
+
+		blkg = blkg_create(pos, q, NULL);
+		if (pos == blkcg || IS_ERR(blkg))
+			return blkg;
+	}
 }
 EXPORT_SYMBOL_GPL(blkg_lookup_create);
 
 static void blkg_destroy(struct blkcg_gq *blkg)
 {
 	struct blkcg *blkcg = blkg->blkcg;
+	int i;
 
 	lockdep_assert_held(blkg->q->queue_lock);
 	lockdep_assert_held(&blkcg->lock);
```
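To make the top-down creation loop in blkg_lookup_create() concrete, a worked example under an assumed hierarchy (derived from the code above, not from the commit itself):

```c
/* Assumed hierarchy: blkcg_root -> A -> B, with only root's blkg existing.
 * blkg_lookup_create(B, q) then iterates:
 *
 *   pass 1: the inner while-loop stops at A (deepest ancestor with no
 *           blkg), so blkg_create(A, q, NULL) runs; A's blkg->parent is
 *           looked up and linked to root's blkg.
 *   pass 2: A now exists, so pos == B and blkg_create(B, q, NULL) runs
 *           and returns - every new blkg finds its parent already
 *           inserted, which is what the pd_online_fn() hooks rely on.
 */
```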
```diff
@@ -247,6 +339,14 @@ static void blkg_destroy(struct blkcg_gq *blkg)
 	WARN_ON_ONCE(list_empty(&blkg->q_node));
 	WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
 
+	for (i = 0; i < BLKCG_MAX_POLS; i++) {
+		struct blkcg_policy *pol = blkcg_policy[i];
+
+		if (blkg->pd[i] && pol->pd_offline_fn)
+			pol->pd_offline_fn(blkg);
+	}
+	blkg->online = false;
+
 	radix_tree_delete(&blkcg->blkg_tree, blkg->q->id);
 	list_del_init(&blkg->q_node);
 	hlist_del_init_rcu(&blkg->blkcg_node);
@@ -301,8 +401,10 @@ static void blkg_rcu_free(struct rcu_head *rcu_head)
 
 void __blkg_release(struct blkcg_gq *blkg)
 {
-	/* release the extra blkcg reference this blkg has been holding */
+	/* release the blkcg and parent blkg refs this blkg has been holding */
 	css_put(&blkg->blkcg->css);
+	if (blkg->parent)
+		blkg_put(blkg->parent);
 
 	/*
 	 * A group is freed in rcu manner.  But having an rcu lock does not
```
```diff
@@ -401,8 +503,9 @@ static const char *blkg_dev_name(struct blkcg_gq *blkg)
  *
  * This function invokes @prfill on each blkg of @blkcg if pd for the
  * policy specified by @pol exists.  @prfill is invoked with @sf, the
- * policy data and @data.  If @show_total is %true, the sum of the return
- * values from @prfill is printed with "Total" label at the end.
+ * policy data and @data and the matching queue lock held.  If @show_total
+ * is %true, the sum of the return values from @prfill is printed with
+ * "Total" label at the end.
  *
  * This is to be used to construct print functions for
  * cftype->read_seq_string method.
@@ -416,11 +519,14 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg,
 	struct blkcg_gq *blkg;
 	u64 total = 0;
 
-	spin_lock_irq(&blkcg->lock);
-	hlist_for_each_entry(blkg, &blkcg->blkg_list, blkcg_node)
+	rcu_read_lock();
+	hlist_for_each_entry_rcu(blkg, &blkcg->blkg_list, blkcg_node) {
+		spin_lock_irq(blkg->q->queue_lock);
 		if (blkcg_policy_enabled(blkg->q, pol))
 			total += prfill(sf, blkg->pd[pol->plid], data);
-	spin_unlock_irq(&blkcg->lock);
+		spin_unlock_irq(blkg->q->queue_lock);
+	}
+	rcu_read_unlock();
 
 	if (show_total)
 		seq_printf(sf, "Total %llu\n", (unsigned long long)total);
```
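The lock change matters to prfill callbacks: each now runs with its blkg's queue lock held instead of the blkcg lock. A hedged sketch of a cftype read handler built on this helper, following the pattern cfq-iosched uses (`my_policy`, `struct my_group` and the stat field are illustrative):

```c
static int my_print_stat(struct cgroup *cgrp, struct cftype *cft,
			 struct seq_file *sf)
{
	struct blkcg *blkcg = cgroup_to_blkcg(cgrp);

	/* @prfill runs once per blkg, under that blkg's queue lock */
	blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat, &my_policy,
			  offsetof(struct my_group, my_stat), true);
	return 0;
}
```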
```diff
@@ -479,6 +585,7 @@ u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
 	seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v);
 	return v;
 }
+EXPORT_SYMBOL_GPL(__blkg_prfill_rwstat);
 
 /**
  * blkg_prfill_stat - prfill callback for blkg_stat
@@ -512,6 +619,82 @@ u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
 EXPORT_SYMBOL_GPL(blkg_prfill_rwstat);
 
 /**
+ * blkg_stat_recursive_sum - collect hierarchical blkg_stat
+ * @pd: policy private data of interest
+ * @off: offset to the blkg_stat in @pd
+ *
+ * Collect the blkg_stat specified by @off from @pd and all its online
+ * descendants and return the sum.  The caller must be holding the queue
+ * lock for online tests.
+ */
+u64 blkg_stat_recursive_sum(struct blkg_policy_data *pd, int off)
+{
+	struct blkcg_policy *pol = blkcg_policy[pd->plid];
+	struct blkcg_gq *pos_blkg;
+	struct cgroup *pos_cgrp;
+	u64 sum;
+
+	lockdep_assert_held(pd->blkg->q->queue_lock);
+
+	sum = blkg_stat_read((void *)pd + off);
+
+	rcu_read_lock();
+	blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) {
+		struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
+		struct blkg_stat *stat = (void *)pos_pd + off;
+
+		if (pos_blkg->online)
+			sum += blkg_stat_read(stat);
+	}
+	rcu_read_unlock();
+
+	return sum;
+}
+EXPORT_SYMBOL_GPL(blkg_stat_recursive_sum);
+
+/**
+ * blkg_rwstat_recursive_sum - collect hierarchical blkg_rwstat
+ * @pd: policy private data of interest
+ * @off: offset to the blkg_rwstat in @pd
+ *
+ * Collect the blkg_rwstat specified by @off from @pd and all its online
+ * descendants and return the sum.  The caller must be holding the queue
+ * lock for online tests.
+ */
+struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkg_policy_data *pd,
+					     int off)
+{
+	struct blkcg_policy *pol = blkcg_policy[pd->plid];
+	struct blkcg_gq *pos_blkg;
+	struct cgroup *pos_cgrp;
+	struct blkg_rwstat sum;
+	int i;
+
+	lockdep_assert_held(pd->blkg->q->queue_lock);
+
+	sum = blkg_rwstat_read((void *)pd + off);
+
+	rcu_read_lock();
+	blkg_for_each_descendant_pre(pos_blkg, pos_cgrp, pd_to_blkg(pd)) {
+		struct blkg_policy_data *pos_pd = blkg_to_pd(pos_blkg, pol);
+		struct blkg_rwstat *rwstat = (void *)pos_pd + off;
+		struct blkg_rwstat tmp;
+
+		if (!pos_blkg->online)
+			continue;
+
+		tmp = blkg_rwstat_read(rwstat);
+
+		for (i = 0; i < BLKG_RWSTAT_NR; i++)
+			sum.cnt[i] += tmp.cnt[i];
+	}
+	rcu_read_unlock();
+
+	return sum;
+}
+EXPORT_SYMBOL_GPL(blkg_rwstat_recursive_sum);
+
+/**
  * blkg_conf_prep - parse and prepare for per-blkg config update
  * @blkcg: target block cgroup
  * @pol: target policy
```
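A policy would typically expose these recursive sums through a thin prfill wrapper; a sketch modeled on the cfq-iosched usage from the same series (the function name is illustrative, the called helpers are the ones exported above):

```c
static u64 my_prfill_rwstat_recursive(struct seq_file *sf,
				      struct blkg_policy_data *pd, int off)
{
	/* sum @pd's rwstat with those of all its online descendants */
	struct blkg_rwstat sum = blkg_rwstat_recursive_sum(pd, off);

	return __blkg_prfill_rwstat(sf, pd, &sum);
}
```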
```diff
@@ -656,6 +839,7 @@ static struct cgroup_subsys_state *blkcg_css_alloc(struct cgroup *cgroup)
 		return ERR_PTR(-ENOMEM);
 
 	blkcg->cfq_weight = CFQ_WEIGHT_DEFAULT;
+	blkcg->cfq_leaf_weight = CFQ_WEIGHT_DEFAULT;
 	blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */
 done:
 	spin_lock_init(&blkcg->lock);
@@ -775,7 +959,7 @@ int blkcg_activate_policy(struct request_queue *q,
 			  const struct blkcg_policy *pol)
 {
 	LIST_HEAD(pds);
-	struct blkcg_gq *blkg;
+	struct blkcg_gq *blkg, *new_blkg;
 	struct blkg_policy_data *pd, *n;
 	int cnt = 0, ret;
 	bool preloaded;
@@ -784,19 +968,27 @@ int blkcg_activate_policy(struct request_queue *q,
 		return 0;
 
 	/* preallocations for root blkg */
-	blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
-	if (!blkg)
+	new_blkg = blkg_alloc(&blkcg_root, q, GFP_KERNEL);
+	if (!new_blkg)
 		return -ENOMEM;
 
 	preloaded = !radix_tree_preload(GFP_KERNEL);
 
 	blk_queue_bypass_start(q);
 
-	/* make sure the root blkg exists and count the existing blkgs */
+	/*
+	 * Make sure the root blkg exists and count the existing blkgs.  As
+	 * @q is bypassing at this point, blkg_lookup_create() can't be
+	 * used.  Open code it.
+	 */
 	spin_lock_irq(q->queue_lock);
 
 	rcu_read_lock();
-	blkg = __blkg_lookup_create(&blkcg_root, q, blkg);
+	blkg = __blkg_lookup(&blkcg_root, q, false);
+	if (blkg)
+		blkg_free(new_blkg);
+	else
+		blkg = blkg_create(&blkcg_root, q, new_blkg);
 	rcu_read_unlock();
 
 	if (preloaded)
@@ -844,6 +1036,7 @@ int blkcg_activate_policy(struct request_queue *q,
 
 		blkg->pd[pol->plid] = pd;
 		pd->blkg = blkg;
+		pd->plid = pol->plid;
 		pol->pd_init_fn(blkg);
 
 		spin_unlock(&blkg->blkcg->lock);
@@ -890,6 +1083,8 @@ void blkcg_deactivate_policy(struct request_queue *q,
 		/* grab blkcg lock too while removing @pd from @blkg */
 		spin_lock(&blkg->blkcg->lock);
 
+		if (pol->pd_offline_fn)
+			pol->pd_offline_fn(blkg);
 		if (pol->pd_exit_fn)
 			pol->pd_exit_fn(blkg);
 
```
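Taken together, pd_online_fn() now runs once a blkg is linked into the hierarchy and pd_offline_fn() runs when it is destroyed or its policy is deactivated. A hedged sketch of a policy wiring up the new hooks (the pd_online_fn/pd_offline_fn members are the ones this diff starts calling; the my_* names and the exact set of initialized fields are illustrative):

```c
static struct blkcg_policy my_policy = {
	.pd_size	= sizeof(struct my_group),	/* per-blkg policy data */
	.pd_init_fn	= my_pd_init,
	.pd_online_fn	= my_pd_online,		/* blkg linked, parent visible */
	.pd_offline_fn	= my_pd_offline,	/* last chance to drain/transfer stats */
	.pd_exit_fn	= my_pd_exit,
};
```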