author     Tejun Heo <tj@kernel.org>        2012-03-05 16:15:06 -0500
committer  Jens Axboe <axboe@kernel.dk>     2012-03-06 15:27:22 -0500
commit     cd1604fab4f95f7cfc227d3955fd7ae14da61f38
tree       021881faedc1c2468730f9f54d364083e70dce76 /block
parent     f51b802c17e2a21926b29911493f5e7ddf6eee87
blkcg: factor out blkio_group creation
Currently both blk-throttle and cfq-iosched implement their own
blkio_group creation code in throtl_get_tg() and cfq_get_cfqg(). This
patch factors out the common code into blkg_lookup_create(), which
returns an ERR_PTR value so that transient failures due to queue
bypass can be distinguished from other failures.
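As an illustration, the caller side reduces to the pattern below; this is a
condensed sketch of the converted blk-throttle path shown in full in the diff
(tg, td and q are the surrounding throttle-data and queue variables):

    /* caller holds rcu_read_lock() and q->queue_lock */
    blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_THROTL, false);
    if (!IS_ERR(blkg))
            tg = tg_of_blkg(blkg);        /* existing or newly created group */
    else if (!blk_queue_dead(q))
            tg = td->root_tg;             /* transient bypass: fall back to root */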
* New blkio_policy_ops methods blkio_alloc_group_fn() and
blkio_link_group_fn() are added; a short sketch of how a policy wires
these up appears after this description. Both are transitional and
will be removed once the blkg management code is fully moved into
blk-cgroup.c.
* blkio_alloc_group_fn() allocates a policy-specific blkg, which is
usually a larger data structure with the blkg as its first entry, and
initializes it. Note that initialization of the blkg proper, including
percpu stats, is the responsibility of blk-cgroup proper.
Note that default config (weight, bps...) initialization is done
from this method; otherwise, we would end up violating the locking
order between the blkcg and queue locks via the blkcg_get_CONF()
functions.
* blkio_link_group_fn() is called under queue_lock and is responsible
for linking the blkg to the queue. The blkcg side is handled by
blk-cgroup proper.
* The common blkg creation function is named blkg_lookup_create() and
blkiocg_lookup_group() is renamed to blkg_lookup() for consistency.
Also, throtl / cfq related functions are similarly [re]named for
consistency.
This simplifies blkcg policy implementations and enables further
cleanup.
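To make the wiring concrete, a policy now supplies the two methods in its
blkio_policy_ops; the following is a condensed sketch of the blk-throttle
registration from the diff below (the pre-existing ops are elided):

    static struct blkio_group *throtl_alloc_blkio_group(struct request_queue *q,
                                                        struct blkio_cgroup *blkcg);
    static void throtl_link_blkio_group(struct request_queue *q,
                                        struct blkio_group *blkg);

    static struct blkio_policy_type blkio_policy_throtl = {
            .ops = {
                    .blkio_alloc_group_fn = throtl_alloc_blkio_group,
                    .blkio_link_group_fn = throtl_link_blkio_group,
                    /* unlink/clear/update ops unchanged */
            },
    };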
-v2: Vivek noticed that blkg_lookup_create() incorrectly tested
blk_queue_dead() instead of blk_queue_bypass(), leading a user of the
function to create a new blkg on a bypassing queue. This is a bug
introduced while relocating the bypass patches before this one. Fixed.
-v3: ERR_PTR patch folded into this one. @for_root added to
blkg_lookup_create() to allow creating root group on a bypassed
queue during elevator switch.
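For reference, root group creation during queue init now passes @for_root=true
so that it succeeds even while the queue is bypassed; condensed from
blk_throtl_init() in the diff below (cfq_init_queue() does the same with
BLKIO_POLICY_PROP):

    rcu_read_lock();
    spin_lock_irq(q->queue_lock);

    blkg = blkg_lookup_create(&blkio_root_cgroup, q, BLKIO_POLICY_THROTL,
                              true);
    if (!IS_ERR(blkg))
            td->root_tg = tg_of_blkg(blkg);

    spin_unlock_irq(q->queue_lock);
    rcu_read_unlock();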
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block')
 block/blk-cgroup.c   | 117
 block/blk-cgroup.h   |  30
 block/blk-throttle.c | 155
 block/cfq-iosched.c  | 131
 block/cfq.h          |   8
 5 files changed, 193 insertions(+), 248 deletions(-)
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index f1b08d3cba5..bc989149631 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -465,38 +465,93 @@ void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction,
 }
 EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);
 
-/*
- * This function allocates the per cpu stats for blkio_group. Should be called
- * from sleepable context as alloc_per_cpu() requires that.
- */
-int blkio_alloc_blkg_stats(struct blkio_group *blkg)
+struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
+                                       struct request_queue *q,
+                                       enum blkio_policy_id plid,
+                                       bool for_root)
+        __releases(q->queue_lock) __acquires(q->queue_lock)
 {
-        /* Allocate memory for per cpu stats */
-        blkg->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu);
-        if (!blkg->stats_cpu)
-                return -ENOMEM;
-        return 0;
-}
-EXPORT_SYMBOL_GPL(blkio_alloc_blkg_stats);
+        struct blkio_policy_type *pol = blkio_policy[plid];
+        struct blkio_group *blkg, *new_blkg;
 
-void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
-                struct blkio_group *blkg, struct request_queue *q, dev_t dev,
-                enum blkio_policy_id plid)
-{
-        unsigned long flags;
+        WARN_ON_ONCE(!rcu_read_lock_held());
+        lockdep_assert_held(q->queue_lock);
 
-        spin_lock_irqsave(&blkcg->lock, flags);
-        spin_lock_init(&blkg->stats_lock);
-        rcu_assign_pointer(blkg->q, q);
-        blkg->blkcg_id = css_id(&blkcg->css);
+        /*
+         * This could be the first entry point of blkcg implementation and
+         * we shouldn't allow anything to go through for a bypassing queue.
+         * The following can be removed if blkg lookup is guaranteed to
+         * fail on a bypassing queue.
+         */
+        if (unlikely(blk_queue_bypass(q)) && !for_root)
+                return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
+
+        blkg = blkg_lookup(blkcg, q, plid);
+        if (blkg)
+                return blkg;
+
+        if (!css_tryget(&blkcg->css))
+                return ERR_PTR(-EINVAL);
+
+        /*
+         * Allocate and initialize.
+         *
+         * FIXME: The following is broken.  Percpu memory allocation
+         * requires %GFP_KERNEL context and can't be performed from IO
+         * path.  Allocation here should inherently be atomic and the
+         * following lock dancing can be removed once the broken percpu
+         * allocation is fixed.
+         */
+        spin_unlock_irq(q->queue_lock);
+        rcu_read_unlock();
+
+        new_blkg = pol->ops.blkio_alloc_group_fn(q, blkcg);
+        if (new_blkg) {
+                new_blkg->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu);
+
+                spin_lock_init(&new_blkg->stats_lock);
+                rcu_assign_pointer(new_blkg->q, q);
+                new_blkg->blkcg_id = css_id(&blkcg->css);
+                new_blkg->plid = plid;
+                cgroup_path(blkcg->css.cgroup, new_blkg->path,
+                            sizeof(new_blkg->path));
+        }
+
+        rcu_read_lock();
+        spin_lock_irq(q->queue_lock);
+        css_put(&blkcg->css);
+
+        /* did bypass get turned on inbetween? */
+        if (unlikely(blk_queue_bypass(q)) && !for_root) {
+                blkg = ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
+                goto out;
+        }
+
+        /* did someone beat us to it? */
+        blkg = blkg_lookup(blkcg, q, plid);
+        if (unlikely(blkg))
+                goto out;
+
+        /* did alloc fail? */
+        if (unlikely(!new_blkg || !new_blkg->stats_cpu)) {
+                blkg = ERR_PTR(-ENOMEM);
+                goto out;
+        }
+
+        /* insert */
+        spin_lock(&blkcg->lock);
+        swap(blkg, new_blkg);
         hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
-        blkg->plid = plid;
-        spin_unlock_irqrestore(&blkcg->lock, flags);
-        /* Need to take css reference ? */
-        cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));
-        blkg->dev = dev;
+        pol->ops.blkio_link_group_fn(q, blkg);
+        spin_unlock(&blkcg->lock);
+out:
+        if (new_blkg) {
+                free_percpu(new_blkg->stats_cpu);
+                kfree(new_blkg);
+        }
+        return blkg;
 }
-EXPORT_SYMBOL_GPL(blkiocg_add_blkio_group);
+EXPORT_SYMBOL_GPL(blkg_lookup_create);
 
 static void __blkiocg_del_blkio_group(struct blkio_group *blkg)
 {
@@ -533,9 +588,9 @@ int blkiocg_del_blkio_group(struct blkio_group *blkg)
 EXPORT_SYMBOL_GPL(blkiocg_del_blkio_group);
 
 /* called under rcu_read_lock(). */
-struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
-                                         struct request_queue *q,
-                                         enum blkio_policy_id plid)
+struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
+                                struct request_queue *q,
+                                enum blkio_policy_id plid)
 {
         struct blkio_group *blkg;
         struct hlist_node *n;
@@ -545,7 +600,7 @@ struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
                         return blkg;
         return NULL;
 }
-EXPORT_SYMBOL_GPL(blkiocg_lookup_group);
+EXPORT_SYMBOL_GPL(blkg_lookup);
 
 void blkg_destroy_all(struct request_queue *q)
 {
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 562fa55d97b..2600ae7e6f6 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -204,6 +204,10 @@ extern unsigned int blkcg_get_read_iops(struct blkio_cgroup *blkcg,
 extern unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg,
                                      dev_t dev);
 
+typedef struct blkio_group *(blkio_alloc_group_fn)(struct request_queue *q,
+                                                   struct blkio_cgroup *blkcg);
+typedef void (blkio_link_group_fn)(struct request_queue *q,
+                        struct blkio_group *blkg);
 typedef void (blkio_unlink_group_fn)(struct request_queue *q,
                         struct blkio_group *blkg);
 typedef bool (blkio_clear_queue_fn)(struct request_queue *q);
@@ -219,6 +223,8 @@ typedef void (blkio_update_group_write_iops_fn)(struct request_queue *q,
                         struct blkio_group *blkg, unsigned int write_iops);
 
 struct blkio_policy_ops {
+        blkio_alloc_group_fn *blkio_alloc_group_fn;
+        blkio_link_group_fn *blkio_link_group_fn;
         blkio_unlink_group_fn *blkio_unlink_group_fn;
         blkio_clear_queue_fn *blkio_clear_queue_fn;
         blkio_update_group_weight_fn *blkio_update_group_weight_fn;
@@ -307,14 +313,14 @@ static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg) {}
 extern struct blkio_cgroup blkio_root_cgroup;
 extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup);
 extern struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk);
-extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
-        struct blkio_group *blkg, struct request_queue *q, dev_t dev,
-        enum blkio_policy_id plid);
-extern int blkio_alloc_blkg_stats(struct blkio_group *blkg);
 extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
-extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
-                                                struct request_queue *q,
-                                                enum blkio_policy_id plid);
+extern struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
+                                       struct request_queue *q,
+                                       enum blkio_policy_id plid);
+struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
+                                       struct request_queue *q,
+                                       enum blkio_policy_id plid,
+                                       bool for_root);
 void blkiocg_update_timeslice_used(struct blkio_group *blkg,
                                    unsigned long time,
                                    unsigned long unaccounted_time);
@@ -335,17 +341,11 @@ cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; }
 static inline struct blkio_cgroup *
 task_blkio_cgroup(struct task_struct *tsk) { return NULL; }
 
-static inline void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
-                struct blkio_group *blkg, void *key, dev_t dev,
-                enum blkio_policy_id plid) {}
-
-static inline int blkio_alloc_blkg_stats(struct blkio_group *blkg) { return 0; }
-
 static inline int
 blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
 
-static inline struct blkio_group *
-blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; }
+static inline struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
+                                              void *key) { return NULL; }
 static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
                                                 unsigned long time,
                                                 unsigned long unaccounted_time)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index aeeb798d1cd..2ae637b9e80 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -181,17 +181,25 @@ static void throtl_put_tg(struct throtl_grp *tg)
         call_rcu(&tg->rcu_head, throtl_free_tg);
 }
 
-static void throtl_init_group(struct throtl_grp *tg)
+static struct blkio_group *throtl_alloc_blkio_group(struct request_queue *q,
+                                                    struct blkio_cgroup *blkcg)
 {
+        struct throtl_grp *tg;
+
+        tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, q->node);
+        if (!tg)
+                return NULL;
+
         INIT_HLIST_NODE(&tg->tg_node);
         RB_CLEAR_NODE(&tg->rb_node);
         bio_list_init(&tg->bio_lists[0]);
         bio_list_init(&tg->bio_lists[1]);
         tg->limits_changed = false;
 
-        /* Practically unlimited BW */
-        tg->bps[0] = tg->bps[1] = -1;
-        tg->iops[0] = tg->iops[1] = -1;
+        tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev);
+        tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev);
+        tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev);
+        tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev);
 
         /*
          * Take the initial reference that will be released on destroy
@@ -200,14 +208,8 @@ static void throtl_init_group(struct throtl_grp *tg)
          * exit or cgroup deletion path depending on who is exiting first.
          */
         atomic_set(&tg->ref, 1);
-}
 
-/* Should be called with rcu read lock held (needed for blkcg) */
-static void
-throtl_add_group_to_td_list(struct throtl_data *td, struct throtl_grp *tg)
-{
-        hlist_add_head(&tg->tg_node, &td->tg_list);
-        td->nr_undestroyed_grps++;
+        return &tg->blkg;
 }
 
 static void
@@ -246,119 +248,62 @@ throtl_tg_fill_dev_details(struct throtl_data *td, struct throtl_grp *tg)
         spin_unlock_irq(td->queue->queue_lock);
 }
 
-static void throtl_init_add_tg_lists(struct throtl_data *td,
-                struct throtl_grp *tg, struct blkio_cgroup *blkcg)
+static void throtl_link_blkio_group(struct request_queue *q,
+                                    struct blkio_group *blkg)
 {
-        __throtl_tg_fill_dev_details(td, tg);
-
-        /* Add group onto cgroup list */
-        blkiocg_add_blkio_group(blkcg, &tg->blkg, td->queue,
-                                tg->blkg.dev, BLKIO_POLICY_THROTL);
-
-        tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev);
-        tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev);
-        tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev);
-        tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev);
-
-        throtl_add_group_to_td_list(td, tg);
-}
-
-/* Should be called without queue lock and outside of rcu period */
-static struct throtl_grp *throtl_alloc_tg(struct throtl_data *td)
-{
-        struct throtl_grp *tg = NULL;
-        int ret;
-
-        tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, td->queue->node);
-        if (!tg)
-                return NULL;
-
-        ret = blkio_alloc_blkg_stats(&tg->blkg);
+        struct throtl_data *td = q->td;
+        struct throtl_grp *tg = tg_of_blkg(blkg);
 
-        if (ret) {
-                kfree(tg);
-                return NULL;
-        }
+        __throtl_tg_fill_dev_details(td, tg);
 
-        throtl_init_group(tg);
-        return tg;
+        hlist_add_head(&tg->tg_node, &td->tg_list);
+        td->nr_undestroyed_grps++;
 }
 
 static struct
-throtl_grp *throtl_find_tg(struct throtl_data *td, struct blkio_cgroup *blkcg)
+throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg)
 {
         struct throtl_grp *tg = NULL;
 
         /*
          * This is the common case when there are no blkio cgroups.
          * Avoid lookup in this case
          */
         if (blkcg == &blkio_root_cgroup)
                 tg = td->root_tg;
         else
-                tg = tg_of_blkg(blkiocg_lookup_group(blkcg, td->queue,
-                                                     BLKIO_POLICY_THROTL));
+                tg = tg_of_blkg(blkg_lookup(blkcg, td->queue,
+                                            BLKIO_POLICY_THROTL));
 
         __throtl_tg_fill_dev_details(td, tg);
         return tg;
 }
 
-static struct throtl_grp *throtl_get_tg(struct throtl_data *td,
-                                        struct blkio_cgroup *blkcg)
+static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td,
+                                                  struct blkio_cgroup *blkcg)
 {
-        struct throtl_grp *tg = NULL, *__tg = NULL;
         struct request_queue *q = td->queue;
-
-        /* no throttling for dead queue */
-        if (unlikely(blk_queue_bypass(q)))
-                return NULL;
-
-        tg = throtl_find_tg(td, blkcg);
-        if (tg)
-                return tg;
-
-        if (!css_tryget(&blkcg->css))
-                return NULL;
-
-        /*
-         * Need to allocate a group. Allocation of group also needs allocation
-         * of per cpu stats which in-turn takes a mutex() and can block. Hence
-         * we need to drop rcu lock and queue_lock before we call alloc.
-         */
-        spin_unlock_irq(q->queue_lock);
-        rcu_read_unlock();
-
-        tg = throtl_alloc_tg(td);
-
-        /* Group allocated and queue is still alive. take the lock */
-        rcu_read_lock();
-        spin_lock_irq(q->queue_lock);
-        css_put(&blkcg->css);
-
-        /* Make sure @q is still alive */
-        if (unlikely(blk_queue_bypass(q))) {
-                kfree(tg);
-                return NULL;
-        }
+        struct throtl_grp *tg = NULL;
 
         /*
-         * If some other thread already allocated the group while we were
-         * not holding queue lock, free up the group
+         * This is the common case when there are no blkio cgroups.
+         * Avoid lookup in this case
          */
-        __tg = throtl_find_tg(td, blkcg);
+        if (blkcg == &blkio_root_cgroup) {
+                tg = td->root_tg;
+        } else {
+                struct blkio_group *blkg;
 
-        if (__tg) {
-                kfree(tg);
-                return __tg;
-        }
+                blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_THROTL, false);
 
-        /* Group allocation failed. Account the IO to root group */
-        if (!tg) {
-                tg = td->root_tg;
-                return tg;
+                /* if %NULL and @q is alive, fall back to root_tg */
+                if (!IS_ERR(blkg))
+                        tg = tg_of_blkg(blkg);
+                else if (!blk_queue_dead(q))
+                        tg = td->root_tg;
         }
 
-        throtl_init_add_tg_lists(td, tg, blkcg);
+        __throtl_tg_fill_dev_details(td, tg);
         return tg;
 }
 
@@ -1107,6 +1052,8 @@ static void throtl_shutdown_wq(struct request_queue *q)
 
 static struct blkio_policy_type blkio_policy_throtl = {
         .ops = {
+                .blkio_alloc_group_fn = throtl_alloc_blkio_group,
+                .blkio_link_group_fn = throtl_link_blkio_group,
                 .blkio_unlink_group_fn = throtl_unlink_blkio_group,
                 .blkio_clear_queue_fn = throtl_clear_queue,
                 .blkio_update_group_read_bps_fn =
@@ -1141,7 +1088,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
          */
         rcu_read_lock();
         blkcg = task_blkio_cgroup(current);
-        tg = throtl_find_tg(td, blkcg);
+        tg = throtl_lookup_tg(td, blkcg);
         if (tg) {
                 throtl_tg_fill_dev_details(td, tg);
 
@@ -1157,7 +1104,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
          * IO group
          */
         spin_lock_irq(q->queue_lock);
-        tg = throtl_get_tg(td, blkcg);
+        tg = throtl_lookup_create_tg(td, blkcg);
         if (unlikely(!tg))
                 goto out_unlock;
 
@@ -1252,6 +1199,7 @@ void blk_throtl_drain(struct request_queue *q)
 int blk_throtl_init(struct request_queue *q)
 {
         struct throtl_data *td;
+        struct blkio_group *blkg;
 
         td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);
         if (!td)
@@ -1262,13 +1210,17 @@ int blk_throtl_init(struct request_queue *q)
         td->limits_changed = false;
         INIT_DELAYED_WORK(&td->throtl_work, blk_throtl_work);
 
-        /* alloc and Init root group. */
+        q->td = td;
         td->queue = q;
 
+        /* alloc and init root group. */
         rcu_read_lock();
         spin_lock_irq(q->queue_lock);
 
-        td->root_tg = throtl_get_tg(td, &blkio_root_cgroup);
+        blkg = blkg_lookup_create(&blkio_root_cgroup, q, BLKIO_POLICY_THROTL,
+                                  true);
+        if (!IS_ERR(blkg))
+                td->root_tg = tg_of_blkg(blkg);
 
         spin_unlock_irq(q->queue_lock);
         rcu_read_unlock();
@@ -1277,9 +1229,6 @@ int blk_throtl_init(struct request_queue *q)
                 kfree(td);
                 return -ENOMEM;
         }
-
-        /* Attach throtl data to request queue */
-        q->td = td;
         return 0;
 }
 
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 1c3f41b9d5d..acef564578c 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1048,10 +1048,12 @@ static void cfq_update_blkio_group_weight(struct request_queue *q,
         cfqg->needs_update = true;
 }
 
-static void cfq_init_add_cfqg_lists(struct cfq_data *cfqd,
-                struct cfq_group *cfqg, struct blkio_cgroup *blkcg)
+static void cfq_link_blkio_group(struct request_queue *q,
+                                 struct blkio_group *blkg)
 {
-        struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
+        struct cfq_data *cfqd = q->elevator->elevator_data;
+        struct backing_dev_info *bdi = &q->backing_dev_info;
+        struct cfq_group *cfqg = cfqg_of_blkg(blkg);
         unsigned int major, minor;
 
         /*
@@ -1062,34 +1064,26 @@ static void cfq_init_add_cfqg_lists(struct cfq_data *cfqd,
          */
         if (bdi->dev) {
                 sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-                cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
-                                        cfqd->queue, MKDEV(major, minor));
-        } else
-                cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
-                                        cfqd->queue, 0);
+                blkg->dev = MKDEV(major, minor);
+        }
 
         cfqd->nr_blkcg_linked_grps++;
-        cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
 
         /* Add group on cfqd list */
         hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
 }
 
-/*
- * Should be called from sleepable context. No request queue lock as per
- * cpu stats are allocated dynamically and alloc_percpu needs to be called
- * from sleepable context.
- */
-static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
+static struct blkio_group *cfq_alloc_blkio_group(struct request_queue *q,
+                                                 struct blkio_cgroup *blkcg)
 {
         struct cfq_group *cfqg;
-        int ret;
 
-        cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node);
+        cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, q->node);
         if (!cfqg)
                 return NULL;
 
         cfq_init_cfqg_base(cfqg);
+        cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
 
         /*
          * Take the initial reference that will be released on destroy
@@ -1099,90 +1093,38 @@ static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
          */
         cfqg->ref = 1;
 
-        ret = blkio_alloc_blkg_stats(&cfqg->blkg);
-        if (ret) {
-                kfree(cfqg);
-                return NULL;
-        }
-
-        return cfqg;
-}
-
-static struct cfq_group *
-cfq_find_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg)
-{
-        struct cfq_group *cfqg = NULL;
-        struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
-        unsigned int major, minor;
-
-        /*
-         * This is the common case when there are no blkio cgroups.
-         * Avoid lookup in this case
-         */
-        if (blkcg == &blkio_root_cgroup)
-                cfqg = cfqd->root_group;
-        else
-                cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, cfqd->queue,
-                                                         BLKIO_POLICY_PROP));
-
-        if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
-                sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-                cfqg->blkg.dev = MKDEV(major, minor);
-        }
-
-        return cfqg;
+        return &cfqg->blkg;
 }
 
 /*
  * Search for the cfq group current task belongs to. request_queue lock must
  * be held.
  */
-static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd,
-                                      struct blkio_cgroup *blkcg)
+static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd,
+                                                struct blkio_cgroup *blkcg)
 {
-        struct cfq_group *cfqg = NULL, *__cfqg = NULL;
         struct request_queue *q = cfqd->queue;
+        struct backing_dev_info *bdi = &q->backing_dev_info;
+        struct cfq_group *cfqg = NULL;
 
-        cfqg = cfq_find_cfqg(cfqd, blkcg);
-        if (cfqg)
-                return cfqg;
-
-        if (!css_tryget(&blkcg->css))
-                return NULL;
-
-        /*
-         * Need to allocate a group. Allocation of group also needs allocation
-         * of per cpu stats which in-turn takes a mutex() and can block. Hence
-         * we need to drop rcu lock and queue_lock before we call alloc.
-         *
-         * Not taking any queue reference here and assuming that queue is
-         * around by the time we return. CFQ queue allocation code does
-         * the same. It might be racy though.
-         */
-        rcu_read_unlock();
-        spin_unlock_irq(q->queue_lock);
-
-        cfqg = cfq_alloc_cfqg(cfqd);
+        /* avoid lookup for the common case where there's no blkio cgroup */
+        if (blkcg == &blkio_root_cgroup) {
+                cfqg = cfqd->root_group;
+        } else {
+                struct blkio_group *blkg;
 
-        spin_lock_irq(q->queue_lock);
-        rcu_read_lock();
-        css_put(&blkcg->css);
+                blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_PROP, false);
+                if (!IS_ERR(blkg))
+                        cfqg = cfqg_of_blkg(blkg);
+        }
 
-        /*
-         * If some other thread already allocated the group while we were
-         * not holding queue lock, free up the group
-         */
-        __cfqg = cfq_find_cfqg(cfqd, blkcg);
+        if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
+                unsigned int major, minor;
 
-        if (__cfqg) {
-                kfree(cfqg);
-                return __cfqg;
-        }
+                sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
+                cfqg->blkg.dev = MKDEV(major, minor);
+        }
 
-        if (!cfqg)
-                cfqg = cfqd->root_group;
-
-        cfq_init_add_cfqg_lists(cfqd, cfqg, blkcg);
         return cfqg;
 }
 
@@ -1294,8 +1236,8 @@ static bool cfq_clear_queue(struct request_queue *q)
 }
 
 #else /* GROUP_IOSCHED */
-static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd,
-                                      struct blkio_cgroup *blkcg)
+static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd,
+                                                struct blkio_cgroup *blkcg)
 {
         return cfqd->root_group;
 }
@@ -2887,7 +2829,8 @@ retry:
 
         blkcg = task_blkio_cgroup(current);
 
-        cfqg = cfq_get_cfqg(cfqd, blkcg);
+        cfqg = cfq_lookup_create_cfqg(cfqd, blkcg);
+
         cic = cfq_cic_lookup(cfqd, ioc);
         /* cic always exists here */
         cfqq = cic_to_cfqq(cic, is_sync);
@@ -3694,6 +3637,7 @@ static void cfq_exit_queue(struct elevator_queue *e)
 static int cfq_init_queue(struct request_queue *q)
 {
         struct cfq_data *cfqd;
+        struct blkio_group *blkg __maybe_unused;
         int i;
 
         cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
@@ -3711,7 +3655,10 @@ static int cfq_init_queue(struct request_queue *q)
         rcu_read_lock();
         spin_lock_irq(q->queue_lock);
 
-        cfqd->root_group = cfq_get_cfqg(cfqd, &blkio_root_cgroup);
+        blkg = blkg_lookup_create(&blkio_root_cgroup, q, BLKIO_POLICY_PROP,
+                                  true);
+        if (!IS_ERR(blkg))
+                cfqd->root_group = cfqg_of_blkg(blkg);
 
         spin_unlock_irq(q->queue_lock);
         rcu_read_unlock();
@@ -3897,6 +3844,8 @@ static struct elevator_type iosched_cfq = {
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
 static struct blkio_policy_type blkio_policy_cfq = {
         .ops = {
+                .blkio_alloc_group_fn = cfq_alloc_blkio_group,
+                .blkio_link_group_fn = cfq_link_blkio_group,
                 .blkio_unlink_group_fn = cfq_unlink_blkio_group,
                 .blkio_clear_queue_fn = cfq_clear_queue,
                 .blkio_update_group_weight_fn = cfq_update_blkio_group_weight,
diff --git a/block/cfq.h b/block/cfq.h
index 343b78a61df..398760194e1 100644
--- a/block/cfq.h
+++ b/block/cfq.h
@@ -67,12 +67,6 @@ static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg,
                         direction, sync);
 }
 
-static inline void cfq_blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
-        struct blkio_group *blkg, struct request_queue *q, dev_t dev)
-{
-        blkiocg_add_blkio_group(blkcg, blkg, q, dev, BLKIO_POLICY_PROP);
-}
-
 static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg)
 {
         return blkiocg_del_blkio_group(blkg);
@@ -105,8 +99,6 @@ static inline void cfq_blkiocg_update_dispatch_stats(struct blkio_group *blkg,
                         uint64_t bytes, bool direction, bool sync) {}
 static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg, uint64_t start_time, uint64_t io_start_time, bool direction, bool sync) {}
 
-static inline void cfq_blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
-        struct blkio_group *blkg, struct request_queue *q, dev_t dev) {}
 static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg)
 {
         return 0;