author     Tejun Heo <tj@kernel.org>       2012-03-05 16:15:06 -0500
committer  Jens Axboe <axboe@kernel.dk>    2012-03-06 15:27:22 -0500
commit     cd1604fab4f95f7cfc227d3955fd7ae14da61f38 (patch)
tree       021881faedc1c2468730f9f54d364083e70dce76 /block
parent     f51b802c17e2a21926b29911493f5e7ddf6eee87 (diff)
blkcg: factor out blkio_group creation
Currently both blk-throttle and cfq-iosched implement their own
blkio_group creation code in throtl_get_tg() and cfq_get_cfqg().  This
patch factors out the common code into blkg_lookup_create(), which
returns an ERR_PTR value so that transient failures due to queue bypass
can be distinguished from other failures.

* New blkio_policy_ops methods blkio_alloc_group_fn() and
  blkio_link_group_fn() are added.  Both are transitional and will be
  removed once the blkg management code is fully moved into
  blk-cgroup.c.

* blkio_alloc_group_fn() allocates the policy-specific blkg, which is
  usually a larger data structure with blkg as its first entry, and
  initializes it.  Initialization of blkg proper, including percpu
  stats, is the responsibility of blk-cgroup proper.

  Note that default config (weight, bps...) initialization is done from
  this method; otherwise, we end up violating locking order between
  blkcg and q locks via the blkcg_get_CONF() functions.

* blkio_link_group_fn() is called under queue_lock and is responsible
  for linking the blkg to the queue.  The blkcg side is handled by
  blk-cgroup proper.

* The common blkg creation function is named blkg_lookup_create() and
  blkiocg_lookup_group() is renamed to blkg_lookup() for consistency.
  Also, throtl / cfq related functions are similarly [re]named for
  consistency.

This simplifies blkcg policy implementations and enables further
cleanup.

-v2: Vivek noticed that blkg_lookup_create() incorrectly tested
     blk_queue_dead() instead of blk_queue_bypass(), leading to a user
     of the function creating a new blkg on a bypassing queue.  This is
     a bug introduced while relocating the bypass patches before this
     one.  Fixed.

-v3: ERR_PTR patch folded into this one.  @for_root added to
     blkg_lookup_create() to allow creating the root group on a
     bypassed queue during elevator switch.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
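As a rough illustration of the split (not part of the patch), a policy now only
supplies an allocator and a linker and lets blk-cgroup drive creation.  The
sketch below is modeled on the blk-throttle changes in the diff; mypol_grp and
the mypol_* helpers are hypothetical stand-ins for a policy's own types, and
only the two new ops are shown.

	struct mypol_grp {
		struct blkio_group blkg;	/* blkg is conventionally the first member */
		/* policy-private state ... */
	};

	/* No queue lock held; may run from the IO path, hence GFP_ATOMIC. */
	static struct blkio_group *mypol_alloc_blkio_group(struct request_queue *q,
							   struct blkio_cgroup *blkcg)
	{
		struct mypol_grp *mg = kzalloc_node(sizeof(*mg), GFP_ATOMIC, q->node);

		if (!mg)
			return NULL;
		/* apply per-cgroup defaults (weight, bps, ...) here */
		return &mg->blkg;
	}

	/* Called with q->queue_lock held; hook the group into the policy's lists. */
	static void mypol_link_blkio_group(struct request_queue *q,
					   struct blkio_group *blkg)
	{
		/* e.g. add the containing group to a per-queue list */
	}

	static struct blkio_policy_type blkio_policy_mypol = {
		.ops = {
			.blkio_alloc_group_fn	= mypol_alloc_blkio_group,
			.blkio_link_group_fn	= mypol_link_blkio_group,
			/* existing unlink/clear/update ops unchanged */
		},
	};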
Diffstat (limited to 'block')
-rw-r--r--  block/blk-cgroup.c   | 117
-rw-r--r--  block/blk-cgroup.h   |  30
-rw-r--r--  block/blk-throttle.c | 155
-rw-r--r--  block/cfq-iosched.c  | 131
-rw-r--r--  block/cfq.h          |   8
5 files changed, 193 insertions(+), 248 deletions(-)
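On the caller side, blkg_lookup_create() returns an ERR_PTR so that a transient
bypass (-EBUSY) or a dead queue (-EINVAL) can be told apart from allocation
failure.  The fragment below simply restates the throtl_lookup_create_tg()
logic from the diff, with td, blkcg and q taken from that context.

	struct blkio_group *blkg;
	struct throtl_grp *tg = NULL;

	blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_THROTL, false);
	if (!IS_ERR(blkg))
		tg = tg_of_blkg(blkg);	/* found or newly created */
	else if (!blk_queue_dead(q))
		tg = td->root_tg;	/* transient failure: fall back to root */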
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index f1b08d3cba5..bc989149631 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -465,38 +465,93 @@ void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction,
465} 465}
466EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats); 466EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);
467 467
468/* 468struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
469 * This function allocates the per cpu stats for blkio_group. Should be called 469 struct request_queue *q,
470 * from sleepable context as alloc_per_cpu() requires that. 470 enum blkio_policy_id plid,
471 */ 471 bool for_root)
472int blkio_alloc_blkg_stats(struct blkio_group *blkg) 472 __releases(q->queue_lock) __acquires(q->queue_lock)
473{ 473{
474 /* Allocate memory for per cpu stats */ 474 struct blkio_policy_type *pol = blkio_policy[plid];
475 blkg->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu); 475 struct blkio_group *blkg, *new_blkg;
476 if (!blkg->stats_cpu)
477 return -ENOMEM;
478 return 0;
479}
480EXPORT_SYMBOL_GPL(blkio_alloc_blkg_stats);
481 476
482void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg, 477 WARN_ON_ONCE(!rcu_read_lock_held());
483 struct blkio_group *blkg, struct request_queue *q, dev_t dev, 478 lockdep_assert_held(q->queue_lock);
484 enum blkio_policy_id plid)
485{
486 unsigned long flags;
487 479
488 spin_lock_irqsave(&blkcg->lock, flags); 480 /*
489 spin_lock_init(&blkg->stats_lock); 481 * This could be the first entry point of blkcg implementation and
490 rcu_assign_pointer(blkg->q, q); 482 * we shouldn't allow anything to go through for a bypassing queue.
491 blkg->blkcg_id = css_id(&blkcg->css); 483 * The following can be removed if blkg lookup is guaranteed to
484 * fail on a bypassing queue.
485 */
486 if (unlikely(blk_queue_bypass(q)) && !for_root)
487 return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
488
489 blkg = blkg_lookup(blkcg, q, plid);
490 if (blkg)
491 return blkg;
492
493 if (!css_tryget(&blkcg->css))
494 return ERR_PTR(-EINVAL);
495
496 /*
497 * Allocate and initialize.
498 *
499 * FIXME: The following is broken. Percpu memory allocation
500 * requires %GFP_KERNEL context and can't be performed from IO
501 * path. Allocation here should inherently be atomic and the
502 * following lock dancing can be removed once the broken percpu
503 * allocation is fixed.
504 */
505 spin_unlock_irq(q->queue_lock);
506 rcu_read_unlock();
507
508 new_blkg = pol->ops.blkio_alloc_group_fn(q, blkcg);
509 if (new_blkg) {
510 new_blkg->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu);
511
512 spin_lock_init(&new_blkg->stats_lock);
513 rcu_assign_pointer(new_blkg->q, q);
514 new_blkg->blkcg_id = css_id(&blkcg->css);
515 new_blkg->plid = plid;
516 cgroup_path(blkcg->css.cgroup, new_blkg->path,
517 sizeof(new_blkg->path));
518 }
519
520 rcu_read_lock();
521 spin_lock_irq(q->queue_lock);
522 css_put(&blkcg->css);
523
524 /* did bypass get turned on inbetween? */
525 if (unlikely(blk_queue_bypass(q)) && !for_root) {
526 blkg = ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
527 goto out;
528 }
529
530 /* did someone beat us to it? */
531 blkg = blkg_lookup(blkcg, q, plid);
532 if (unlikely(blkg))
533 goto out;
534
535 /* did alloc fail? */
536 if (unlikely(!new_blkg || !new_blkg->stats_cpu)) {
537 blkg = ERR_PTR(-ENOMEM);
538 goto out;
539 }
540
541 /* insert */
542 spin_lock(&blkcg->lock);
543 swap(blkg, new_blkg);
492 hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list); 544 hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
493 blkg->plid = plid; 545 pol->ops.blkio_link_group_fn(q, blkg);
494 spin_unlock_irqrestore(&blkcg->lock, flags); 546 spin_unlock(&blkcg->lock);
495 /* Need to take css reference ? */ 547out:
496 cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path)); 548 if (new_blkg) {
497 blkg->dev = dev; 549 free_percpu(new_blkg->stats_cpu);
550 kfree(new_blkg);
551 }
552 return blkg;
498} 553}
499EXPORT_SYMBOL_GPL(blkiocg_add_blkio_group); 554EXPORT_SYMBOL_GPL(blkg_lookup_create);
500 555
501static void __blkiocg_del_blkio_group(struct blkio_group *blkg) 556static void __blkiocg_del_blkio_group(struct blkio_group *blkg)
502{ 557{
@@ -533,9 +588,9 @@ int blkiocg_del_blkio_group(struct blkio_group *blkg)
533EXPORT_SYMBOL_GPL(blkiocg_del_blkio_group); 588EXPORT_SYMBOL_GPL(blkiocg_del_blkio_group);
534 589
535/* called under rcu_read_lock(). */ 590/* called under rcu_read_lock(). */
536struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, 591struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
537 struct request_queue *q, 592 struct request_queue *q,
538 enum blkio_policy_id plid) 593 enum blkio_policy_id plid)
539{ 594{
540 struct blkio_group *blkg; 595 struct blkio_group *blkg;
541 struct hlist_node *n; 596 struct hlist_node *n;
@@ -545,7 +600,7 @@ struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
545 return blkg; 600 return blkg;
546 return NULL; 601 return NULL;
547} 602}
548EXPORT_SYMBOL_GPL(blkiocg_lookup_group); 603EXPORT_SYMBOL_GPL(blkg_lookup);
549 604
550void blkg_destroy_all(struct request_queue *q) 605void blkg_destroy_all(struct request_queue *q)
551{ 606{
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 562fa55d97b..2600ae7e6f6 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -204,6 +204,10 @@ extern unsigned int blkcg_get_read_iops(struct blkio_cgroup *blkcg,
204extern unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, 204extern unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg,
205 dev_t dev); 205 dev_t dev);
206 206
207typedef struct blkio_group *(blkio_alloc_group_fn)(struct request_queue *q,
208 struct blkio_cgroup *blkcg);
209typedef void (blkio_link_group_fn)(struct request_queue *q,
210 struct blkio_group *blkg);
207typedef void (blkio_unlink_group_fn)(struct request_queue *q, 211typedef void (blkio_unlink_group_fn)(struct request_queue *q,
208 struct blkio_group *blkg); 212 struct blkio_group *blkg);
209typedef bool (blkio_clear_queue_fn)(struct request_queue *q); 213typedef bool (blkio_clear_queue_fn)(struct request_queue *q);
@@ -219,6 +223,8 @@ typedef void (blkio_update_group_write_iops_fn)(struct request_queue *q,
219 struct blkio_group *blkg, unsigned int write_iops); 223 struct blkio_group *blkg, unsigned int write_iops);
220 224
221struct blkio_policy_ops { 225struct blkio_policy_ops {
226 blkio_alloc_group_fn *blkio_alloc_group_fn;
227 blkio_link_group_fn *blkio_link_group_fn;
222 blkio_unlink_group_fn *blkio_unlink_group_fn; 228 blkio_unlink_group_fn *blkio_unlink_group_fn;
223 blkio_clear_queue_fn *blkio_clear_queue_fn; 229 blkio_clear_queue_fn *blkio_clear_queue_fn;
224 blkio_update_group_weight_fn *blkio_update_group_weight_fn; 230 blkio_update_group_weight_fn *blkio_update_group_weight_fn;
@@ -307,14 +313,14 @@ static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg) {}
307extern struct blkio_cgroup blkio_root_cgroup; 313extern struct blkio_cgroup blkio_root_cgroup;
308extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup); 314extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup);
309extern struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk); 315extern struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk);
310extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
311 struct blkio_group *blkg, struct request_queue *q, dev_t dev,
312 enum blkio_policy_id plid);
313extern int blkio_alloc_blkg_stats(struct blkio_group *blkg);
314extern int blkiocg_del_blkio_group(struct blkio_group *blkg); 316extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
315extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, 317extern struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
316 struct request_queue *q, 318 struct request_queue *q,
317 enum blkio_policy_id plid); 319 enum blkio_policy_id plid);
320struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
321 struct request_queue *q,
322 enum blkio_policy_id plid,
323 bool for_root);
318void blkiocg_update_timeslice_used(struct blkio_group *blkg, 324void blkiocg_update_timeslice_used(struct blkio_group *blkg,
319 unsigned long time, 325 unsigned long time,
320 unsigned long unaccounted_time); 326 unsigned long unaccounted_time);
@@ -335,17 +341,11 @@ cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; }
335static inline struct blkio_cgroup * 341static inline struct blkio_cgroup *
336task_blkio_cgroup(struct task_struct *tsk) { return NULL; } 342task_blkio_cgroup(struct task_struct *tsk) { return NULL; }
337 343
338static inline void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
339 struct blkio_group *blkg, void *key, dev_t dev,
340 enum blkio_policy_id plid) {}
341
342static inline int blkio_alloc_blkg_stats(struct blkio_group *blkg) { return 0; }
343
344static inline int 344static inline int
345blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; } 345blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
346 346
347static inline struct blkio_group * 347static inline struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
348blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; } 348 void *key) { return NULL; }
349static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg, 349static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
350 unsigned long time, 350 unsigned long time,
351 unsigned long unaccounted_time) 351 unsigned long unaccounted_time)
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index aeeb798d1cd..2ae637b9e80 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -181,17 +181,25 @@ static void throtl_put_tg(struct throtl_grp *tg)
181 call_rcu(&tg->rcu_head, throtl_free_tg); 181 call_rcu(&tg->rcu_head, throtl_free_tg);
182} 182}
183 183
184static void throtl_init_group(struct throtl_grp *tg) 184static struct blkio_group *throtl_alloc_blkio_group(struct request_queue *q,
185 struct blkio_cgroup *blkcg)
185{ 186{
187 struct throtl_grp *tg;
188
189 tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, q->node);
190 if (!tg)
191 return NULL;
192
186 INIT_HLIST_NODE(&tg->tg_node); 193 INIT_HLIST_NODE(&tg->tg_node);
187 RB_CLEAR_NODE(&tg->rb_node); 194 RB_CLEAR_NODE(&tg->rb_node);
188 bio_list_init(&tg->bio_lists[0]); 195 bio_list_init(&tg->bio_lists[0]);
189 bio_list_init(&tg->bio_lists[1]); 196 bio_list_init(&tg->bio_lists[1]);
190 tg->limits_changed = false; 197 tg->limits_changed = false;
191 198
192 /* Practically unlimited BW */ 199 tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev);
193 tg->bps[0] = tg->bps[1] = -1; 200 tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev);
194 tg->iops[0] = tg->iops[1] = -1; 201 tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev);
202 tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev);
195 203
196 /* 204 /*
197 * Take the initial reference that will be released on destroy 205 * Take the initial reference that will be released on destroy
@@ -200,14 +208,8 @@ static void throtl_init_group(struct throtl_grp *tg)
200 * exit or cgroup deletion path depending on who is exiting first. 208 * exit or cgroup deletion path depending on who is exiting first.
201 */ 209 */
202 atomic_set(&tg->ref, 1); 210 atomic_set(&tg->ref, 1);
203}
204 211
205/* Should be called with rcu read lock held (needed for blkcg) */ 212 return &tg->blkg;
206static void
207throtl_add_group_to_td_list(struct throtl_data *td, struct throtl_grp *tg)
208{
209 hlist_add_head(&tg->tg_node, &td->tg_list);
210 td->nr_undestroyed_grps++;
211} 213}
212 214
213static void 215static void
@@ -246,119 +248,62 @@ throtl_tg_fill_dev_details(struct throtl_data *td, struct throtl_grp *tg)
246 spin_unlock_irq(td->queue->queue_lock); 248 spin_unlock_irq(td->queue->queue_lock);
247} 249}
248 250
249static void throtl_init_add_tg_lists(struct throtl_data *td, 251static void throtl_link_blkio_group(struct request_queue *q,
250 struct throtl_grp *tg, struct blkio_cgroup *blkcg) 252 struct blkio_group *blkg)
251{ 253{
252 __throtl_tg_fill_dev_details(td, tg); 254 struct throtl_data *td = q->td;
253 255 struct throtl_grp *tg = tg_of_blkg(blkg);
254 /* Add group onto cgroup list */
255 blkiocg_add_blkio_group(blkcg, &tg->blkg, td->queue,
256 tg->blkg.dev, BLKIO_POLICY_THROTL);
257
258 tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev);
259 tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev);
260 tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev);
261 tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev);
262
263 throtl_add_group_to_td_list(td, tg);
264}
265
266/* Should be called without queue lock and outside of rcu period */
267static struct throtl_grp *throtl_alloc_tg(struct throtl_data *td)
268{
269 struct throtl_grp *tg = NULL;
270 int ret;
271
272 tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, td->queue->node);
273 if (!tg)
274 return NULL;
275
276 ret = blkio_alloc_blkg_stats(&tg->blkg);
277 256
278 if (ret) { 257 __throtl_tg_fill_dev_details(td, tg);
279 kfree(tg);
280 return NULL;
281 }
282 258
283 throtl_init_group(tg); 259 hlist_add_head(&tg->tg_node, &td->tg_list);
284 return tg; 260 td->nr_undestroyed_grps++;
285} 261}
286 262
287static struct 263static struct
288throtl_grp *throtl_find_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) 264throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg)
289{ 265{
290 struct throtl_grp *tg = NULL; 266 struct throtl_grp *tg = NULL;
291 267
292 /* 268 /*
293 * This is the common case when there are no blkio cgroups. 269 * This is the common case when there are no blkio cgroups.
294 * Avoid lookup in this case 270 * Avoid lookup in this case
295 */ 271 */
296 if (blkcg == &blkio_root_cgroup) 272 if (blkcg == &blkio_root_cgroup)
297 tg = td->root_tg; 273 tg = td->root_tg;
298 else 274 else
299 tg = tg_of_blkg(blkiocg_lookup_group(blkcg, td->queue, 275 tg = tg_of_blkg(blkg_lookup(blkcg, td->queue,
300 BLKIO_POLICY_THROTL)); 276 BLKIO_POLICY_THROTL));
301 277
302 __throtl_tg_fill_dev_details(td, tg); 278 __throtl_tg_fill_dev_details(td, tg);
303 return tg; 279 return tg;
304} 280}
305 281
306static struct throtl_grp *throtl_get_tg(struct throtl_data *td, 282static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td,
307 struct blkio_cgroup *blkcg) 283 struct blkio_cgroup *blkcg)
308{ 284{
309 struct throtl_grp *tg = NULL, *__tg = NULL;
310 struct request_queue *q = td->queue; 285 struct request_queue *q = td->queue;
311 286 struct throtl_grp *tg = NULL;
312 /* no throttling for dead queue */
313 if (unlikely(blk_queue_bypass(q)))
314 return NULL;
315
316 tg = throtl_find_tg(td, blkcg);
317 if (tg)
318 return tg;
319
320 if (!css_tryget(&blkcg->css))
321 return NULL;
322
323 /*
324 * Need to allocate a group. Allocation of group also needs allocation
325 * of per cpu stats which in-turn takes a mutex() and can block. Hence
326 * we need to drop rcu lock and queue_lock before we call alloc.
327 */
328 spin_unlock_irq(q->queue_lock);
329 rcu_read_unlock();
330
331 tg = throtl_alloc_tg(td);
332
333 /* Group allocated and queue is still alive. take the lock */
334 rcu_read_lock();
335 spin_lock_irq(q->queue_lock);
336 css_put(&blkcg->css);
337
338 /* Make sure @q is still alive */
339 if (unlikely(blk_queue_bypass(q))) {
340 kfree(tg);
341 return NULL;
342 }
343 287
344 /* 288 /*
345 * If some other thread already allocated the group while we were 289 * This is the common case when there are no blkio cgroups.
346 * not holding queue lock, free up the group 290 * Avoid lookup in this case
347 */ 291 */
348 __tg = throtl_find_tg(td, blkcg); 292 if (blkcg == &blkio_root_cgroup) {
293 tg = td->root_tg;
294 } else {
295 struct blkio_group *blkg;
349 296
350 if (__tg) { 297 blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_THROTL, false);
351 kfree(tg);
352 return __tg;
353 }
354 298
355 /* Group allocation failed. Account the IO to root group */ 299 /* if %NULL and @q is alive, fall back to root_tg */
356 if (!tg) { 300 if (!IS_ERR(blkg))
357 tg = td->root_tg; 301 tg = tg_of_blkg(blkg);
358 return tg; 302 else if (!blk_queue_dead(q))
303 tg = td->root_tg;
359 } 304 }
360 305
361 throtl_init_add_tg_lists(td, tg, blkcg); 306 __throtl_tg_fill_dev_details(td, tg);
362 return tg; 307 return tg;
363} 308}
364 309
@@ -1107,6 +1052,8 @@ static void throtl_shutdown_wq(struct request_queue *q)
1107 1052
1108static struct blkio_policy_type blkio_policy_throtl = { 1053static struct blkio_policy_type blkio_policy_throtl = {
1109 .ops = { 1054 .ops = {
1055 .blkio_alloc_group_fn = throtl_alloc_blkio_group,
1056 .blkio_link_group_fn = throtl_link_blkio_group,
1110 .blkio_unlink_group_fn = throtl_unlink_blkio_group, 1057 .blkio_unlink_group_fn = throtl_unlink_blkio_group,
1111 .blkio_clear_queue_fn = throtl_clear_queue, 1058 .blkio_clear_queue_fn = throtl_clear_queue,
1112 .blkio_update_group_read_bps_fn = 1059 .blkio_update_group_read_bps_fn =
@@ -1141,7 +1088,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
1141 */ 1088 */
1142 rcu_read_lock(); 1089 rcu_read_lock();
1143 blkcg = task_blkio_cgroup(current); 1090 blkcg = task_blkio_cgroup(current);
1144 tg = throtl_find_tg(td, blkcg); 1091 tg = throtl_lookup_tg(td, blkcg);
1145 if (tg) { 1092 if (tg) {
1146 throtl_tg_fill_dev_details(td, tg); 1093 throtl_tg_fill_dev_details(td, tg);
1147 1094
@@ -1157,7 +1104,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio)
1157 * IO group 1104 * IO group
1158 */ 1105 */
1159 spin_lock_irq(q->queue_lock); 1106 spin_lock_irq(q->queue_lock);
1160 tg = throtl_get_tg(td, blkcg); 1107 tg = throtl_lookup_create_tg(td, blkcg);
1161 if (unlikely(!tg)) 1108 if (unlikely(!tg))
1162 goto out_unlock; 1109 goto out_unlock;
1163 1110
@@ -1252,6 +1199,7 @@ void blk_throtl_drain(struct request_queue *q)
1252int blk_throtl_init(struct request_queue *q) 1199int blk_throtl_init(struct request_queue *q)
1253{ 1200{
1254 struct throtl_data *td; 1201 struct throtl_data *td;
1202 struct blkio_group *blkg;
1255 1203
1256 td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node); 1204 td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node);
1257 if (!td) 1205 if (!td)
@@ -1262,13 +1210,17 @@ int blk_throtl_init(struct request_queue *q)
1262 td->limits_changed = false; 1210 td->limits_changed = false;
1263 INIT_DELAYED_WORK(&td->throtl_work, blk_throtl_work); 1211 INIT_DELAYED_WORK(&td->throtl_work, blk_throtl_work);
1264 1212
1265 /* alloc and Init root group. */ 1213 q->td = td;
1266 td->queue = q; 1214 td->queue = q;
1267 1215
1216 /* alloc and init root group. */
1268 rcu_read_lock(); 1217 rcu_read_lock();
1269 spin_lock_irq(q->queue_lock); 1218 spin_lock_irq(q->queue_lock);
1270 1219
1271 td->root_tg = throtl_get_tg(td, &blkio_root_cgroup); 1220 blkg = blkg_lookup_create(&blkio_root_cgroup, q, BLKIO_POLICY_THROTL,
1221 true);
1222 if (!IS_ERR(blkg))
1223 td->root_tg = tg_of_blkg(blkg);
1272 1224
1273 spin_unlock_irq(q->queue_lock); 1225 spin_unlock_irq(q->queue_lock);
1274 rcu_read_unlock(); 1226 rcu_read_unlock();
@@ -1277,9 +1229,6 @@ int blk_throtl_init(struct request_queue *q)
1277 kfree(td); 1229 kfree(td);
1278 return -ENOMEM; 1230 return -ENOMEM;
1279 } 1231 }
1280
1281 /* Attach throtl data to request queue */
1282 q->td = td;
1283 return 0; 1232 return 0;
1284} 1233}
1285 1234
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 1c3f41b9d5d..acef564578c 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1048,10 +1048,12 @@ static void cfq_update_blkio_group_weight(struct request_queue *q,
1048 cfqg->needs_update = true; 1048 cfqg->needs_update = true;
1049} 1049}
1050 1050
1051static void cfq_init_add_cfqg_lists(struct cfq_data *cfqd, 1051static void cfq_link_blkio_group(struct request_queue *q,
1052 struct cfq_group *cfqg, struct blkio_cgroup *blkcg) 1052 struct blkio_group *blkg)
1053{ 1053{
1054 struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info; 1054 struct cfq_data *cfqd = q->elevator->elevator_data;
1055 struct backing_dev_info *bdi = &q->backing_dev_info;
1056 struct cfq_group *cfqg = cfqg_of_blkg(blkg);
1055 unsigned int major, minor; 1057 unsigned int major, minor;
1056 1058
1057 /* 1059 /*
@@ -1062,34 +1064,26 @@ static void cfq_init_add_cfqg_lists(struct cfq_data *cfqd,
1062 */ 1064 */
1063 if (bdi->dev) { 1065 if (bdi->dev) {
1064 sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); 1066 sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
1065 cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg, 1067 blkg->dev = MKDEV(major, minor);
1066 cfqd->queue, MKDEV(major, minor)); 1068 }
1067 } else
1068 cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
1069 cfqd->queue, 0);
1070 1069
1071 cfqd->nr_blkcg_linked_grps++; 1070 cfqd->nr_blkcg_linked_grps++;
1072 cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
1073 1071
1074 /* Add group on cfqd list */ 1072 /* Add group on cfqd list */
1075 hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list); 1073 hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
1076} 1074}
1077 1075
1078/* 1076static struct blkio_group *cfq_alloc_blkio_group(struct request_queue *q,
1079 * Should be called from sleepable context. No request queue lock as per 1077 struct blkio_cgroup *blkcg)
1080 * cpu stats are allocated dynamically and alloc_percpu needs to be called
1081 * from sleepable context.
1082 */
1083static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
1084{ 1078{
1085 struct cfq_group *cfqg; 1079 struct cfq_group *cfqg;
1086 int ret;
1087 1080
1088 cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node); 1081 cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, q->node);
1089 if (!cfqg) 1082 if (!cfqg)
1090 return NULL; 1083 return NULL;
1091 1084
1092 cfq_init_cfqg_base(cfqg); 1085 cfq_init_cfqg_base(cfqg);
1086 cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
1093 1087
1094 /* 1088 /*
1095 * Take the initial reference that will be released on destroy 1089 * Take the initial reference that will be released on destroy
@@ -1099,90 +1093,38 @@ static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
1099 */ 1093 */
1100 cfqg->ref = 1; 1094 cfqg->ref = 1;
1101 1095
1102 ret = blkio_alloc_blkg_stats(&cfqg->blkg); 1096 return &cfqg->blkg;
1103 if (ret) {
1104 kfree(cfqg);
1105 return NULL;
1106 }
1107
1108 return cfqg;
1109}
1110
1111static struct cfq_group *
1112cfq_find_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg)
1113{
1114 struct cfq_group *cfqg = NULL;
1115 struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
1116 unsigned int major, minor;
1117
1118 /*
1119 * This is the common case when there are no blkio cgroups.
1120 * Avoid lookup in this case
1121 */
1122 if (blkcg == &blkio_root_cgroup)
1123 cfqg = cfqd->root_group;
1124 else
1125 cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, cfqd->queue,
1126 BLKIO_POLICY_PROP));
1127
1128 if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
1129 sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
1130 cfqg->blkg.dev = MKDEV(major, minor);
1131 }
1132
1133 return cfqg;
1134} 1097}
1135 1098
1136/* 1099/*
1137 * Search for the cfq group current task belongs to. request_queue lock must 1100 * Search for the cfq group current task belongs to. request_queue lock must
1138 * be held. 1101 * be held.
1139 */ 1102 */
1140static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, 1103static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd,
1141 struct blkio_cgroup *blkcg) 1104 struct blkio_cgroup *blkcg)
1142{ 1105{
1143 struct cfq_group *cfqg = NULL, *__cfqg = NULL;
1144 struct request_queue *q = cfqd->queue; 1106 struct request_queue *q = cfqd->queue;
1107 struct backing_dev_info *bdi = &q->backing_dev_info;
1108 struct cfq_group *cfqg = NULL;
1145 1109
1146 cfqg = cfq_find_cfqg(cfqd, blkcg); 1110 /* avoid lookup for the common case where there's no blkio cgroup */
1147 if (cfqg) 1111 if (blkcg == &blkio_root_cgroup) {
1148 return cfqg; 1112 cfqg = cfqd->root_group;
1149 1113 } else {
1150 if (!css_tryget(&blkcg->css)) 1114 struct blkio_group *blkg;
1151 return NULL;
1152
1153 /*
1154 * Need to allocate a group. Allocation of group also needs allocation
1155 * of per cpu stats which in-turn takes a mutex() and can block. Hence
1156 * we need to drop rcu lock and queue_lock before we call alloc.
1157 *
1158 * Not taking any queue reference here and assuming that queue is
1159 * around by the time we return. CFQ queue allocation code does
1160 * the same. It might be racy though.
1161 */
1162 rcu_read_unlock();
1163 spin_unlock_irq(q->queue_lock);
1164
1165 cfqg = cfq_alloc_cfqg(cfqd);
1166 1115
1167 spin_lock_irq(q->queue_lock); 1116 blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_PROP, false);
1168 rcu_read_lock(); 1117 if (!IS_ERR(blkg))
1169 css_put(&blkcg->css); 1118 cfqg = cfqg_of_blkg(blkg);
1119 }
1170 1120
1171 /* 1121 if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
1172 * If some other thread already allocated the group while we were 1122 unsigned int major, minor;
1173 * not holding queue lock, free up the group
1174 */
1175 __cfqg = cfq_find_cfqg(cfqd, blkcg);
1176 1123
1177 if (__cfqg) { 1124 sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
1178 kfree(cfqg); 1125 cfqg->blkg.dev = MKDEV(major, minor);
1179 return __cfqg;
1180 } 1126 }
1181 1127
1182 if (!cfqg)
1183 cfqg = cfqd->root_group;
1184
1185 cfq_init_add_cfqg_lists(cfqd, cfqg, blkcg);
1186 return cfqg; 1128 return cfqg;
1187} 1129}
1188 1130
@@ -1294,8 +1236,8 @@ static bool cfq_clear_queue(struct request_queue *q)
1294} 1236}
1295 1237
1296#else /* GROUP_IOSCHED */ 1238#else /* GROUP_IOSCHED */
1297static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, 1239static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd,
1298 struct blkio_cgroup *blkcg) 1240 struct blkio_cgroup *blkcg)
1299{ 1241{
1300 return cfqd->root_group; 1242 return cfqd->root_group;
1301} 1243}
@@ -2887,7 +2829,8 @@ retry:
2887 2829
2888 blkcg = task_blkio_cgroup(current); 2830 blkcg = task_blkio_cgroup(current);
2889 2831
2890 cfqg = cfq_get_cfqg(cfqd, blkcg); 2832 cfqg = cfq_lookup_create_cfqg(cfqd, blkcg);
2833
2891 cic = cfq_cic_lookup(cfqd, ioc); 2834 cic = cfq_cic_lookup(cfqd, ioc);
2892 /* cic always exists here */ 2835 /* cic always exists here */
2893 cfqq = cic_to_cfqq(cic, is_sync); 2836 cfqq = cic_to_cfqq(cic, is_sync);
@@ -3694,6 +3637,7 @@ static void cfq_exit_queue(struct elevator_queue *e)
3694static int cfq_init_queue(struct request_queue *q) 3637static int cfq_init_queue(struct request_queue *q)
3695{ 3638{
3696 struct cfq_data *cfqd; 3639 struct cfq_data *cfqd;
3640 struct blkio_group *blkg __maybe_unused;
3697 int i; 3641 int i;
3698 3642
3699 cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); 3643 cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
@@ -3711,7 +3655,10 @@ static int cfq_init_queue(struct request_queue *q)
3711 rcu_read_lock(); 3655 rcu_read_lock();
3712 spin_lock_irq(q->queue_lock); 3656 spin_lock_irq(q->queue_lock);
3713 3657
3714 cfqd->root_group = cfq_get_cfqg(cfqd, &blkio_root_cgroup); 3658 blkg = blkg_lookup_create(&blkio_root_cgroup, q, BLKIO_POLICY_PROP,
3659 true);
3660 if (!IS_ERR(blkg))
3661 cfqd->root_group = cfqg_of_blkg(blkg);
3715 3662
3716 spin_unlock_irq(q->queue_lock); 3663 spin_unlock_irq(q->queue_lock);
3717 rcu_read_unlock(); 3664 rcu_read_unlock();
@@ -3897,6 +3844,8 @@ static struct elevator_type iosched_cfq = {
3897#ifdef CONFIG_CFQ_GROUP_IOSCHED 3844#ifdef CONFIG_CFQ_GROUP_IOSCHED
3898static struct blkio_policy_type blkio_policy_cfq = { 3845static struct blkio_policy_type blkio_policy_cfq = {
3899 .ops = { 3846 .ops = {
3847 .blkio_alloc_group_fn = cfq_alloc_blkio_group,
3848 .blkio_link_group_fn = cfq_link_blkio_group,
3900 .blkio_unlink_group_fn = cfq_unlink_blkio_group, 3849 .blkio_unlink_group_fn = cfq_unlink_blkio_group,
3901 .blkio_clear_queue_fn = cfq_clear_queue, 3850 .blkio_clear_queue_fn = cfq_clear_queue,
3902 .blkio_update_group_weight_fn = cfq_update_blkio_group_weight, 3851 .blkio_update_group_weight_fn = cfq_update_blkio_group_weight,
diff --git a/block/cfq.h b/block/cfq.h
index 343b78a61df..398760194e1 100644
--- a/block/cfq.h
+++ b/block/cfq.h
@@ -67,12 +67,6 @@ static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg,
67 direction, sync); 67 direction, sync);
68} 68}
69 69
70static inline void cfq_blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
71 struct blkio_group *blkg, struct request_queue *q, dev_t dev)
72{
73 blkiocg_add_blkio_group(blkcg, blkg, q, dev, BLKIO_POLICY_PROP);
74}
75
76static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg) 70static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg)
77{ 71{
78 return blkiocg_del_blkio_group(blkg); 72 return blkiocg_del_blkio_group(blkg);
@@ -105,8 +99,6 @@ static inline void cfq_blkiocg_update_dispatch_stats(struct blkio_group *blkg,
105 uint64_t bytes, bool direction, bool sync) {} 99 uint64_t bytes, bool direction, bool sync) {}
106static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg, uint64_t start_time, uint64_t io_start_time, bool direction, bool sync) {} 100static inline void cfq_blkiocg_update_completion_stats(struct blkio_group *blkg, uint64_t start_time, uint64_t io_start_time, bool direction, bool sync) {}
107 101
108static inline void cfq_blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
109 struct blkio_group *blkg, struct request_queue *q, dev_t dev) {}
110static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg) 102static inline int cfq_blkiocg_del_blkio_group(struct blkio_group *blkg)
111{ 103{
112 return 0; 104 return 0;