author		Tejun Heo <tj@kernel.org>	2012-03-05 16:15:20 -0500
committer	Jens Axboe <axboe@kernel.dk>	2012-03-06 15:27:23 -0500
commit		e8989fae38d9831c72b20375a206a919ca468c52 (patch)
tree		2eeb1320e526cd0ba187465c0e6e19799dc1b956
parent		03aa264ac15637b6f98374270bcdf31400965505 (diff)
blkcg: unify blkg's for blkcg policies
Currently, blkg is per cgroup-queue-policy combination. This is
unnatural and leads to various convolutions in partially used duplicate
fields in blkg, config / stat access, and general management of blkgs.

This patch makes blkg's per cgroup-queue and lets them serve all
policies. blkgs are now created and destroyed by blkcg core proper.
This will allow further consolidation of common management logic into
blkcg core and API with better defined semantics and layering.

As a transitional step to untangle blkg management, elvswitch and
policy [de]registration, all blkgs except the root blkg are being shot
down during elvswitch and bypass. This patch adds blkg_root_update()
to update root blkg in place on policy change. This is hacky and racy
but should be good enough as an interim step until we get locking
simplified and switch over to proper in-place update for all blkgs.

-v2: Root blkgs need to be updated on elvswitch too and blkg_alloc()
     comment wasn't updated according to the function change. Fixed.
     Both pointed out by Vivek.

-v3: v2 updated blkg_destroy_all() to invoke update_root_blkg_pd() for
     all policies. This freed root pd during elvswitch before the last
     queue finished exiting and led to oops. Directly invoke
     update_root_blkg_pd() only on BLKIO_POLICY_PROP from
     cfq_exit_queue(). This also is closer to what will be done with
     proper in-place blkg update. Reported by Vivek.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
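To see the shape of the change at a glance before the diff, here is a reduced,
self-contained C sketch of the unified blkg (field and function names follow
the patch, but the struct layouts and the *_sketch helper are trimmed,
hypothetical stand-ins, not the actual kernel definitions): a blkg now belongs
to a (cgroup, queue) pair and carries one policy-data slot per policy, and
freeing it walks every slot instead of a single owning plid.

#include <stdlib.h>

#define BLKIO_NR_POLICIES 2	/* BLKIO_POLICY_PROP and BLKIO_POLICY_THROTL */

struct blkio_group;

/* trimmed stand-in for struct blkg_policy_data */
struct blkg_policy_data {
	struct blkio_group *blkg;	/* back-pointer to the owning blkg */
	/* policy-private config and stats would follow here */
};

/* trimmed stand-in for struct blkio_group after this patch */
struct blkio_group {
	/* one q_node and no ->plid: the blkg serves every policy via pd[] */
	struct blkg_policy_data *pd[BLKIO_NR_POLICIES];
	int refcnt;
};

/* mirrors the reworked blkg_free(): free each policy's data, then the blkg */
static void blkg_free_sketch(struct blkio_group *blkg)
{
	int i;

	if (!blkg)
		return;

	for (i = 0; i < BLKIO_NR_POLICIES; i++)
		free(blkg->pd[i]);	/* free(NULL) is a no-op */

	free(blkg);
}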
-rw-r--r--	block/blk-cgroup.c	224
-rw-r--r--	block/blk-cgroup.h	15
-rw-r--r--	block/blk-core.c	3
-rw-r--r--	block/blk-sysfs.c	4
-rw-r--r--	block/blk-throttle.c	9
-rw-r--r--	block/cfq-iosched.c	5
-rw-r--r--	block/elevator.c	5
7 files changed, 154 insertions, 111 deletions
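The in-place root update described in the commit message can be sketched the
same way (again a simplified, hypothetical standalone C model with reduced
types and a *_sketch name, not the kernel code): when a policy is registered
or unregistered, every non-root blkg has already been shot down under bypass,
so only the root blkg's slot for that policy is freed and, if the policy is
present, reallocated and reattached.

#include <stdlib.h>

#define BLKIO_NR_POLICIES 2

struct blkio_group;

struct blkg_policy_data {
	struct blkio_group *blkg;
	/* pol->pdata_size bytes of policy-private data follow the struct */
};

struct blkio_group {
	struct blkg_policy_data *pd[BLKIO_NR_POLICIES];
};

/*
 * Simplified model of update_root_blkg_pd(): drop the root blkg's data for
 * one policy and, if that policy is (still) loaded, rebuild it in place.
 * The real function also reallocates the percpu stats and then calls
 * pol->ops.blkio_init_group_fn(blkg); error handling here is reduced to an
 * early return where the kernel code only WARNs.
 */
static void update_root_pd_sketch(struct blkio_group *root_blkg, int plid,
				  int policy_loaded, size_t pdata_size)
{
	struct blkg_policy_data *pd;

	free(root_blkg->pd[plid]);
	root_blkg->pd[plid] = NULL;

	if (!policy_loaded)
		return;

	pd = calloc(1, sizeof(*pd) + pdata_size);
	if (!pd)
		return;

	pd->blkg = root_blkg;
	root_blkg->pd[plid] = pd;
}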
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 2ca9a15db0f..cad5f15cf49 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -461,16 +461,20 @@ EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);
  */
 static void blkg_free(struct blkio_group *blkg)
 {
-	struct blkg_policy_data *pd;
+	int i;
 
 	if (!blkg)
 		return;
 
-	pd = blkg->pd[blkg->plid];
-	if (pd) {
-		free_percpu(pd->stats_cpu);
-		kfree(pd);
+	for (i = 0; i < BLKIO_NR_POLICIES; i++) {
+		struct blkg_policy_data *pd = blkg->pd[i];
+
+		if (pd) {
+			free_percpu(pd->stats_cpu);
+			kfree(pd);
+		}
 	}
+
 	kfree(blkg);
 }
 
@@ -478,19 +482,17 @@ static void blkg_free(struct blkio_group *blkg)
  * blkg_alloc - allocate a blkg
  * @blkcg: block cgroup the new blkg is associated with
  * @q: request_queue the new blkg is associated with
- * @pol: policy the new blkg is associated with
  *
- * Allocate a new blkg assocating @blkcg and @q for @pol.
+ * Allocate a new blkg assocating @blkcg and @q.
  *
  * FIXME: Should be called with queue locked but currently isn't due to
  * percpu stat breakage.
  */
 static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
-				      struct request_queue *q,
-				      struct blkio_policy_type *pol)
+				      struct request_queue *q)
 {
 	struct blkio_group *blkg;
-	struct blkg_policy_data *pd;
+	int i;
 
 	/* alloc and init base part */
 	blkg = kzalloc_node(sizeof(*blkg), GFP_ATOMIC, q->node);
@@ -499,34 +501,45 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
 
 	spin_lock_init(&blkg->stats_lock);
 	rcu_assign_pointer(blkg->q, q);
-	INIT_LIST_HEAD(&blkg->q_node[0]);
-	INIT_LIST_HEAD(&blkg->q_node[1]);
+	INIT_LIST_HEAD(&blkg->q_node);
 	blkg->blkcg = blkcg;
-	blkg->plid = pol->plid;
 	blkg->refcnt = 1;
 	cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));
 
-	/* alloc per-policy data and attach it to blkg */
-	pd = kzalloc_node(sizeof(*pd) + pol->pdata_size, GFP_ATOMIC,
-			  q->node);
-	if (!pd) {
-		blkg_free(blkg);
-		return NULL;
-	}
+	for (i = 0; i < BLKIO_NR_POLICIES; i++) {
+		struct blkio_policy_type *pol = blkio_policy[i];
+		struct blkg_policy_data *pd;
 
-	blkg->pd[pol->plid] = pd;
-	pd->blkg = blkg;
+		if (!pol)
+			continue;
+
+		/* alloc per-policy data and attach it to blkg */
+		pd = kzalloc_node(sizeof(*pd) + pol->pdata_size, GFP_ATOMIC,
+				  q->node);
+		if (!pd) {
+			blkg_free(blkg);
+			return NULL;
+		}
 
-	/* broken, read comment in the callsite */
+		blkg->pd[i] = pd;
+		pd->blkg = blkg;
 
-	pd->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu);
-	if (!pd->stats_cpu) {
-		blkg_free(blkg);
-		return NULL;
+		/* broken, read comment in the callsite */
+		pd->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu);
+		if (!pd->stats_cpu) {
+			blkg_free(blkg);
+			return NULL;
+		}
 	}
 
 	/* invoke per-policy init */
-	pol->ops.blkio_init_group_fn(blkg);
+	for (i = 0; i < BLKIO_NR_POLICIES; i++) {
+		struct blkio_policy_type *pol = blkio_policy[i];
+
+		if (pol)
+			pol->ops.blkio_init_group_fn(blkg);
+	}
+
 	return blkg;
 }
 
@@ -536,7 +549,6 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
 				       bool for_root)
 	__releases(q->queue_lock) __acquires(q->queue_lock)
 {
-	struct blkio_policy_type *pol = blkio_policy[plid];
 	struct blkio_group *blkg, *new_blkg;
 
 	WARN_ON_ONCE(!rcu_read_lock_held());
@@ -551,7 +563,7 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
 	if (unlikely(blk_queue_bypass(q)) && !for_root)
 		return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
 
-	blkg = blkg_lookup(blkcg, q, plid);
+	blkg = blkg_lookup(blkcg, q);
 	if (blkg)
 		return blkg;
 
@@ -571,7 +583,7 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
 	spin_unlock_irq(q->queue_lock);
 	rcu_read_unlock();
 
-	new_blkg = blkg_alloc(blkcg, q, pol);
+	new_blkg = blkg_alloc(blkcg, q);
 
 	rcu_read_lock();
 	spin_lock_irq(q->queue_lock);
@@ -583,7 +595,7 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
 	}
 
 	/* did someone beat us to it? */
-	blkg = blkg_lookup(blkcg, q, plid);
+	blkg = blkg_lookup(blkcg, q);
 	if (unlikely(blkg))
 		goto out;
 
@@ -598,8 +610,8 @@ struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
 	swap(blkg, new_blkg);
 
 	hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
-	list_add(&blkg->q_node[plid], &q->blkg_list[plid]);
-	q->nr_blkgs[plid]++;
+	list_add(&blkg->q_node, &q->blkg_list);
+	q->nr_blkgs++;
 
 	spin_unlock(&blkcg->lock);
 out:
@@ -636,31 +648,30 @@ EXPORT_SYMBOL_GPL(blkiocg_del_blkio_group);
 
 /* called under rcu_read_lock(). */
 struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
-				struct request_queue *q,
-				enum blkio_policy_id plid)
+				struct request_queue *q)
 {
 	struct blkio_group *blkg;
 	struct hlist_node *n;
 
 	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node)
-		if (blkg->q == q && blkg->plid == plid)
+		if (blkg->q == q)
 			return blkg;
 	return NULL;
 }
 EXPORT_SYMBOL_GPL(blkg_lookup);
 
-static void blkg_destroy(struct blkio_group *blkg, enum blkio_policy_id plid)
+static void blkg_destroy(struct blkio_group *blkg)
 {
 	struct request_queue *q = blkg->q;
 
 	lockdep_assert_held(q->queue_lock);
 
 	/* Something wrong if we are trying to remove same group twice */
-	WARN_ON_ONCE(list_empty(&blkg->q_node[plid]));
-	list_del_init(&blkg->q_node[plid]);
+	WARN_ON_ONCE(list_empty(&blkg->q_node));
+	list_del_init(&blkg->q_node);
 
-	WARN_ON_ONCE(q->nr_blkgs[plid] <= 0);
-	q->nr_blkgs[plid]--;
+	WARN_ON_ONCE(q->nr_blkgs <= 0);
+	q->nr_blkgs--;
 
 	/*
 	 * Put the reference taken at the time of creation so that when all
@@ -669,8 +680,40 @@ static void blkg_destroy(struct blkio_group *blkg, enum blkio_policy_id plid)
 	blkg_put(blkg);
 }
 
-void blkg_destroy_all(struct request_queue *q, enum blkio_policy_id plid,
-		      bool destroy_root)
+/*
+ * XXX: This updates blkg policy data in-place for root blkg, which is
+ * necessary across elevator switch and policy registration as root blkgs
+ * aren't shot down. This broken and racy implementation is temporary.
+ * Eventually, blkg shoot down will be replaced by proper in-place update.
+ */
+void update_root_blkg_pd(struct request_queue *q, enum blkio_policy_id plid)
+{
+	struct blkio_policy_type *pol = blkio_policy[plid];
+	struct blkio_group *blkg = blkg_lookup(&blkio_root_cgroup, q);
+	struct blkg_policy_data *pd;
+
+	if (!blkg)
+		return;
+
+	kfree(blkg->pd[plid]);
+	blkg->pd[plid] = NULL;
+
+	if (!pol)
+		return;
+
+	pd = kzalloc(sizeof(*pd) + pol->pdata_size, GFP_KERNEL);
+	WARN_ON_ONCE(!pd);
+
+	pd->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu);
+	WARN_ON_ONCE(!pd->stats_cpu);
+
+	blkg->pd[plid] = pd;
+	pd->blkg = blkg;
+	pol->ops.blkio_init_group_fn(blkg);
+}
+EXPORT_SYMBOL_GPL(update_root_blkg_pd);
+
+void blkg_destroy_all(struct request_queue *q, bool destroy_root)
 {
 	struct blkio_group *blkg, *n;
 
@@ -679,8 +722,7 @@ void blkg_destroy_all(struct request_queue *q, enum blkio_policy_id plid,
 
 	spin_lock_irq(q->queue_lock);
 
-	list_for_each_entry_safe(blkg, n, &q->blkg_list[plid],
-				 q_node[plid]) {
+	list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
 		/* skip root? */
 		if (!destroy_root && blkg->blkcg == &blkio_root_cgroup)
 			continue;
@@ -691,7 +733,7 @@ void blkg_destroy_all(struct request_queue *q, enum blkio_policy_id plid,
 		 * take care of destroying cfqg also.
 		 */
 		if (!blkiocg_del_blkio_group(blkg))
-			blkg_destroy(blkg, plid);
+			blkg_destroy(blkg);
 		else
 			done = false;
 	}
@@ -776,43 +818,49 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
 #endif
 
 	blkcg = cgroup_to_blkio_cgroup(cgroup);
+	spin_lock(&blkio_list_lock);
 	spin_lock_irq(&blkcg->lock);
 	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
-		struct blkg_policy_data *pd = blkg->pd[blkg->plid];
+		struct blkio_policy_type *pol;
 
-		spin_lock(&blkg->stats_lock);
-		stats = &pd->stats;
+		list_for_each_entry(pol, &blkio_list, list) {
+			struct blkg_policy_data *pd = blkg->pd[pol->plid];
+
+			spin_lock(&blkg->stats_lock);
+			stats = &pd->stats;
 #ifdef CONFIG_DEBUG_BLK_CGROUP
 		idling = blkio_blkg_idling(stats);
 		waiting = blkio_blkg_waiting(stats);
 		empty = blkio_blkg_empty(stats);
 #endif
 		for (i = 0; i < BLKIO_STAT_TOTAL; i++)
 			queued[i] = stats->stat_arr[BLKIO_STAT_QUEUED][i];
 		memset(stats, 0, sizeof(struct blkio_group_stats));
 		for (i = 0; i < BLKIO_STAT_TOTAL; i++)
 			stats->stat_arr[BLKIO_STAT_QUEUED][i] = queued[i];
 #ifdef CONFIG_DEBUG_BLK_CGROUP
 		if (idling) {
 			blkio_mark_blkg_idling(stats);
 			stats->start_idle_time = now;
 		}
 		if (waiting) {
 			blkio_mark_blkg_waiting(stats);
 			stats->start_group_wait_time = now;
 		}
 		if (empty) {
 			blkio_mark_blkg_empty(stats);
 			stats->start_empty_time = now;
 		}
 #endif
 		spin_unlock(&blkg->stats_lock);
 
 		/* Reset Per cpu stats which don't take blkg->stats_lock */
-		blkio_reset_stats_cpu(blkg, blkg->plid);
+			blkio_reset_stats_cpu(blkg, pol->plid);
+		}
 	}
 
 	spin_unlock_irq(&blkcg->lock);
+	spin_unlock(&blkio_list_lock);
 	return 0;
 }
 
@@ -1168,8 +1216,7 @@ static void blkio_read_conf(struct cftype *cft, struct blkio_cgroup *blkcg,
 
 	spin_lock_irq(&blkcg->lock);
 	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node)
-		if (BLKIOFILE_POLICY(cft->private) == blkg->plid)
-			blkio_print_group_conf(cft, blkg, m);
+		blkio_print_group_conf(cft, blkg, m);
 	spin_unlock_irq(&blkcg->lock);
 }
 
@@ -1224,7 +1271,7 @@ static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg,
 		const char *dname = blkg_dev_name(blkg);
 		int plid = BLKIOFILE_POLICY(cft->private);
 
-		if (!dname || plid != blkg->plid)
+		if (!dname)
 			continue;
 		if (pcpu) {
 			cgroup_total += blkio_get_stat_cpu(blkg, plid,
@@ -1335,9 +1382,9 @@ static int blkio_weight_write(struct blkio_cgroup *blkcg, int plid, u64 val)
 	blkcg->weight = (unsigned int)val;
 
 	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
-		struct blkg_policy_data *pd = blkg->pd[blkg->plid];
+		struct blkg_policy_data *pd = blkg->pd[plid];
 
-		if (blkg->plid == plid && !pd->conf.weight)
+		if (!pd->conf.weight)
 			blkio_update_group_weight(blkg, plid, blkcg->weight);
 	}
 
@@ -1560,7 +1607,6 @@ static int blkiocg_pre_destroy(struct cgroup_subsys *subsys,
 	unsigned long flags;
 	struct blkio_group *blkg;
 	struct request_queue *q;
-	struct blkio_policy_type *blkiop;
 
 	rcu_read_lock();
 
@@ -1586,11 +1632,7 @@ static int blkiocg_pre_destroy(struct cgroup_subsys *subsys,
 		 */
 		spin_lock(&blkio_list_lock);
 		spin_lock_irqsave(q->queue_lock, flags);
-		list_for_each_entry(blkiop, &blkio_list, list) {
-			if (blkiop->plid != blkg->plid)
-				continue;
-			blkg_destroy(blkg, blkiop->plid);
-		}
+		blkg_destroy(blkg);
 		spin_unlock_irqrestore(q->queue_lock, flags);
 		spin_unlock(&blkio_list_lock);
 	} while (1);
@@ -1684,6 +1726,8 @@ void blkcg_exit_queue(struct request_queue *q)
 	list_del_init(&q->all_q_node);
 	mutex_unlock(&all_q_mutex);
 
+	blkg_destroy_all(q, true);
+
 	blk_throtl_exit(q);
 }
 
@@ -1733,14 +1777,12 @@ static void blkcg_bypass_start(void)
 	__acquires(&all_q_mutex)
 {
 	struct request_queue *q;
-	int i;
 
 	mutex_lock(&all_q_mutex);
 
 	list_for_each_entry(q, &all_q_list, all_q_node) {
 		blk_queue_bypass_start(q);
-		for (i = 0; i < BLKIO_NR_POLICIES; i++)
-			blkg_destroy_all(q, i, false);
+		blkg_destroy_all(q, false);
 	}
 }
 
@@ -1757,6 +1799,8 @@ static void blkcg_bypass_end(void)
 
 void blkio_policy_register(struct blkio_policy_type *blkiop)
 {
+	struct request_queue *q;
+
 	blkcg_bypass_start();
 	spin_lock(&blkio_list_lock);
 
@@ -1765,12 +1809,16 @@ void blkio_policy_register(struct blkio_policy_type *blkiop)
 	list_add_tail(&blkiop->list, &blkio_list);
 
 	spin_unlock(&blkio_list_lock);
+	list_for_each_entry(q, &all_q_list, all_q_node)
+		update_root_blkg_pd(q, blkiop->plid);
 	blkcg_bypass_end();
 }
 EXPORT_SYMBOL_GPL(blkio_policy_register);
 
 void blkio_policy_unregister(struct blkio_policy_type *blkiop)
 {
+	struct request_queue *q;
+
 	blkcg_bypass_start();
 	spin_lock(&blkio_list_lock);
 
@@ -1779,6 +1827,8 @@ void blkio_policy_unregister(struct blkio_policy_type *blkiop)
 	list_del_init(&blkiop->list);
 
 	spin_unlock(&blkio_list_lock);
+	list_for_each_entry(q, &all_q_list, all_q_node)
+		update_root_blkg_pd(q, blkiop->plid);
 	blkcg_bypass_end();
 }
 EXPORT_SYMBOL_GPL(blkio_policy_unregister);
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 83ce5fa0a60..6e8ee86a287 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -178,13 +178,11 @@ struct blkg_policy_data {
 struct blkio_group {
 	/* Pointer to the associated request_queue, RCU protected */
 	struct request_queue __rcu *q;
-	struct list_head q_node[BLKIO_NR_POLICIES];
+	struct list_head q_node;
 	struct hlist_node blkcg_node;
 	struct blkio_cgroup *blkcg;
 	/* Store cgroup path */
 	char path[128];
-	/* policy which owns this blk group */
-	enum blkio_policy_id plid;
 	/* reference count */
 	int refcnt;
 
@@ -230,8 +228,9 @@ extern void blkcg_exit_queue(struct request_queue *q);
 /* Blkio controller policy registration */
 extern void blkio_policy_register(struct blkio_policy_type *);
 extern void blkio_policy_unregister(struct blkio_policy_type *);
-extern void blkg_destroy_all(struct request_queue *q,
-			     enum blkio_policy_id plid, bool destroy_root);
+extern void blkg_destroy_all(struct request_queue *q, bool destroy_root);
+extern void update_root_blkg_pd(struct request_queue *q,
+				enum blkio_policy_id plid);
 
 /**
  * blkg_to_pdata - get policy private data
@@ -313,8 +312,9 @@ static inline void blkcg_exit_queue(struct request_queue *q) { }
 static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { }
 static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { }
 static inline void blkg_destroy_all(struct request_queue *q,
-				    enum blkio_policy_id plid,
 				    bool destory_root) { }
+static inline void update_root_blkg_pd(struct request_queue *q,
+				       enum blkio_policy_id plid) { }
 
 static inline void *blkg_to_pdata(struct blkio_group *blkg,
 				  struct blkio_policy_type *pol) { return NULL; }
@@ -382,8 +382,7 @@ extern struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup);
 extern struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk);
 extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
 extern struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
-				       struct request_queue *q,
-				       enum blkio_policy_id plid);
+				       struct request_queue *q);
 struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
 				       struct request_queue *q,
 				       enum blkio_policy_id plid,
diff --git a/block/blk-core.c b/block/blk-core.c
index 83a47fcf594..05693f403e4 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -548,8 +548,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 	INIT_LIST_HEAD(&q->timeout_list);
 	INIT_LIST_HEAD(&q->icq_list);
 #ifdef CONFIG_BLK_CGROUP
-	INIT_LIST_HEAD(&q->blkg_list[0]);
-	INIT_LIST_HEAD(&q->blkg_list[1]);
+	INIT_LIST_HEAD(&q->blkg_list);
 #endif
 	INIT_LIST_HEAD(&q->flush_queue[0]);
 	INIT_LIST_HEAD(&q->flush_queue[1]);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 00cdc987b52..aa41b47c22d 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -480,6 +480,8 @@ static void blk_release_queue(struct kobject *kobj)
 
 	blk_sync_queue(q);
 
+	blkcg_exit_queue(q);
+
 	if (q->elevator) {
 		spin_lock_irq(q->queue_lock);
 		ioc_clear_queue(q);
@@ -487,8 +489,6 @@ static void blk_release_queue(struct kobject *kobj)
 		elevator_exit(q->elevator);
 	}
 
-	blkcg_exit_queue(q);
-
 	if (rl->rq_pool)
 		mempool_destroy(rl->rq_pool);
 
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 132941260e5..e35ee7aeea6 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -167,7 +167,7 @@ throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg)
 	if (blkcg == &blkio_root_cgroup)
 		return td->root_tg;
 
-	return blkg_to_tg(blkg_lookup(blkcg, td->queue, BLKIO_POLICY_THROTL));
+	return blkg_to_tg(blkg_lookup(blkcg, td->queue));
 }
 
 static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td,
@@ -704,8 +704,7 @@ static void throtl_process_limit_change(struct throtl_data *td)
 
 	throtl_log(td, "limits changed");
 
-	list_for_each_entry_safe(blkg, n, &q->blkg_list[BLKIO_POLICY_THROTL],
-				 q_node[BLKIO_POLICY_THROTL]) {
+	list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
 		struct throtl_grp *tg = blkg_to_tg(blkg);
 
 		if (!tg->limits_changed)
@@ -1054,11 +1053,9 @@ void blk_throtl_exit(struct request_queue *q)
 
 	throtl_shutdown_wq(q);
 
-	blkg_destroy_all(q, BLKIO_POLICY_THROTL, true);
-
 	/* If there are other groups */
 	spin_lock_irq(q->queue_lock);
-	wait = q->nr_blkgs[BLKIO_POLICY_THROTL];
+	wait = q->nr_blkgs;
 	spin_unlock_irq(q->queue_lock);
 
 	/*
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index dc73690dec4..393eaa59913 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -3462,15 +3462,13 @@ static void cfq_exit_queue(struct elevator_queue *e)
 
 	spin_unlock_irq(q->queue_lock);
 
-	blkg_destroy_all(q, BLKIO_POLICY_PROP, true);
-
 #ifdef CONFIG_BLK_CGROUP
 	/*
 	 * If there are groups which we could not unlink from blkcg list,
 	 * wait for a rcu period for them to be freed.
 	 */
 	spin_lock_irq(q->queue_lock);
-	wait = q->nr_blkgs[BLKIO_POLICY_PROP];
+	wait = q->nr_blkgs;
 	spin_unlock_irq(q->queue_lock);
 #endif
 	cfq_shutdown_timer_wq(cfqd);
@@ -3492,6 +3490,7 @@ static void cfq_exit_queue(struct elevator_queue *e)
 #ifndef CONFIG_CFQ_GROUP_IOSCHED
 	kfree(cfqd->root_group);
 #endif
+	update_root_blkg_pd(q, BLKIO_POLICY_PROP);
 	kfree(cfqd);
 }
 
diff --git a/block/elevator.c b/block/elevator.c
index d4d39dab841..451654fadab 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -876,7 +876,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 {
 	struct elevator_queue *old = q->elevator;
 	bool registered = old->registered;
-	int i, err;
+	int err;
 
 	/*
 	 * Turn on BYPASS and drain all requests w/ elevator private data.
@@ -895,8 +895,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
 	ioc_clear_queue(q);
 	spin_unlock_irq(q->queue_lock);
 
-	for (i = 0; i < BLKIO_NR_POLICIES; i++)
-		blkg_destroy_all(q, i, false);
+	blkg_destroy_all(q, false);
 
 	/* allocate, init and register new elevator */
 	err = -ENOMEM;