author		Tejun Heo <tj@kernel.org>	2012-04-13 16:11:33 -0400
committer	Jens Axboe <axboe@kernel.dk>	2012-04-20 04:06:06 -0400
commit		a2b1693bac45ea3fe3ba612fd22c45f17449f610 (patch)
tree		2e05859caab6453efbc85d584dd72dca7ef03cd0 /block/blk-cgroup.c
parent		03d8e11142a893ad322285d3c8a08e88b570cda1 (diff)
blkcg: implement per-queue policy activation
All blkcg policies were assumed to be enabled on all request_queues.
Due to various implementation obstacles, during the recent blkcg core
updates, this was temporarily implemented as shooting down all !root
blkgs on elevator switch and policy [de]registration combined with
half-broken in-place root blkg updates. In addition to being buggy
and racy, this meant losing all blkcg configurations across those
events.
Now that blkcg is cleaned up enough, this patch replaces the temporary
implementation with proper per-queue policy activation. Each blkcg
policy should call the new blkcg_[de]activate_policy() to enable and
disable the policy on a specific queue. blkcg_activate_policy()
allocates and installs policy data for the policy for all existing
blkgs. blkcg_deactivate_policy() does the reverse. If a policy is
not enabled for a given queue, blkg printing / config functions skip
the respective blkg for the queue.
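For illustration only, here is a minimal policy-side sketch of the usage this enables. Everything with a foo_ prefix is invented for the example; struct blkio_policy_type, blkio_policy_register() and blkcg_{activate,deactivate}_policy() are the interfaces this patch provides or touches:

#include <linux/blkdev.h>
#include "blk-cgroup.h"

/* per-blkg state; sized via .pdata_size and zeroed by the blkcg core */
struct foo_grp {
	u64 dispatched;
};

static void foo_init_group(struct blkio_group *blkg)
{
	/* blkg->pd[plid]->pdata already points at a zeroed foo_grp */
}

static void foo_exit_group(struct blkio_group *blkg)
{
	/* undo whatever foo_init_group() set up */
}

static struct blkio_policy_type blkio_policy_foo = {
	.ops = {
		.blkio_init_group_fn	= foo_init_group,
		.blkio_exit_group_fn	= foo_exit_group,
	},
	.pdata_size	= sizeof(struct foo_grp),
	/* .plid is assigned by blkio_policy_register(&blkio_policy_foo) */
};

/* enable/disable foo on one request_queue, e.g. from elevator init/exit */
static int foo_enable(struct request_queue *q)
{
	return blkcg_activate_policy(q, &blkio_policy_foo);
}

static void foo_disable(struct request_queue *q)
{
	blkcg_deactivate_policy(q, &blkio_policy_foo);
}

Registration still happens once globally; only the activate/deactivate calls flip the per-queue bit in q->blkcg_pols.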
blkcg_activate_policy() also takes care of root blkg creation, and
cfq_init_queue() and blk_throtl_init() are updated accordingly.
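Continuing the foo sketch above, a policy's per-queue init then reduces to roughly the following shape. This is not the actual cfq_init_queue()/blk_throtl_init() code (those changes live outside block/blk-cgroup.c and are not in this diff); foo_queue_data is an invented name:

#include <linux/slab.h>

struct foo_queue_data {
	unsigned int nr_groups;		/* whatever per-queue state foo needs */
};

static int foo_init_queue(struct request_queue *q)
{
	struct foo_queue_data *fqd;
	int ret;

	fqd = kzalloc_node(sizeof(*fqd), GFP_KERNEL, q->node);
	if (!fqd)
		return -ENOMEM;

	/* stash fqd in the policy's per-queue state here (omitted) */

	/*
	 * No hand-rolled root blkg setup any more: blkcg_activate_policy()
	 * creates q->root_blkg and allocates policy data for every blkg
	 * already hanging off @q.
	 */
	ret = blkcg_activate_policy(q, &blkio_policy_foo);
	if (ret)
		kfree(fqd);
	return ret;
}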
This makes blkcg_bypass_{start|end}() and update_root_blkg_pd()
unnecessary; both are dropped.
v2: cfq_init_queue() was returning uninitialized @ret on root_group
alloc failure if !CONFIG_CFQ_GROUP_IOSCHED. Fixed.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block/blk-cgroup.c')
-rw-r--r--	block/blk-cgroup.c	228
1 file changed, 154 insertions, 74 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index d6e4555c982f..d6d59ad105b4 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -54,6 +54,17 @@ struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio)
 }
 EXPORT_SYMBOL_GPL(bio_blkio_cgroup);
 
+static bool blkcg_policy_enabled(struct request_queue *q,
+				 const struct blkio_policy_type *pol)
+{
+	return pol && test_bit(pol->plid, q->blkcg_pols);
+}
+
+static size_t blkg_pd_size(const struct blkio_policy_type *pol)
+{
+	return sizeof(struct blkg_policy_data) + pol->pdata_size;
+}
+
 /**
  * blkg_free - free a blkg
  * @blkg: blkg to free
@@ -111,12 +122,11 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
 		struct blkio_policy_type *pol = blkio_policy[i];
 		struct blkg_policy_data *pd;
 
-		if (!pol)
+		if (!blkcg_policy_enabled(q, pol))
 			continue;
 
 		/* alloc per-policy data and attach it to blkg */
-		pd = kzalloc_node(sizeof(*pd) + pol->pdata_size, GFP_ATOMIC,
-				  q->node);
+		pd = kzalloc_node(blkg_pd_size(pol), GFP_ATOMIC, q->node);
 		if (!pd) {
 			blkg_free(blkg);
 			return NULL;
@@ -130,7 +140,7 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
 	for (i = 0; i < BLKCG_MAX_POLS; i++) {
 		struct blkio_policy_type *pol = blkio_policy[i];
 
-		if (pol)
+		if (blkcg_policy_enabled(blkg->q, pol))
 			pol->ops.blkio_init_group_fn(blkg);
 	}
 
@@ -236,36 +246,6 @@ static void blkg_destroy(struct blkio_group *blkg)
 	blkg_put(blkg);
 }
 
-/*
- * XXX: This updates blkg policy data in-place for root blkg, which is
- * necessary across elevator switch and policy registration as root blkgs
- * aren't shot down.  This broken and racy implementation is temporary.
- * Eventually, blkg shoot down will be replaced by proper in-place update.
- */
-void update_root_blkg_pd(struct request_queue *q,
-			 const struct blkio_policy_type *pol)
-{
-	struct blkio_group *blkg = blkg_lookup(&blkio_root_cgroup, q);
-	struct blkg_policy_data *pd;
-
-	if (!blkg)
-		return;
-
-	kfree(blkg->pd[pol->plid]);
-	blkg->pd[pol->plid] = NULL;
-
-	if (!pol)
-		return;
-
-	pd = kzalloc(sizeof(*pd) + pol->pdata_size, GFP_KERNEL);
-	WARN_ON_ONCE(!pd);
-
-	blkg->pd[pol->plid] = pd;
-	pd->blkg = blkg;
-	pol->ops.blkio_init_group_fn(blkg);
-}
-EXPORT_SYMBOL_GPL(update_root_blkg_pd);
-
 /**
  * blkg_destroy_all - destroy all blkgs associated with a request_queue
  * @q: request_queue of interest
@@ -339,7 +319,8 @@ blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
 		for (i = 0; i < BLKCG_MAX_POLS; i++) {
 			struct blkio_policy_type *pol = blkio_policy[i];
 
-			if (pol && pol->ops.blkio_reset_group_stats_fn)
+			if (blkcg_policy_enabled(blkg->q, pol) &&
+			    pol->ops.blkio_reset_group_stats_fn)
 				pol->ops.blkio_reset_group_stats_fn(blkg);
 		}
 	}
@@ -385,7 +366,7 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg,
 
 	spin_lock_irq(&blkcg->lock);
 	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node)
-		if (blkg->pd[pol->plid])
+		if (blkcg_policy_enabled(blkg->q, pol))
 			total += prfill(sf, blkg->pd[pol->plid]->pdata, data);
 	spin_unlock_irq(&blkcg->lock);
 
@@ -510,7 +491,10 @@ int blkg_conf_prep(struct blkio_cgroup *blkcg,
 	rcu_read_lock();
 	spin_lock_irq(disk->queue->queue_lock);
 
-	blkg = blkg_lookup_create(blkcg, disk->queue, false);
+	if (blkcg_policy_enabled(disk->queue, pol))
+		blkg = blkg_lookup_create(blkcg, disk->queue, false);
+	else
+		blkg = ERR_PTR(-EINVAL);
 
 	if (IS_ERR(blkg)) {
 		ret = PTR_ERR(blkg);
@@ -712,30 +696,6 @@ static int blkiocg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
 	return ret;
 }
 
-static void blkcg_bypass_start(void)
-	__acquires(&all_q_mutex)
-{
-	struct request_queue *q;
-
-	mutex_lock(&all_q_mutex);
-
-	list_for_each_entry(q, &all_q_list, all_q_node) {
-		blk_queue_bypass_start(q);
-		blkg_destroy_all(q, false);
-	}
-}
-
-static void blkcg_bypass_end(void)
-	__releases(&all_q_mutex)
-{
-	struct request_queue *q;
-
-	list_for_each_entry(q, &all_q_list, all_q_node)
-		blk_queue_bypass_end(q);
-
-	mutex_unlock(&all_q_mutex);
-}
-
 struct cgroup_subsys blkio_subsys = {
 	.name = "blkio",
 	.create = blkiocg_create,
@@ -749,6 +709,139 @@ struct cgroup_subsys blkio_subsys = {
 EXPORT_SYMBOL_GPL(blkio_subsys);
 
 /**
+ * blkcg_activate_policy - activate a blkcg policy on a request_queue
+ * @q: request_queue of interest
+ * @pol: blkcg policy to activate
+ *
+ * Activate @pol on @q.  Requires %GFP_KERNEL context.  @q goes through
+ * bypass mode to populate its blkgs with policy_data for @pol.
+ *
+ * Activation happens with @q bypassed, so nobody would be accessing blkgs
+ * from IO path.  Update of each blkg is protected by both queue and blkcg
+ * locks so that holding either lock and testing blkcg_policy_enabled() is
+ * always enough for dereferencing policy data.
+ *
+ * The caller is responsible for synchronizing [de]activations and policy
+ * [un]registerations.  Returns 0 on success, -errno on failure.
+ */
+int blkcg_activate_policy(struct request_queue *q,
+			  const struct blkio_policy_type *pol)
+{
+	LIST_HEAD(pds);
+	struct blkio_group *blkg;
+	struct blkg_policy_data *pd, *n;
+	int cnt = 0, ret;
+
+	if (blkcg_policy_enabled(q, pol))
+		return 0;
+
+	blk_queue_bypass_start(q);
+
+	/* make sure the root blkg exists and count the existing blkgs */
+	spin_lock_irq(q->queue_lock);
+
+	rcu_read_lock();
+	blkg = blkg_lookup_create(&blkio_root_cgroup, q, true);
+	rcu_read_unlock();
+
+	if (IS_ERR(blkg)) {
+		ret = PTR_ERR(blkg);
+		goto out_unlock;
+	}
+	q->root_blkg = blkg;
+
+	list_for_each_entry(blkg, &q->blkg_list, q_node)
+		cnt++;
+
+	spin_unlock_irq(q->queue_lock);
+
+	/* allocate policy_data for all existing blkgs */
+	while (cnt--) {
+		pd = kzalloc_node(blkg_pd_size(pol), GFP_KERNEL, q->node);
+		if (!pd) {
+			ret = -ENOMEM;
+			goto out_free;
+		}
+		list_add_tail(&pd->alloc_node, &pds);
+	}
+
+	/*
+	 * Install the allocated pds.  With @q bypassing, no new blkg
+	 * should have been created while the queue lock was dropped.
+	 */
+	spin_lock_irq(q->queue_lock);
+
+	list_for_each_entry(blkg, &q->blkg_list, q_node) {
+		if (WARN_ON(list_empty(&pds))) {
+			/* umm... this shouldn't happen, just abort */
+			ret = -ENOMEM;
+			goto out_unlock;
+		}
+		pd = list_first_entry(&pds, struct blkg_policy_data, alloc_node);
+		list_del_init(&pd->alloc_node);
+
+		/* grab blkcg lock too while installing @pd on @blkg */
+		spin_lock(&blkg->blkcg->lock);
+
+		blkg->pd[pol->plid] = pd;
+		pd->blkg = blkg;
+		pol->ops.blkio_init_group_fn(blkg);
+
+		spin_unlock(&blkg->blkcg->lock);
+	}
+
+	__set_bit(pol->plid, q->blkcg_pols);
+	ret = 0;
+out_unlock:
+	spin_unlock_irq(q->queue_lock);
+out_free:
+	blk_queue_bypass_end(q);
+	list_for_each_entry_safe(pd, n, &pds, alloc_node)
+		kfree(pd);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(blkcg_activate_policy);
+
+/**
+ * blkcg_deactivate_policy - deactivate a blkcg policy on a request_queue
+ * @q: request_queue of interest
+ * @pol: blkcg policy to deactivate
+ *
+ * Deactivate @pol on @q.  Follows the same synchronization rules as
+ * blkcg_activate_policy().
+ */
+void blkcg_deactivate_policy(struct request_queue *q,
+			     const struct blkio_policy_type *pol)
+{
+	struct blkio_group *blkg;
+
+	if (!blkcg_policy_enabled(q, pol))
+		return;
+
+	blk_queue_bypass_start(q);
+	spin_lock_irq(q->queue_lock);
+
+	__clear_bit(pol->plid, q->blkcg_pols);
+
+	list_for_each_entry(blkg, &q->blkg_list, q_node) {
+		/* grab blkcg lock too while removing @pd from @blkg */
+		spin_lock(&blkg->blkcg->lock);
+
+		if (pol->ops.blkio_exit_group_fn)
+			pol->ops.blkio_exit_group_fn(blkg);
+
+		kfree(blkg->pd[pol->plid]);
+		blkg->pd[pol->plid] = NULL;
+
+		spin_unlock(&blkg->blkcg->lock);
+	}
+
+	spin_unlock_irq(q->queue_lock);
+	blk_queue_bypass_end(q);
+}
+EXPORT_SYMBOL_GPL(blkcg_deactivate_policy);
+
+/**
  * blkio_policy_register - register a blkcg policy
  * @blkiop: blkcg policy to register
  *
@@ -758,7 +851,6 @@ EXPORT_SYMBOL_GPL(blkio_subsys);
  */
 int blkio_policy_register(struct blkio_policy_type *blkiop)
 {
-	struct request_queue *q;
 	int i, ret;
 
 	mutex_lock(&blkcg_pol_mutex);
@@ -775,11 +867,6 @@ int blkio_policy_register(struct blkio_policy_type *blkiop)
 	blkiop->plid = i;
 	blkio_policy[i] = blkiop;
 
-	blkcg_bypass_start();
-	list_for_each_entry(q, &all_q_list, all_q_node)
-		update_root_blkg_pd(q, blkiop);
-	blkcg_bypass_end();
-
 	/* everything is in place, add intf files for the new policy */
 	if (blkiop->cftypes)
 		WARN_ON(cgroup_add_cftypes(&blkio_subsys, blkiop->cftypes));
@@ -798,8 +885,6 @@ EXPORT_SYMBOL_GPL(blkio_policy_register);
  */
 void blkio_policy_unregister(struct blkio_policy_type *blkiop)
 {
-	struct request_queue *q;
-
 	mutex_lock(&blkcg_pol_mutex);
 
 	if (WARN_ON(blkio_policy[blkiop->plid] != blkiop))
@@ -811,11 +896,6 @@ void blkio_policy_unregister(struct blkio_policy_type *blkiop)
 
 	/* unregister and update blkgs */
 	blkio_policy[blkiop->plid] = NULL;
-
-	blkcg_bypass_start();
-	list_for_each_entry(q, &all_q_list, all_q_node)
-		update_root_blkg_pd(q, blkiop);
-	blkcg_bypass_end();
 out_unlock:
 	mutex_unlock(&blkcg_pol_mutex);
 }