author		Tejun Heo <tj@kernel.org>	2012-03-05 16:15:06 -0500
committer	Jens Axboe <axboe@kernel.dk>	2012-03-06 15:27:22 -0500
commit		cd1604fab4f95f7cfc227d3955fd7ae14da61f38 (patch)
tree		021881faedc1c2468730f9f54d364083e70dce76 /block/cfq-iosched.c
parent		f51b802c17e2a21926b29911493f5e7ddf6eee87 (diff)
blkcg: factor out blkio_group creation
Currently, both blk-throttle and cfq-iosched implement their own
blkio_group creation code in throtl_get_tg() and cfq_get_cfqg(). This
patch factors out the common code into blkg_lookup_create(), which
returns an ERR_PTR value so that transitional failures due to queue
bypass can be distinguished from other failures.
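For illustration, the caller-side pattern this enables looks roughly
like the sketch below; it mirrors the cfq conversion in the diff, with
the surrounding declarations assumed:

	struct blkio_group *blkg;
	struct cfq_group *cfqg = NULL;

	/* valid blkg on success, ERR_PTR() e.g. on a bypassing queue */
	blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_PROP, false);
	if (!IS_ERR(blkg))
		cfqg = cfqg_of_blkg(blkg);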
* New blkio_policy_ops methods blkio_alloc_group_fn() and
  blkio_link_group_fn() are added (see the sketch after this list).
  Both are transitional and will be removed once the blkg management
  code is fully moved into blk-cgroup.c.
* blkio_alloc_group_fn() allocates the policy-specific blkg, which is
  usually a larger data structure with the blkg as its first entry,
  and initializes it. Note that initialization of the blkg proper,
  including percpu stats, is the responsibility of blk-cgroup proper.

  Note that default config (weight, bps...) initialization is done
  from this method; otherwise, we end up violating the locking order
  between the blkcg and q locks via the blkcg_get_CONF() functions.
* blkio_link_group_fn() is called under queue_lock and is responsible
  for linking the blkg to the queue. The blkcg side is handled by
  blk-cgroup proper.
* The common blkg creation function is named blkg_lookup_create() and
  blkiocg_lookup_group() is renamed to blkg_lookup() for consistency.
  Also, throtl / cfq related functions are similarly [re]named for
  consistency.
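As a rough sketch, the two transitional hooks have the shape below, as
implied by the cfq conversion in the diff; the exact prototypes live
in blk-cgroup.h and the unrelated members are elided:

	struct blkio_policy_ops {
		...
		/* allocate the policy-specific blkg (blkg usually
		 * embedded as the first member) and initialize it */
		struct blkio_group *(*blkio_alloc_group_fn)(struct request_queue *q,
							    struct blkio_cgroup *blkcg);
		/* called under queue_lock; link the blkg to the queue */
		void (*blkio_link_group_fn)(struct request_queue *q,
					    struct blkio_group *blkg);
		...
	};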
This simplifies blkcg policy implementations and enables further
cleanup.
-v2: Vivek noticed that blkg_lookup_create() incorrectly tested
     blk_queue_dead() instead of blk_queue_bypass(), leading a user of
     the function to end up creating a new blkg on a bypassing queue.
     This is a bug introduced while relocating bypass patches before
     this one. Fixed.
-v3: ERR_PTR patch folded into this one. @for_root added to
     blkg_lookup_create() to allow creating the root group on a
     bypassed queue during elevator switch.
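Given the call sites in the diff (BLKIO_POLICY_PROP with @for_root
false on the regular lookup path, true from cfq_init_queue()),
blkg_lookup_create() presumably ends up with roughly the signature and
bypass check below; the errnos and the plid parameter type are
assumptions here, not confirmed by this page:

	struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
					       struct request_queue *q,
					       enum blkio_policy_id plid,
					       bool for_root)
	{
		...
		/* per -v2: test bypass, not dead; @for_root (-v3) lets
		 * the root group be created during elevator switch */
		if (unlikely(blk_queue_bypass(q)) && !for_root)
			return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);
		...
	}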
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block/cfq-iosched.c')
-rw-r--r--	block/cfq-iosched.c	131
1 file changed, 40 insertions(+), 91 deletions(-)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 1c3f41b9d5dd..acef564578c3 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1048,10 +1048,12 @@ static void cfq_update_blkio_group_weight(struct request_queue *q,
 	cfqg->needs_update = true;
 }
 
-static void cfq_init_add_cfqg_lists(struct cfq_data *cfqd,
-				    struct cfq_group *cfqg, struct blkio_cgroup *blkcg)
+static void cfq_link_blkio_group(struct request_queue *q,
+				 struct blkio_group *blkg)
 {
-	struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
+	struct cfq_data *cfqd = q->elevator->elevator_data;
+	struct backing_dev_info *bdi = &q->backing_dev_info;
+	struct cfq_group *cfqg = cfqg_of_blkg(blkg);
 	unsigned int major, minor;
 
 	/*
@@ -1062,34 +1064,26 @@ static void cfq_init_add_cfqg_lists(struct cfq_data *cfqd,
 	 */
 	if (bdi->dev) {
 		sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-		cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
-					cfqd->queue, MKDEV(major, minor));
-	} else
-		cfq_blkiocg_add_blkio_group(blkcg, &cfqg->blkg,
-					cfqd->queue, 0);
+		blkg->dev = MKDEV(major, minor);
+	}
 
 	cfqd->nr_blkcg_linked_grps++;
-	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
 
 	/* Add group on cfqd list */
 	hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
 }
 
-/*
- * Should be called from sleepable context. No request queue lock as per
- * cpu stats are allocated dynamically and alloc_percpu needs to be called
- * from sleepable context.
- */
-static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
+static struct blkio_group *cfq_alloc_blkio_group(struct request_queue *q,
+						 struct blkio_cgroup *blkcg)
 {
 	struct cfq_group *cfqg;
-	int ret;
 
-	cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node);
+	cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, q->node);
 	if (!cfqg)
 		return NULL;
 
 	cfq_init_cfqg_base(cfqg);
+	cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
 
 	/*
 	 * Take the initial reference that will be released on destroy
@@ -1099,90 +1093,38 @@ static struct cfq_group * cfq_alloc_cfqg(struct cfq_data *cfqd)
 	 */
 	cfqg->ref = 1;
 
-	ret = blkio_alloc_blkg_stats(&cfqg->blkg);
-	if (ret) {
-		kfree(cfqg);
-		return NULL;
-	}
-
-	return cfqg;
-}
-
-static struct cfq_group *
-cfq_find_cfqg(struct cfq_data *cfqd, struct blkio_cgroup *blkcg)
-{
-	struct cfq_group *cfqg = NULL;
-	struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
-	unsigned int major, minor;
-
-	/*
-	 * This is the common case when there are no blkio cgroups.
-	 * Avoid lookup in this case
-	 */
-	if (blkcg == &blkio_root_cgroup)
-		cfqg = cfqd->root_group;
-	else
-		cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, cfqd->queue,
-							 BLKIO_POLICY_PROP));
-
-	if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
-		sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
-		cfqg->blkg.dev = MKDEV(major, minor);
-	}
-
-	return cfqg;
+	return &cfqg->blkg;
 }
 
 /*
  * Search for the cfq group current task belongs to. request_queue lock must
  * be held.
  */
-static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd,
-				      struct blkio_cgroup *blkcg)
+static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd,
+						struct blkio_cgroup *blkcg)
 {
-	struct cfq_group *cfqg = NULL, *__cfqg = NULL;
 	struct request_queue *q = cfqd->queue;
+	struct backing_dev_info *bdi = &q->backing_dev_info;
+	struct cfq_group *cfqg = NULL;
 
-	cfqg = cfq_find_cfqg(cfqd, blkcg);
-	if (cfqg)
-		return cfqg;
-
-	if (!css_tryget(&blkcg->css))
-		return NULL;
-
-	/*
-	 * Need to allocate a group. Allocation of group also needs allocation
-	 * of per cpu stats which in-turn takes a mutex() and can block. Hence
-	 * we need to drop rcu lock and queue_lock before we call alloc.
-	 *
-	 * Not taking any queue reference here and assuming that queue is
-	 * around by the time we return. CFQ queue allocation code does
-	 * the same. It might be racy though.
-	 */
-	rcu_read_unlock();
-	spin_unlock_irq(q->queue_lock);
-
-	cfqg = cfq_alloc_cfqg(cfqd);
+	/* avoid lookup for the common case where there's no blkio cgroup */
+	if (blkcg == &blkio_root_cgroup) {
+		cfqg = cfqd->root_group;
+	} else {
+		struct blkio_group *blkg;
 
-	spin_lock_irq(q->queue_lock);
-	rcu_read_lock();
-	css_put(&blkcg->css);
+		blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_PROP, false);
+		if (!IS_ERR(blkg))
+			cfqg = cfqg_of_blkg(blkg);
+	}
 
-	/*
-	 * If some other thread already allocated the group while we were
-	 * not holding queue lock, free up the group
-	 */
-	__cfqg = cfq_find_cfqg(cfqd, blkcg);
+	if (cfqg && !cfqg->blkg.dev && bdi->dev && dev_name(bdi->dev)) {
+		unsigned int major, minor;
 
-	if (__cfqg) {
-		kfree(cfqg);
-		return __cfqg;
-	}
+		sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
+		cfqg->blkg.dev = MKDEV(major, minor);
+	}
 
-	if (!cfqg)
-		cfqg = cfqd->root_group;
-
-	cfq_init_add_cfqg_lists(cfqd, cfqg, blkcg);
 	return cfqg;
 }
 
@@ -1294,8 +1236,8 @@ static bool cfq_clear_queue(struct request_queue *q)
 }
 
 #else /* GROUP_IOSCHED */
-static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd,
-				      struct blkio_cgroup *blkcg)
+static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd,
+						struct blkio_cgroup *blkcg)
 {
 	return cfqd->root_group;
 }
@@ -2887,7 +2829,8 @@ retry:
 
 	blkcg = task_blkio_cgroup(current);
 
-	cfqg = cfq_get_cfqg(cfqd, blkcg);
+	cfqg = cfq_lookup_create_cfqg(cfqd, blkcg);
+
 	cic = cfq_cic_lookup(cfqd, ioc);
 	/* cic always exists here */
 	cfqq = cic_to_cfqq(cic, is_sync);
@@ -3694,6 +3637,7 @@ static void cfq_exit_queue(struct elevator_queue *e)
 static int cfq_init_queue(struct request_queue *q)
 {
 	struct cfq_data *cfqd;
+	struct blkio_group *blkg __maybe_unused;
 	int i;
 
 	cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node);
@@ -3711,7 +3655,10 @@ static int cfq_init_queue(struct request_queue *q)
 	rcu_read_lock();
 	spin_lock_irq(q->queue_lock);
 
-	cfqd->root_group = cfq_get_cfqg(cfqd, &blkio_root_cgroup);
+	blkg = blkg_lookup_create(&blkio_root_cgroup, q, BLKIO_POLICY_PROP,
+				  true);
+	if (!IS_ERR(blkg))
+		cfqd->root_group = cfqg_of_blkg(blkg);
 
 	spin_unlock_irq(q->queue_lock);
 	rcu_read_unlock();
@@ -3897,6 +3844,8 @@ static struct elevator_type iosched_cfq = {
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
 static struct blkio_policy_type blkio_policy_cfq = {
 	.ops = {
+		.blkio_alloc_group_fn =		cfq_alloc_blkio_group,
+		.blkio_link_group_fn =		cfq_link_blkio_group,
 		.blkio_unlink_group_fn =	cfq_unlink_blkio_group,
 		.blkio_clear_queue_fn =		cfq_clear_queue,
 		.blkio_update_group_weight_fn =	cfq_update_blkio_group_weight,