author     Tejun Heo <tj@kernel.org>        2012-03-05 16:15:15 -0500
committer  Jens Axboe <axboe@kernel.dk>     2012-03-06 15:27:23 -0500
commit     1adaf3dde37a8b9b59ea59c5f58fed7761178383 (patch)
tree       e4a46485b1bf0370aa41a5b9a8f138fba34c9d23
parent     0381411e4b1a52cee134eb73750e5e3cc1155d09 (diff)
blkcg: move refcnt to blkcg core
Currently, blkcg policy implementations manage the blkg refcnt, duplicating
mostly identical code in both policies. This patch moves the refcnt to
blkg and lets blkcg core handle refcounting and freeing of blkgs.
* cfq blkgs now also get freed via RCU.
* cfq blkgs lose the RB_EMPTY_ROOT() sanity check on blkg free. If
necessary, we can add blkio_exit_group_fn() to resurrect it.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--  block/blk-cgroup.c   | 24
-rw-r--r--  block/blk-cgroup.h   | 35
-rw-r--r--  block/blk-throttle.c | 58
-rw-r--r--  block/cfq-iosched.c  | 58

4 files changed, 73 insertions(+), 102 deletions(-)
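Taken together, the patch establishes one lifecycle for every blkg: blkg_alloc() takes a base reference (refcnt = 1), each holder pairs a blkg_get() with a blkg_put(), and when the count drops to zero __blkg_release() frees the group. Below is a minimal, self-contained userspace sketch of that lifecycle. The names mirror the patch, but this is an illustration only; the kernel version additionally requires queue_lock and defers the actual free through RCU.

/* Build: cc -o blkg_sketch blkg_sketch.c */
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

struct blkio_group_sketch {
	int refcnt;			/* under queue_lock in the kernel */
};

static void blkg_release_sketch(struct blkio_group_sketch *blkg)
{
	/* kernel: css_put() on the owning blkcg, then call_rcu() */
	printf("refcnt hit zero, freeing group\n");
	free(blkg);
}

static void blkg_get_sketch(struct blkio_group_sketch *blkg)
{
	assert(blkg->refcnt > 0);	/* caller must already hold a ref */
	blkg->refcnt++;
}

static void blkg_put_sketch(struct blkio_group_sketch *blkg)
{
	assert(blkg->refcnt > 0);
	if (!--blkg->refcnt)
		blkg_release_sketch(blkg);
}

int main(void)
{
	struct blkio_group_sketch *blkg = calloc(1, sizeof(*blkg));

	blkg->refcnt = 1;	/* base ref, as set in blkg_alloc() */
	blkg_get_sketch(blkg);	/* e.g. a bio queued on the group */
	blkg_put_sketch(blkg);	/* the bio is dispatched */
	blkg_put_sketch(blkg);	/* destroy path drops the base ref */
	return 0;
}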
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 14367499cfed..3b6a0e1265aa 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -463,6 +463,7 @@ static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
 	rcu_assign_pointer(blkg->q, q);
 	blkg->blkcg = blkcg;
 	blkg->plid = pol->plid;
+	blkg->refcnt = 1;
 	cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));
 
 	/* alloc per-policy data */
@@ -633,6 +634,29 @@ void blkg_destroy_all(struct request_queue *q)
 	}
 }
 
+static void blkg_rcu_free(struct rcu_head *rcu_head)
+{
+	blkg_free(container_of(rcu_head, struct blkio_group, rcu_head));
+}
+
+void __blkg_release(struct blkio_group *blkg)
+{
+	/* release the extra blkcg reference this blkg has been holding */
+	css_put(&blkg->blkcg->css);
+
+	/*
+	 * A group is freed in rcu manner. But having an rcu lock does not
+	 * mean that one can access all the fields of blkg and assume these
+	 * are valid. For example, don't try to follow throtl_data and
+	 * request queue links.
+	 *
+	 * Having a reference to blkg under an rcu allows access to only
+	 * values local to groups like group stats and group rate limits.
+	 */
+	call_rcu(&blkg->rcu_head, blkg_rcu_free);
+}
+EXPORT_SYMBOL_GPL(__blkg_release);
+
 static void blkio_reset_stats_cpu(struct blkio_group *blkg)
 {
 	struct blkio_group_stats_cpu *stats_cpu;
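The blkg_rcu_free() callback added above relies on the standard container_of() idiom: call_rcu() hands back only the embedded rcu_head, and the callback recovers the enclosing blkio_group by subtracting the member's offset. A hedged userspace approximation follows; the grace-period deferral of the real call_rcu() is simulated here by invoking the callback directly.

#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>

/* same pointer arithmetic as the kernel's container_of() */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct rcu_head_sketch {
	void (*func)(struct rcu_head_sketch *head);
};

struct group_sketch {
	int id;
	struct rcu_head_sketch rcu_head;	/* embedded, as in blkio_group */
};

static void group_rcu_free(struct rcu_head_sketch *head)
{
	/* recover the enclosing group from the embedded member */
	struct group_sketch *grp =
		container_of(head, struct group_sketch, rcu_head);

	printf("freeing group %d after the grace period\n", grp->id);
	free(grp);
}

int main(void)
{
	struct group_sketch *grp = malloc(sizeof(*grp));

	grp->id = 42;
	grp->rcu_head.func = group_rcu_free;
	/* call_rcu() would queue this until existing readers finish */
	grp->rcu_head.func(&grp->rcu_head);
	return 0;
}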
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 9537819c29c6..7da106843f01 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -177,6 +177,8 @@ struct blkio_group {
 	char path[128];
 	/* policy which owns this blk group */
 	enum blkio_policy_id plid;
+	/* reference count */
+	int refcnt;
 
 	/* Configuration */
 	struct blkio_group_conf conf;
@@ -188,6 +190,8 @@ struct blkio_group {
 	struct blkio_group_stats_cpu __percpu *stats_cpu;
 
 	struct blkg_policy_data *pd;
+
+	struct rcu_head rcu_head;
 };
 
 typedef void (blkio_init_group_fn)(struct blkio_group *blkg);
@@ -272,6 +276,35 @@ static inline char *blkg_path(struct blkio_group *blkg)
 	return blkg->path;
 }
 
+/**
+ * blkg_get - get a blkg reference
+ * @blkg: blkg to get
+ *
+ * The caller should be holding queue_lock and an existing reference.
+ */
+static inline void blkg_get(struct blkio_group *blkg)
+{
+	lockdep_assert_held(blkg->q->queue_lock);
+	WARN_ON_ONCE(!blkg->refcnt);
+	blkg->refcnt++;
+}
+
+void __blkg_release(struct blkio_group *blkg);
+
+/**
+ * blkg_put - put a blkg reference
+ * @blkg: blkg to put
+ *
+ * The caller should be holding queue_lock.
+ */
+static inline void blkg_put(struct blkio_group *blkg)
+{
+	lockdep_assert_held(blkg->q->queue_lock);
+	WARN_ON_ONCE(blkg->refcnt <= 0);
+	if (!--blkg->refcnt)
+		__blkg_release(blkg);
+}
+
 #else
 
 struct blkio_group {
@@ -292,6 +325,8 @@ static inline void *blkg_to_pdata(struct blkio_group *blkg,
 static inline struct blkio_group *pdata_to_blkg(void *pdata,
 		struct blkio_policy_type *pol) { return NULL; }
 static inline char *blkg_path(struct blkio_group *blkg) { return NULL; }
+static inline void blkg_get(struct blkio_group *blkg) { }
+static inline void blkg_put(struct blkio_group *blkg) { }
 
 #endif
 
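Note that the new refcnt is a plain int rather than an atomic_t: both helpers assert queue_lock via lockdep_assert_held(), so every increment and decrement is already serialized by the lock and atomic operations would be wasted cost. A rough userspace analogue of that contract, with a pthread mutex standing in for queue_lock:

/* Build: cc -o refcnt_sketch refcnt_sketch.c -lpthread */
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t queue_lock = PTHREAD_MUTEX_INITIALIZER;
static int refcnt = 1;		/* base reference, as after blkg_alloc() */

static void get_ref(void)
{
	/* caller holds queue_lock; the kernel enforces this with lockdep */
	refcnt++;
}

static void put_ref(void)
{
	if (!--refcnt)
		printf("last reference dropped, release the group\n");
}

int main(void)
{
	pthread_mutex_lock(&queue_lock);
	get_ref();		/* a new holder appears */
	put_ref();		/* ... and goes away */
	put_ref();		/* base reference dropped */
	pthread_mutex_unlock(&queue_lock);
	return 0;
}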
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 9c8a12477e13..153ba509446b 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -54,7 +54,6 @@ struct throtl_grp {
 	 */
 	unsigned long disptime;
 
-	atomic_t ref;
 	unsigned int flags;
 
 	/* Two lists for READ and WRITE */
@@ -80,8 +79,6 @@ struct throtl_grp {
 
 	/* Some throttle limits got updated for the group */
 	int limits_changed;
-
-	struct rcu_head rcu_head;
 };
 
 struct throtl_data
@@ -151,45 +148,6 @@ static inline unsigned int total_nr_queued(struct throtl_data *td)
 	return td->nr_queued[0] + td->nr_queued[1];
 }
 
-static inline struct throtl_grp *throtl_ref_get_tg(struct throtl_grp *tg)
-{
-	atomic_inc(&tg->ref);
-	return tg;
-}
-
-static void throtl_free_tg(struct rcu_head *head)
-{
-	struct throtl_grp *tg = container_of(head, struct throtl_grp, rcu_head);
-	struct blkio_group *blkg = tg_to_blkg(tg);
-
-	free_percpu(blkg->stats_cpu);
-	kfree(blkg->pd);
-	kfree(blkg);
-}
-
-static void throtl_put_tg(struct throtl_grp *tg)
-{
-	struct blkio_group *blkg = tg_to_blkg(tg);
-
-	BUG_ON(atomic_read(&tg->ref) <= 0);
-	if (!atomic_dec_and_test(&tg->ref))
-		return;
-
-	/* release the extra blkcg reference this blkg has been holding */
-	css_put(&blkg->blkcg->css);
-
-	/*
-	 * A group is freed in rcu manner. But having an rcu lock does not
-	 * mean that one can access all the fields of blkg and assume these
-	 * are valid. For example, don't try to follow throtl_data and
-	 * request queue links.
-	 *
-	 * Having a reference to blkg under an rcu allows acess to only
-	 * values local to groups like group stats and group rate limits
-	 */
-	call_rcu(&tg->rcu_head, throtl_free_tg);
-}
-
 static void throtl_init_blkio_group(struct blkio_group *blkg)
 {
 	struct throtl_grp *tg = blkg_to_tg(blkg);
@@ -204,14 +162,6 @@ static void throtl_init_blkio_group(struct blkio_group *blkg)
 	tg->bps[WRITE] = -1;
 	tg->iops[READ] = -1;
 	tg->iops[WRITE] = -1;
-
-	/*
-	 * Take the initial reference that will be released on destroy
-	 * This can be thought of a joint reference by cgroup and
-	 * request queue which will be dropped by either request queue
-	 * exit or cgroup deletion path depending on who is exiting first.
-	 */
-	atomic_set(&tg->ref, 1);
 }
 
 static void throtl_link_blkio_group(struct request_queue *q,
@@ -648,7 +598,7 @@ static void throtl_add_bio_tg(struct throtl_data *td, struct throtl_grp *tg,
 
 	bio_list_add(&tg->bio_lists[rw], bio);
 	/* Take a bio reference on tg */
-	throtl_ref_get_tg(tg);
+	blkg_get(tg_to_blkg(tg));
 	tg->nr_queued[rw]++;
 	td->nr_queued[rw]++;
 	throtl_enqueue_tg(td, tg);
@@ -681,8 +631,8 @@ static void tg_dispatch_one_bio(struct throtl_data *td, struct throtl_grp *tg,
 
 	bio = bio_list_pop(&tg->bio_lists[rw]);
 	tg->nr_queued[rw]--;
-	/* Drop bio reference on tg */
-	throtl_put_tg(tg);
+	/* Drop bio reference on blkg */
+	blkg_put(tg_to_blkg(tg));
 
 	BUG_ON(td->nr_queued[rw] <= 0);
 	td->nr_queued[rw]--;
@@ -880,7 +830,7 @@ throtl_destroy_tg(struct throtl_data *td, struct throtl_grp *tg)
 	 * Put the reference taken at the time of creation so that when all
 	 * queues are gone, group can be destroyed.
 	 */
-	throtl_put_tg(tg);
+	blkg_put(tg_to_blkg(tg));
 	td->nr_undestroyed_grps--;
 }
 
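For contrast, the deleted throttle code counted references with an atomic_t, so throtl_ref_get_tg()/throtl_put_tg() needed no lock around the count itself. A brief C11 sketch of that older pattern (simplified: the real throtl_put_tg() also dropped the blkcg css reference and deferred the free through call_rcu()):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int tg_ref = 1;	/* initial ref, as in throtl_init_blkio_group() */

static void tg_ref_get(void)
{
	atomic_fetch_add(&tg_ref, 1);	/* like atomic_inc(&tg->ref) */
}

static void tg_ref_put(void)
{
	/* mirrors atomic_dec_and_test(): act only on the 1 -> 0 edge */
	if (atomic_fetch_sub(&tg_ref, 1) == 1)
		printf("last ref gone; css_put() and call_rcu() would run here\n");
}

int main(void)
{
	tg_ref_get();	/* a queued bio */
	tg_ref_put();	/* bio dispatched */
	tg_ref_put();	/* destroy path drops the initial ref */
	return 0;
}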
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index c7449db52a86..86980023339a 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -210,7 +210,6 @@ struct cfq_group {
 	enum wl_prio_t saved_serving_prio;
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
 	struct hlist_node cfqd_node;
-	int ref;
 #endif
 	/* number of requests that are on the dispatch list or inside driver */
 	int dispatched;
@@ -1071,14 +1070,6 @@ static void cfq_init_blkio_group(struct blkio_group *blkg)
 
 	cfq_init_cfqg_base(cfqg);
 	cfqg->weight = blkg->blkcg->weight;
-
-	/*
-	 * Take the initial reference that will be released on destroy
-	 * This can be thought of a joint reference by cgroup and
-	 * elevator which will be dropped by either elevator exit
-	 * or cgroup deletion path depending on who is exiting first.
-	 */
-	cfqg->ref = 1;
 }
 
 /*
@@ -1105,12 +1096,6 @@ static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd,
 	return cfqg;
 }
 
-static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
-{
-	cfqg->ref++;
-	return cfqg;
-}
-
 static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
 {
 	/* Currently, all async queues are mapped to root group */
@@ -1119,28 +1104,7 @@ static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
 
 	cfqq->cfqg = cfqg;
 	/* cfqq reference on cfqg */
-	cfqq->cfqg->ref++;
-}
-
-static void cfq_put_cfqg(struct cfq_group *cfqg)
-{
-	struct blkio_group *blkg = cfqg_to_blkg(cfqg);
-	struct cfq_rb_root *st;
-	int i, j;
-
-	BUG_ON(cfqg->ref <= 0);
-	cfqg->ref--;
-	if (cfqg->ref)
-		return;
-
-	/* release the extra blkcg reference this blkg has been holding */
-	css_put(&blkg->blkcg->css);
-
-	for_each_cfqg_st(cfqg, i, j, st)
-		BUG_ON(!RB_EMPTY_ROOT(&st->rb));
-	free_percpu(blkg->stats_cpu);
-	kfree(blkg->pd);
-	kfree(blkg);
+	blkg_get(cfqg_to_blkg(cfqg));
 }
 
 static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
@@ -1157,7 +1121,7 @@ static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
 	 * Put the reference taken at the time of creation so that when all
 	 * queues are gone, group can be destroyed.
 	 */
-	cfq_put_cfqg(cfqg);
+	blkg_put(cfqg_to_blkg(cfqg));
 }
 
 static bool cfq_release_cfq_groups(struct cfq_data *cfqd)
@@ -1225,18 +1189,12 @@ static struct cfq_group *cfq_lookup_create_cfqg(struct cfq_data *cfqd,
 	return cfqd->root_group;
 }
 
-static inline struct cfq_group *cfq_ref_get_cfqg(struct cfq_group *cfqg)
-{
-	return cfqg;
-}
-
 static inline void
 cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) {
 	cfqq->cfqg = cfqg;
 }
 
 static void cfq_release_cfq_groups(struct cfq_data *cfqd) {}
-static inline void cfq_put_cfqg(struct cfq_group *cfqg) {}
 
 #endif /* GROUP_IOSCHED */
 
@@ -2630,7 +2588,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 
 	BUG_ON(cfq_cfqq_on_rr(cfqq));
 	kmem_cache_free(cfq_pool, cfqq);
-	cfq_put_cfqg(cfqg);
+	blkg_put(cfqg_to_blkg(cfqg));
 }
 
 static void cfq_put_cooperator(struct cfq_queue *cfqq)
@@ -3382,7 +3340,7 @@ static void cfq_put_request(struct request *rq)
 	cfqq->allocated[rw]--;
 
 	/* Put down rq reference on cfqg */
-	cfq_put_cfqg(RQ_CFQG(rq));
+	blkg_put(cfqg_to_blkg(RQ_CFQG(rq)));
 	rq->elv.priv[0] = NULL;
 	rq->elv.priv[1] = NULL;
 
@@ -3477,8 +3435,9 @@ new_queue:
 	cfqq->allocated[rw]++;
 
 	cfqq->ref++;
+	blkg_get(cfqg_to_blkg(cfqq->cfqg));
 	rq->elv.priv[0] = cfqq;
-	rq->elv.priv[1] = cfq_ref_get_cfqg(cfqq->cfqg);
+	rq->elv.priv[1] = cfqq->cfqg;
 	spin_unlock_irq(q->queue_lock);
 	return 0;
 }
@@ -3676,8 +3635,11 @@ static int cfq_init_queue(struct request_queue *q)
 	 */
 	cfq_init_cfqq(cfqd, &cfqd->oom_cfqq, 1, 0);
 	cfqd->oom_cfqq.ref++;
+
+	spin_lock_irq(q->queue_lock);
 	cfq_link_cfqq_cfqg(&cfqd->oom_cfqq, cfqd->root_group);
-	cfq_put_cfqg(cfqd->root_group);
+	blkg_put(cfqg_to_blkg(cfqd->root_group));
+	spin_unlock_irq(q->queue_lock);
 
 	init_timer(&cfqd->idle_slice_timer);
 	cfqd->idle_slice_timer.function = cfq_idle_slice_timer;
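One knock-on effect worth noting in the final cfq hunk: because blkg_get() and blkg_put() now assert queue_lock, cfq_init_queue() has to wrap the oom_cfqq linking and the drop of the extra root_group reference in spin_lock_irq(q->queue_lock), even though no I/O can be in flight that early in queue setup.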