diff options
author | Tejun Heo <tj@kernel.org> | 2012-03-05 16:15:00 -0500 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2012-03-06 15:27:22 -0500 |
commit | 72e06c255181537d0b3e1f657a9ed81655d745b1 (patch) | |
tree | eb656df2ad23a7709b4e9fe58f1dabdc32be9ae9 | |
parent | 6ecf23afab13c39d3bb0e2d826d0984b0dd53733 (diff) |
blkcg: shoot down blkio_groups on elevator switch
Elevator switch may involve changes to blkcg policies. Implement
shoot down of blkio_groups.
Combined with the previous bypass updates, the end goal is updating
blkcg core such that it can ensure that blkcgs being affected become
quiescent and don't have any per-blkg data hanging around before
commencing any policy updates. Until queues are made aware of the
policies that apply to them, as an interim step, all per-policy blkg
data will be shot down.
* blk-throtl doesn't need this change as it can't be disabled for a
live queue; however, update it anyway as the scheduled blkg
unification requires this behavior change. This means that
blk-throtl configuration will be unnecessarily lost over elevator
switch. This oddity will be removed after blkcg learns to associate
individual policies with request_queues.
* blk-throtl doesn't shoot down root_tg. This is to ease the transition.
Unified blkg will always have a persistent root group, and not shooting
down root_tg for now eases the transition to that point by avoiding
having to update td->root_tg. This is safe as blk-throtl can never be
disabled.
-v2: Vivek pointed out that group list is not guaranteed to be empty
on return from clear function if it raced cgroup removal and
lost. Fix it by waiting a bit and retrying. This kludge will
soon be removed once locking is updated such that blkg is never
in limbo state between blkcg and request_queue locks.
blk-throtl no longer shoots down root_tg to avoid breaking
td->root_tg.
Also, nest queue_lock inside blkio_list_lock, not the other way
around, to avoid introducing a possible deadlock via blkcg lock.
-v3: blkcg_clear_queue() repositioned and renamed to
blkg_destroy_all() to increase consistency with later changes.
cfq_clear_queue() updated to check q->elevator before
dereferencing it to avoid NULL dereference on not fully
initialized queues (used by later change).
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r-- | block/blk-cgroup.c | 34 | ||||
-rw-r--r-- | block/blk-cgroup.h | 5 | ||||
-rw-r--r-- | block/blk-throttle.c | 27 | ||||
-rw-r--r-- | block/cfq-iosched.c | 20 | ||||
-rw-r--r-- | block/elevator.c | 3 |
5 files changed, 84 insertions, 5 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 970a717a056f..159aef59589f 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c | |||
@@ -17,8 +17,9 @@ | |||
17 | #include <linux/err.h> | 17 | #include <linux/err.h> |
18 | #include <linux/blkdev.h> | 18 | #include <linux/blkdev.h> |
19 | #include <linux/slab.h> | 19 | #include <linux/slab.h> |
20 | #include "blk-cgroup.h" | ||
21 | #include <linux/genhd.h> | 20 | #include <linux/genhd.h> |
21 | #include <linux/delay.h> | ||
22 | #include "blk-cgroup.h" | ||
22 | 23 | ||
23 | #define MAX_KEY_LEN 100 | 24 | #define MAX_KEY_LEN 100 |
24 | 25 | ||
@@ -546,6 +547,37 @@ struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) | |||
546 | } | 547 | } |
547 | EXPORT_SYMBOL_GPL(blkiocg_lookup_group); | 548 | EXPORT_SYMBOL_GPL(blkiocg_lookup_group); |
548 | 549 | ||
550 | void blkg_destroy_all(struct request_queue *q) | ||
551 | { | ||
552 | struct blkio_policy_type *pol; | ||
553 | |||
554 | while (true) { | ||
555 | bool done = true; | ||
556 | |||
557 | spin_lock(&blkio_list_lock); | ||
558 | spin_lock_irq(q->queue_lock); | ||
559 | |||
560 | /* | ||
561 | * clear_queue_fn() might return with non-empty group list | ||
562 | * if it raced cgroup removal and lost. cgroup removal is | ||
563 | * guaranteed to make forward progress and retrying after a | ||
564 | * while is enough. This ugliness is scheduled to be | ||
565 | * removed after locking update. | ||
566 | */ | ||
567 | list_for_each_entry(pol, &blkio_list, list) | ||
568 | if (!pol->ops.blkio_clear_queue_fn(q)) | ||
569 | done = false; | ||
570 | |||
571 | spin_unlock_irq(q->queue_lock); | ||
572 | spin_unlock(&blkio_list_lock); | ||
573 | |||
574 | if (done) | ||
575 | break; | ||
576 | |||
577 | msleep(10); /* just some random duration I like */ | ||
578 | } | ||
579 | } | ||
580 | |||
549 | static void blkio_reset_stats_cpu(struct blkio_group *blkg) | 581 | static void blkio_reset_stats_cpu(struct blkio_group *blkg) |
550 | { | 582 | { |
551 | struct blkio_group_stats_cpu *stats_cpu; | 583 | struct blkio_group_stats_cpu *stats_cpu; |
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h index 355168772f51..e5cfcbd4d2f4 100644 --- a/block/blk-cgroup.h +++ b/block/blk-cgroup.h | |||
@@ -203,7 +203,7 @@ extern unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, | |||
203 | dev_t dev); | 203 | dev_t dev); |
204 | 204 | ||
205 | typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg); | 205 | typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg); |
206 | 206 | typedef bool (blkio_clear_queue_fn)(struct request_queue *q); | |
207 | typedef void (blkio_update_group_weight_fn) (void *key, | 207 | typedef void (blkio_update_group_weight_fn) (void *key, |
208 | struct blkio_group *blkg, unsigned int weight); | 208 | struct blkio_group *blkg, unsigned int weight); |
209 | typedef void (blkio_update_group_read_bps_fn) (void * key, | 209 | typedef void (blkio_update_group_read_bps_fn) (void * key, |
@@ -217,6 +217,7 @@ typedef void (blkio_update_group_write_iops_fn) (void *key, | |||
217 | 217 | ||
218 | struct blkio_policy_ops { | 218 | struct blkio_policy_ops { |
219 | blkio_unlink_group_fn *blkio_unlink_group_fn; | 219 | blkio_unlink_group_fn *blkio_unlink_group_fn; |
220 | blkio_clear_queue_fn *blkio_clear_queue_fn; | ||
220 | blkio_update_group_weight_fn *blkio_update_group_weight_fn; | 221 | blkio_update_group_weight_fn *blkio_update_group_weight_fn; |
221 | blkio_update_group_read_bps_fn *blkio_update_group_read_bps_fn; | 222 | blkio_update_group_read_bps_fn *blkio_update_group_read_bps_fn; |
222 | blkio_update_group_write_bps_fn *blkio_update_group_write_bps_fn; | 223 | blkio_update_group_write_bps_fn *blkio_update_group_write_bps_fn; |
@@ -233,6 +234,7 @@ struct blkio_policy_type { | |||
233 | /* Blkio controller policy registration */ | 234 | /* Blkio controller policy registration */ |
234 | extern void blkio_policy_register(struct blkio_policy_type *); | 235 | extern void blkio_policy_register(struct blkio_policy_type *); |
235 | extern void blkio_policy_unregister(struct blkio_policy_type *); | 236 | extern void blkio_policy_unregister(struct blkio_policy_type *); |
237 | extern void blkg_destroy_all(struct request_queue *q); | ||
236 | 238 | ||
237 | static inline char *blkg_path(struct blkio_group *blkg) | 239 | static inline char *blkg_path(struct blkio_group *blkg) |
238 | { | 240 | { |
@@ -249,6 +251,7 @@ struct blkio_policy_type { | |||
249 | 251 | ||
250 | static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { } | 252 | static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { } |
251 | static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { } | 253 | static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { } |
254 | static inline void blkg_destroy_all(struct request_queue *q) { } | ||
252 | 255 | ||
253 | static inline char *blkg_path(struct blkio_group *blkg) { return NULL; } | 256 | static inline char *blkg_path(struct blkio_group *blkg) { return NULL; } |
254 | 257 | ||
diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 702c0e64e09f..3699ab40d494 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c | |||
@@ -989,12 +989,17 @@ throtl_destroy_tg(struct throtl_data *td, struct throtl_grp *tg) | |||
989 | td->nr_undestroyed_grps--; | 989 | td->nr_undestroyed_grps--; |
990 | } | 990 | } |
991 | 991 | ||
992 | static void throtl_release_tgs(struct throtl_data *td) | 992 | static bool throtl_release_tgs(struct throtl_data *td, bool release_root) |
993 | { | 993 | { |
994 | struct hlist_node *pos, *n; | 994 | struct hlist_node *pos, *n; |
995 | struct throtl_grp *tg; | 995 | struct throtl_grp *tg; |
996 | bool empty = true; | ||
996 | 997 | ||
997 | hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) { | 998 | hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) { |
999 | /* skip root? */ | ||
1000 | if (!release_root && tg == td->root_tg) | ||
1001 | continue; | ||
1002 | |||
998 | /* | 1003 | /* |
999 | * If cgroup removal path got to blk_group first and removed | 1004 | * If cgroup removal path got to blk_group first and removed |
1000 | * it from cgroup list, then it will take care of destroying | 1005 | * it from cgroup list, then it will take care of destroying |
@@ -1002,7 +1007,10 @@ static void throtl_release_tgs(struct throtl_data *td) | |||
1002 | */ | 1007 | */ |
1003 | if (!blkiocg_del_blkio_group(&tg->blkg)) | 1008 | if (!blkiocg_del_blkio_group(&tg->blkg)) |
1004 | throtl_destroy_tg(td, tg); | 1009 | throtl_destroy_tg(td, tg); |
1010 | else | ||
1011 | empty = false; | ||
1005 | } | 1012 | } |
1013 | return empty; | ||
1006 | } | 1014 | } |
1007 | 1015 | ||
1008 | /* | 1016 | /* |
@@ -1029,6 +1037,20 @@ void throtl_unlink_blkio_group(void *key, struct blkio_group *blkg) | |||
1029 | spin_unlock_irqrestore(td->queue->queue_lock, flags); | 1037 | spin_unlock_irqrestore(td->queue->queue_lock, flags); |
1030 | } | 1038 | } |
1031 | 1039 | ||
1040 | static bool throtl_clear_queue(struct request_queue *q) | ||
1041 | { | ||
1042 | lockdep_assert_held(q->queue_lock); | ||
1043 | |||
1044 | /* | ||
1045 | * Clear tgs but leave the root one alone. This is necessary | ||
1046 | * because root_tg is expected to be persistent and safe because | ||
1047 | * blk-throtl can never be disabled while @q is alive. This is a | ||
1048 | * kludge to prepare for unified blkg. This whole function will be | ||
1049 | * removed soon. | ||
1050 | */ | ||
1051 | return throtl_release_tgs(q->td, false); | ||
1052 | } | ||
1053 | |||
1032 | static void throtl_update_blkio_group_common(struct throtl_data *td, | 1054 | static void throtl_update_blkio_group_common(struct throtl_data *td, |
1033 | struct throtl_grp *tg) | 1055 | struct throtl_grp *tg) |
1034 | { | 1056 | { |
@@ -1097,6 +1119,7 @@ static void throtl_shutdown_wq(struct request_queue *q) | |||
1097 | static struct blkio_policy_type blkio_policy_throtl = { | 1119 | static struct blkio_policy_type blkio_policy_throtl = { |
1098 | .ops = { | 1120 | .ops = { |
1099 | .blkio_unlink_group_fn = throtl_unlink_blkio_group, | 1121 | .blkio_unlink_group_fn = throtl_unlink_blkio_group, |
1122 | .blkio_clear_queue_fn = throtl_clear_queue, | ||
1100 | .blkio_update_group_read_bps_fn = | 1123 | .blkio_update_group_read_bps_fn = |
1101 | throtl_update_blkio_group_read_bps, | 1124 | throtl_update_blkio_group_read_bps, |
1102 | .blkio_update_group_write_bps_fn = | 1125 | .blkio_update_group_write_bps_fn = |
@@ -1282,7 +1305,7 @@ void blk_throtl_exit(struct request_queue *q) | |||
1282 | throtl_shutdown_wq(q); | 1305 | throtl_shutdown_wq(q); |
1283 | 1306 | ||
1284 | spin_lock_irq(q->queue_lock); | 1307 | spin_lock_irq(q->queue_lock); |
1285 | throtl_release_tgs(td); | 1308 | throtl_release_tgs(td, true); |
1286 | 1309 | ||
1287 | /* If there are other groups */ | 1310 | /* If there are other groups */ |
1288 | if (td->nr_undestroyed_grps > 0) | 1311 | if (td->nr_undestroyed_grps > 0) |
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 72680a6715fc..61693d3404d0 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c | |||
@@ -1225,10 +1225,11 @@ static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg) | |||
1225 | cfq_put_cfqg(cfqg); | 1225 | cfq_put_cfqg(cfqg); |
1226 | } | 1226 | } |
1227 | 1227 | ||
1228 | static void cfq_release_cfq_groups(struct cfq_data *cfqd) | 1228 | static bool cfq_release_cfq_groups(struct cfq_data *cfqd) |
1229 | { | 1229 | { |
1230 | struct hlist_node *pos, *n; | 1230 | struct hlist_node *pos, *n; |
1231 | struct cfq_group *cfqg; | 1231 | struct cfq_group *cfqg; |
1232 | bool empty = true; | ||
1232 | 1233 | ||
1233 | hlist_for_each_entry_safe(cfqg, pos, n, &cfqd->cfqg_list, cfqd_node) { | 1234 | hlist_for_each_entry_safe(cfqg, pos, n, &cfqd->cfqg_list, cfqd_node) { |
1234 | /* | 1235 | /* |
@@ -1238,7 +1239,10 @@ static void cfq_release_cfq_groups(struct cfq_data *cfqd) | |||
1238 | */ | 1239 | */ |
1239 | if (!cfq_blkiocg_del_blkio_group(&cfqg->blkg)) | 1240 | if (!cfq_blkiocg_del_blkio_group(&cfqg->blkg)) |
1240 | cfq_destroy_cfqg(cfqd, cfqg); | 1241 | cfq_destroy_cfqg(cfqd, cfqg); |
1242 | else | ||
1243 | empty = false; | ||
1241 | } | 1244 | } |
1245 | return empty; | ||
1242 | } | 1246 | } |
1243 | 1247 | ||
1244 | /* | 1248 | /* |
@@ -1265,6 +1269,19 @@ static void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg) | |||
1265 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); | 1269 | spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); |
1266 | } | 1270 | } |
1267 | 1271 | ||
1272 | static struct elevator_type iosched_cfq; | ||
1273 | |||
1274 | static bool cfq_clear_queue(struct request_queue *q) | ||
1275 | { | ||
1276 | lockdep_assert_held(q->queue_lock); | ||
1277 | |||
1278 | /* shoot down blkgs iff the current elevator is cfq */ | ||
1279 | if (!q->elevator || q->elevator->type != &iosched_cfq) | ||
1280 | return true; | ||
1281 | |||
1282 | return cfq_release_cfq_groups(q->elevator->elevator_data); | ||
1283 | } | ||
1284 | |||
1268 | #else /* GROUP_IOSCHED */ | 1285 | #else /* GROUP_IOSCHED */ |
1269 | static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd) | 1286 | static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd) |
1270 | { | 1287 | { |
@@ -3875,6 +3892,7 @@ static struct elevator_type iosched_cfq = { | |||
3875 | static struct blkio_policy_type blkio_policy_cfq = { | 3892 | static struct blkio_policy_type blkio_policy_cfq = { |
3876 | .ops = { | 3893 | .ops = { |
3877 | .blkio_unlink_group_fn = cfq_unlink_blkio_group, | 3894 | .blkio_unlink_group_fn = cfq_unlink_blkio_group, |
3895 | .blkio_clear_queue_fn = cfq_clear_queue, | ||
3878 | .blkio_update_group_weight_fn = cfq_update_blkio_group_weight, | 3896 | .blkio_update_group_weight_fn = cfq_update_blkio_group_weight, |
3879 | }, | 3897 | }, |
3880 | .plid = BLKIO_POLICY_PROP, | 3898 | .plid = BLKIO_POLICY_PROP, |
diff --git a/block/elevator.c b/block/elevator.c index 0bdea0ed03a3..8c7561fd2c79 100644 --- a/block/elevator.c +++ b/block/elevator.c | |||
@@ -38,6 +38,7 @@ | |||
38 | #include <trace/events/block.h> | 38 | #include <trace/events/block.h> |
39 | 39 | ||
40 | #include "blk.h" | 40 | #include "blk.h" |
41 | #include "blk-cgroup.h" | ||
41 | 42 | ||
42 | static DEFINE_SPINLOCK(elv_list_lock); | 43 | static DEFINE_SPINLOCK(elv_list_lock); |
43 | static LIST_HEAD(elv_list); | 44 | static LIST_HEAD(elv_list); |
@@ -894,6 +895,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) | |||
894 | ioc_clear_queue(q); | 895 | ioc_clear_queue(q); |
895 | spin_unlock_irq(q->queue_lock); | 896 | spin_unlock_irq(q->queue_lock); |
896 | 897 | ||
898 | blkg_destroy_all(q); | ||
899 | |||
897 | /* allocate, init and register new elevator */ | 900 | /* allocate, init and register new elevator */ |
898 | err = -ENOMEM; | 901 | err = -ENOMEM; |
899 | q->elevator = elevator_alloc(q, new_e); | 902 | q->elevator = elevator_alloc(q, new_e); |