aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Tejun Heo <tj@kernel.org> 2012-03-05 16:15:00 -0500
committer: Jens Axboe <axboe@kernel.dk> 2012-03-06 15:27:22 -0500
commit: 72e06c255181537d0b3e1f657a9ed81655d745b1 (patch)
tree: eb656df2ad23a7709b4e9fe58f1dabdc32be9ae9
parent: 6ecf23afab13c39d3bb0e2d826d0984b0dd53733 (diff)
blkcg: shoot down blkio_groups on elevator switch
Elevator switch may involve changes to blkcg policies. Implement shoot down of blkio_groups. Combined with the previous bypass updates, the end goal is updating blkcg core such that it can ensure that blkcgs being affected become quiescent and don't have any per-blkg data hanging around before commencing any policy updates. Until queues are made aware of the policies that apply to them, as an interim step, all per-policy blkg data will be shot down. * blk-throtl doesn't need this change as it can't be disabled for a live queue; however, update it anyway as the scheduled blkg unification requires this behavior change. This means that blk-throtl configuration will be unnecessarily lost over elevator switch. This oddity will be removed after blkcg learns to associate individual policies with request_queues. * blk-throtl doesn't shoot down root_tg. This is to ease transition. Unified blkg will always have a persistent root group, and not shooting down root_tg for now eases transition to that point by avoiding having to update td->root_tg and is safe as blk-throtl can never be disabled. -v2: Vivek pointed out that the group list is not guaranteed to be empty on return from the clear function if it raced cgroup removal and lost. Fix it by waiting a bit and retrying. This kludge will soon be removed once locking is updated such that blkg is never in limbo state between blkcg and request_queue locks. blk-throtl no longer shoots down root_tg to avoid breaking td->root_tg. Also, nest queue_lock inside blkio_list_lock, not the other way around, to avoid introducing a possible deadlock via blkcg lock. -v3: blkcg_clear_queue() repositioned and renamed to blkg_destroy_all() to increase consistency with later changes. cfq_clear_queue() updated to check q->elevator before dereferencing it to avoid NULL dereference on not fully initialized queues (used by later change). Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Vivek Goyal <vgoyal@redhat.com> Signed-off-by: Jens Axboe <axboe@kernel.dk>
-rw-r--r--block/blk-cgroup.c34
-rw-r--r--block/blk-cgroup.h5
-rw-r--r--block/blk-throttle.c27
-rw-r--r--block/cfq-iosched.c20
-rw-r--r--block/elevator.c3
5 files changed, 84 insertions, 5 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 970a717a056f..159aef59589f 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -17,8 +17,9 @@
17#include <linux/err.h> 17#include <linux/err.h>
18#include <linux/blkdev.h> 18#include <linux/blkdev.h>
19#include <linux/slab.h> 19#include <linux/slab.h>
20#include "blk-cgroup.h"
21#include <linux/genhd.h> 20#include <linux/genhd.h>
21#include <linux/delay.h>
22#include "blk-cgroup.h"
22 23
23#define MAX_KEY_LEN 100 24#define MAX_KEY_LEN 100
24 25
@@ -546,6 +547,37 @@ struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key)
546} 547}
547EXPORT_SYMBOL_GPL(blkiocg_lookup_group); 548EXPORT_SYMBOL_GPL(blkiocg_lookup_group);
548 549
550void blkg_destroy_all(struct request_queue *q)
551{
552 struct blkio_policy_type *pol;
553
554 while (true) {
555 bool done = true;
556
557 spin_lock(&blkio_list_lock);
558 spin_lock_irq(q->queue_lock);
559
560 /*
561 * clear_queue_fn() might return with non-empty group list
562 * if it raced cgroup removal and lost. cgroup removal is
563 * guaranteed to make forward progress and retrying after a
564 * while is enough. This ugliness is scheduled to be
565 * removed after locking update.
566 */
567 list_for_each_entry(pol, &blkio_list, list)
568 if (!pol->ops.blkio_clear_queue_fn(q))
569 done = false;
570
571 spin_unlock_irq(q->queue_lock);
572 spin_unlock(&blkio_list_lock);
573
574 if (done)
575 break;
576
577 msleep(10); /* just some random duration I like */
578 }
579}
580
549static void blkio_reset_stats_cpu(struct blkio_group *blkg) 581static void blkio_reset_stats_cpu(struct blkio_group *blkg)
550{ 582{
551 struct blkio_group_stats_cpu *stats_cpu; 583 struct blkio_group_stats_cpu *stats_cpu;
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 355168772f51..e5cfcbd4d2f4 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -203,7 +203,7 @@ extern unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg,
203 dev_t dev); 203 dev_t dev);
204 204
205typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg); 205typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
206 206typedef bool (blkio_clear_queue_fn)(struct request_queue *q);
207typedef void (blkio_update_group_weight_fn) (void *key, 207typedef void (blkio_update_group_weight_fn) (void *key,
208 struct blkio_group *blkg, unsigned int weight); 208 struct blkio_group *blkg, unsigned int weight);
209typedef void (blkio_update_group_read_bps_fn) (void * key, 209typedef void (blkio_update_group_read_bps_fn) (void * key,
@@ -217,6 +217,7 @@ typedef void (blkio_update_group_write_iops_fn) (void *key,
217 217
218struct blkio_policy_ops { 218struct blkio_policy_ops {
219 blkio_unlink_group_fn *blkio_unlink_group_fn; 219 blkio_unlink_group_fn *blkio_unlink_group_fn;
220 blkio_clear_queue_fn *blkio_clear_queue_fn;
220 blkio_update_group_weight_fn *blkio_update_group_weight_fn; 221 blkio_update_group_weight_fn *blkio_update_group_weight_fn;
221 blkio_update_group_read_bps_fn *blkio_update_group_read_bps_fn; 222 blkio_update_group_read_bps_fn *blkio_update_group_read_bps_fn;
222 blkio_update_group_write_bps_fn *blkio_update_group_write_bps_fn; 223 blkio_update_group_write_bps_fn *blkio_update_group_write_bps_fn;
@@ -233,6 +234,7 @@ struct blkio_policy_type {
233/* Blkio controller policy registration */ 234/* Blkio controller policy registration */
234extern void blkio_policy_register(struct blkio_policy_type *); 235extern void blkio_policy_register(struct blkio_policy_type *);
235extern void blkio_policy_unregister(struct blkio_policy_type *); 236extern void blkio_policy_unregister(struct blkio_policy_type *);
237extern void blkg_destroy_all(struct request_queue *q);
236 238
237static inline char *blkg_path(struct blkio_group *blkg) 239static inline char *blkg_path(struct blkio_group *blkg)
238{ 240{
@@ -249,6 +251,7 @@ struct blkio_policy_type {
249 251
250static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { } 252static inline void blkio_policy_register(struct blkio_policy_type *blkiop) { }
251static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { } 253static inline void blkio_policy_unregister(struct blkio_policy_type *blkiop) { }
254static inline void blkg_destroy_all(struct request_queue *q) { }
252 255
253static inline char *blkg_path(struct blkio_group *blkg) { return NULL; } 256static inline char *blkg_path(struct blkio_group *blkg) { return NULL; }
254 257
diff --git a/block/blk-throttle.c b/block/blk-throttle.c
index 702c0e64e09f..3699ab40d494 100644
--- a/block/blk-throttle.c
+++ b/block/blk-throttle.c
@@ -989,12 +989,17 @@ throtl_destroy_tg(struct throtl_data *td, struct throtl_grp *tg)
989 td->nr_undestroyed_grps--; 989 td->nr_undestroyed_grps--;
990} 990}
991 991
992static void throtl_release_tgs(struct throtl_data *td) 992static bool throtl_release_tgs(struct throtl_data *td, bool release_root)
993{ 993{
994 struct hlist_node *pos, *n; 994 struct hlist_node *pos, *n;
995 struct throtl_grp *tg; 995 struct throtl_grp *tg;
996 bool empty = true;
996 997
997 hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) { 998 hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) {
999 /* skip root? */
1000 if (!release_root && tg == td->root_tg)
1001 continue;
1002
998 /* 1003 /*
999 * If cgroup removal path got to blk_group first and removed 1004 * If cgroup removal path got to blk_group first and removed
1000 * it from cgroup list, then it will take care of destroying 1005 * it from cgroup list, then it will take care of destroying
@@ -1002,7 +1007,10 @@ static void throtl_release_tgs(struct throtl_data *td)
1002 */ 1007 */
1003 if (!blkiocg_del_blkio_group(&tg->blkg)) 1008 if (!blkiocg_del_blkio_group(&tg->blkg))
1004 throtl_destroy_tg(td, tg); 1009 throtl_destroy_tg(td, tg);
1010 else
1011 empty = false;
1005 } 1012 }
1013 return empty;
1006} 1014}
1007 1015
1008/* 1016/*
@@ -1029,6 +1037,20 @@ void throtl_unlink_blkio_group(void *key, struct blkio_group *blkg)
1029 spin_unlock_irqrestore(td->queue->queue_lock, flags); 1037 spin_unlock_irqrestore(td->queue->queue_lock, flags);
1030} 1038}
1031 1039
1040static bool throtl_clear_queue(struct request_queue *q)
1041{
1042 lockdep_assert_held(q->queue_lock);
1043
1044 /*
1045 * Clear tgs but leave the root one alone. This is necessary
1046 * because root_tg is expected to be persistent and safe because
1047 * blk-throtl can never be disabled while @q is alive. This is a
1048 * kludge to prepare for unified blkg. This whole function will be
1049 * removed soon.
1050 */
1051 return throtl_release_tgs(q->td, false);
1052}
1053
1032static void throtl_update_blkio_group_common(struct throtl_data *td, 1054static void throtl_update_blkio_group_common(struct throtl_data *td,
1033 struct throtl_grp *tg) 1055 struct throtl_grp *tg)
1034{ 1056{
@@ -1097,6 +1119,7 @@ static void throtl_shutdown_wq(struct request_queue *q)
1097static struct blkio_policy_type blkio_policy_throtl = { 1119static struct blkio_policy_type blkio_policy_throtl = {
1098 .ops = { 1120 .ops = {
1099 .blkio_unlink_group_fn = throtl_unlink_blkio_group, 1121 .blkio_unlink_group_fn = throtl_unlink_blkio_group,
1122 .blkio_clear_queue_fn = throtl_clear_queue,
1100 .blkio_update_group_read_bps_fn = 1123 .blkio_update_group_read_bps_fn =
1101 throtl_update_blkio_group_read_bps, 1124 throtl_update_blkio_group_read_bps,
1102 .blkio_update_group_write_bps_fn = 1125 .blkio_update_group_write_bps_fn =
@@ -1282,7 +1305,7 @@ void blk_throtl_exit(struct request_queue *q)
1282 throtl_shutdown_wq(q); 1305 throtl_shutdown_wq(q);
1283 1306
1284 spin_lock_irq(q->queue_lock); 1307 spin_lock_irq(q->queue_lock);
1285 throtl_release_tgs(td); 1308 throtl_release_tgs(td, true);
1286 1309
1287 /* If there are other groups */ 1310 /* If there are other groups */
1288 if (td->nr_undestroyed_grps > 0) 1311 if (td->nr_undestroyed_grps > 0)
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index 72680a6715fc..61693d3404d0 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -1225,10 +1225,11 @@ static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
1225 cfq_put_cfqg(cfqg); 1225 cfq_put_cfqg(cfqg);
1226} 1226}
1227 1227
1228static void cfq_release_cfq_groups(struct cfq_data *cfqd) 1228static bool cfq_release_cfq_groups(struct cfq_data *cfqd)
1229{ 1229{
1230 struct hlist_node *pos, *n; 1230 struct hlist_node *pos, *n;
1231 struct cfq_group *cfqg; 1231 struct cfq_group *cfqg;
1232 bool empty = true;
1232 1233
1233 hlist_for_each_entry_safe(cfqg, pos, n, &cfqd->cfqg_list, cfqd_node) { 1234 hlist_for_each_entry_safe(cfqg, pos, n, &cfqd->cfqg_list, cfqd_node) {
1234 /* 1235 /*
@@ -1238,7 +1239,10 @@ static void cfq_release_cfq_groups(struct cfq_data *cfqd)
1238 */ 1239 */
1239 if (!cfq_blkiocg_del_blkio_group(&cfqg->blkg)) 1240 if (!cfq_blkiocg_del_blkio_group(&cfqg->blkg))
1240 cfq_destroy_cfqg(cfqd, cfqg); 1241 cfq_destroy_cfqg(cfqd, cfqg);
1242 else
1243 empty = false;
1241 } 1244 }
1245 return empty;
1242} 1246}
1243 1247
1244/* 1248/*
@@ -1265,6 +1269,19 @@ static void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
1265 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags); 1269 spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
1266} 1270}
1267 1271
1272static struct elevator_type iosched_cfq;
1273
1274static bool cfq_clear_queue(struct request_queue *q)
1275{
1276 lockdep_assert_held(q->queue_lock);
1277
1278 /* shoot down blkgs iff the current elevator is cfq */
1279 if (!q->elevator || q->elevator->type != &iosched_cfq)
1280 return true;
1281
1282 return cfq_release_cfq_groups(q->elevator->elevator_data);
1283}
1284
1268#else /* GROUP_IOSCHED */ 1285#else /* GROUP_IOSCHED */
1269static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd) 1286static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd)
1270{ 1287{
@@ -3875,6 +3892,7 @@ static struct elevator_type iosched_cfq = {
3875static struct blkio_policy_type blkio_policy_cfq = { 3892static struct blkio_policy_type blkio_policy_cfq = {
3876 .ops = { 3893 .ops = {
3877 .blkio_unlink_group_fn = cfq_unlink_blkio_group, 3894 .blkio_unlink_group_fn = cfq_unlink_blkio_group,
3895 .blkio_clear_queue_fn = cfq_clear_queue,
3878 .blkio_update_group_weight_fn = cfq_update_blkio_group_weight, 3896 .blkio_update_group_weight_fn = cfq_update_blkio_group_weight,
3879 }, 3897 },
3880 .plid = BLKIO_POLICY_PROP, 3898 .plid = BLKIO_POLICY_PROP,
diff --git a/block/elevator.c b/block/elevator.c
index 0bdea0ed03a3..8c7561fd2c79 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -38,6 +38,7 @@
38#include <trace/events/block.h> 38#include <trace/events/block.h>
39 39
40#include "blk.h" 40#include "blk.h"
41#include "blk-cgroup.h"
41 42
42static DEFINE_SPINLOCK(elv_list_lock); 43static DEFINE_SPINLOCK(elv_list_lock);
43static LIST_HEAD(elv_list); 44static LIST_HEAD(elv_list);
@@ -894,6 +895,8 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
894 ioc_clear_queue(q); 895 ioc_clear_queue(q);
895 spin_unlock_irq(q->queue_lock); 896 spin_unlock_irq(q->queue_lock);
896 897
898 blkg_destroy_all(q);
899
897 /* allocate, init and register new elevator */ 900 /* allocate, init and register new elevator */
898 err = -ENOMEM; 901 err = -ENOMEM;
899 q->elevator = elevator_alloc(q, new_e); 902 q->elevator = elevator_alloc(q, new_e);