author      Vivek Goyal <vgoyal@redhat.com>        2009-12-03 12:59:47 -0500
committer   Jens Axboe <jens.axboe@oracle.com>     2009-12-03 13:28:52 -0500
commit      b1c3576961847da26c91b1e97f226bb66be5fa3f (patch)
tree        e228525220031232463f9cbbe017bad67807e6d4
parent      25fb5169d4c9d4255107abbb7c08ab712434efc8 (diff)
blkio: Take care of cgroup deletion and cfq group reference counting
o One can choose to change elevator or delete a cgroup. Implement group
  reference counting so that both elevator exit and cgroup deletion can
  take place gracefully.

Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Nauman Rafique <nauman@google.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r--    block/blk-cgroup.c     66
-rw-r--r--    block/blk-cgroup.h      1
-rw-r--r--    block/cfq-iosched.c    95
3 files changed, 160 insertions(+), 2 deletions(-)
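The reference-counting scheme this patch introduces can be summarized outside the kernel: the group starts with one joint reference shared by its two owners (the elevator and the cgroup), each queue pins the group with its own reference, and whichever holder drops the last reference frees the group, regardless of whether elevator exit or cgroup deletion runs first. A minimal userspace sketch with C11 atomics; names are hypothetical analogues of cfq_group/ref and are not part of the patch:

    #include <stdatomic.h>
    #include <stdlib.h>

    struct group {
            atomic_int ref;
    };

    static struct group *group_create(void)
    {
            struct group *g = malloc(sizeof(*g));

            /* one joint reference, shared by elevator and cgroup */
            atomic_init(&g->ref, 1);
            return g;
    }

    static void group_get(struct group *g)
    {
            atomic_fetch_add(&g->ref, 1);
    }

    static void group_put(struct group *g)
    {
            /* whoever drops the last reference frees the group */
            if (atomic_fetch_sub(&g->ref, 1) == 1)
                    free(g);
    }

    int main(void)
    {
            struct group *g = group_create();

            group_get(g);   /* a queue pins the group */
            group_put(g);   /* first teardown path (elevator exit or
                             * cgroup deletion) drops the joint ref */
            group_put(g);   /* last queue reference frees the group */
            return 0;
    }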
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 4f6afd76ec59..0426ab692fd5 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -13,6 +13,8 @@
 #include <linux/ioprio.h>
 #include "blk-cgroup.h"
 
+extern void cfq_unlink_blkio_group(void *, struct blkio_group *);
+
 struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
 
 struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
@@ -28,14 +30,43 @@ void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
 
         spin_lock_irqsave(&blkcg->lock, flags);
         rcu_assign_pointer(blkg->key, key);
+        blkg->blkcg_id = css_id(&blkcg->css);
         hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
         spin_unlock_irqrestore(&blkcg->lock, flags);
 }
 
+static void __blkiocg_del_blkio_group(struct blkio_group *blkg)
+{
+        hlist_del_init_rcu(&blkg->blkcg_node);
+        blkg->blkcg_id = 0;
+}
+
+/*
+ * returns 0 if blkio_group was still on cgroup list. Otherwise returns 1
+ * indicating that the blkio_group was unhashed by the time we got to it.
+ */
 int blkiocg_del_blkio_group(struct blkio_group *blkg)
 {
-        /* Implemented later */
-        return 0;
+        struct blkio_cgroup *blkcg;
+        unsigned long flags;
+        struct cgroup_subsys_state *css;
+        int ret = 1;
+
+        rcu_read_lock();
+        css = css_lookup(&blkio_subsys, blkg->blkcg_id);
+        if (!css)
+                goto out;
+
+        blkcg = container_of(css, struct blkio_cgroup, css);
+        spin_lock_irqsave(&blkcg->lock, flags);
+        if (!hlist_unhashed(&blkg->blkcg_node)) {
+                __blkiocg_del_blkio_group(blkg);
+                ret = 0;
+        }
+        spin_unlock_irqrestore(&blkcg->lock, flags);
+out:
+        rcu_read_unlock();
+        return ret;
 }
 
 /* called under rcu_read_lock(). */
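The return-value handshake above is what lets elevator exit and cgroup deletion race safely: both paths may call the delete function, but only the caller that still finds the group hashed unlinks it and gets 0, so exactly one path goes on to destroy the group (css_lookup() returning NULL is the same outcome from the other side: the cgroup is already gone). A stripped-down userspace sketch of the recheck-under-lock pattern, assuming POSIX threads and hypothetical names:

    #include <pthread.h>
    #include <stdbool.h>

    struct node {
            bool hashed;            /* still on the cgroup's list? */
    };

    static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

    /* returns 0 if we unhashed the node, 1 if it was already unhashed */
    static int del_node(struct node *n)
    {
            int ret = 1;

            pthread_mutex_lock(&list_lock);
            if (n->hashed) {        /* re-check under the lock */
                    n->hashed = false;
                    ret = 0;        /* we won; caller destroys the group */
            }
            pthread_mutex_unlock(&list_lock);
            return ret;
    }

    int main(void)
    {
            struct node n = { .hashed = true };

            return del_node(&n);    /* first caller gets 0; a second would get 1 */
    }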
@@ -97,8 +128,39 @@ static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
 static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
 {
         struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
+        unsigned long flags;
+        struct blkio_group *blkg;
+        void *key;
 
+        rcu_read_lock();
+remove_entry:
+        spin_lock_irqsave(&blkcg->lock, flags);
+
+        if (hlist_empty(&blkcg->blkg_list)) {
+                spin_unlock_irqrestore(&blkcg->lock, flags);
+                goto done;
+        }
+
+        blkg = hlist_entry(blkcg->blkg_list.first, struct blkio_group,
+                                blkcg_node);
+        key = rcu_dereference(blkg->key);
+        __blkiocg_del_blkio_group(blkg);
+
+        spin_unlock_irqrestore(&blkcg->lock, flags);
+
+        /*
+         * This blkio_group is being unlinked as the associated cgroup is
+         * going away. Let all the IO controlling policies know about it.
+         *
+         * Currently this is a static call to one IO controlling policy.
+         * Once we have more policies in place, we need some dynamic
+         * registration of callback functions.
+         */
+        cfq_unlink_blkio_group(key, blkg);
+        goto remove_entry;
+done:
         free_css_id(&blkio_subsys, &blkcg->css);
+        rcu_read_unlock();
         kfree(blkcg);
 }
 
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index ba5703f69b42..cd50a2f8733e 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -26,6 +26,7 @@ struct blkio_group {
         /* An rcu protected unique identifier for the group */
         void *key;
         struct hlist_node blkcg_node;
+        unsigned short blkcg_id;
 };
 
 #define BLKIO_WEIGHT_MIN        100
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index a877eeee80af..8bc31a50a57f 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -192,6 +192,7 @@ struct cfq_group {
         struct blkio_group blkg;
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
         struct hlist_node cfqd_node;
+        atomic_t ref;
 #endif
 };
 
@@ -924,6 +925,14 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
         *st = CFQ_RB_ROOT;
         RB_CLEAR_NODE(&cfqg->rb_node);
 
+        /*
+         * Take the initial reference that will be released on destroy.
+         * This can be thought of as a joint reference by cgroup and
+         * elevator which will be dropped by either the elevator exit
+         * or cgroup deletion path, depending on who is exiting first.
+         */
+        atomic_set(&cfqg->ref, 1);
+
         /* Add group onto cgroup list */
         blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd);
 
@@ -960,7 +969,77 @@ static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
                 cfqg = &cfqq->cfqd->root_group;
 
         cfqq->cfqg = cfqg;
+        /* cfqq reference on cfqg */
+        atomic_inc(&cfqq->cfqg->ref);
+}
+
+static void cfq_put_cfqg(struct cfq_group *cfqg)
+{
+        struct cfq_rb_root *st;
+        int i, j;
+
+        BUG_ON(atomic_read(&cfqg->ref) <= 0);
+        if (!atomic_dec_and_test(&cfqg->ref))
+                return;
+        for_each_cfqg_st(cfqg, i, j, st)
+                BUG_ON(!RB_EMPTY_ROOT(&st->rb) || st->active != NULL);
+        kfree(cfqg);
+}
+
+static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
+{
+        /* Something is wrong if we are trying to remove the same group twice */
+        BUG_ON(hlist_unhashed(&cfqg->cfqd_node));
+
+        hlist_del_init(&cfqg->cfqd_node);
+
+        /*
+         * Put the reference taken at the time of creation so that when all
+         * queues are gone, the group can be destroyed.
+         */
+        cfq_put_cfqg(cfqg);
+}
+
+static void cfq_release_cfq_groups(struct cfq_data *cfqd)
+{
+        struct hlist_node *pos, *n;
+        struct cfq_group *cfqg;
+
+        hlist_for_each_entry_safe(cfqg, pos, n, &cfqd->cfqg_list, cfqd_node) {
+                /*
+                 * If the cgroup removal path got to the blkio_group first
+                 * and removed it from the cgroup list, then it will take
+                 * care of destroying cfqg also.
+                 */
+                if (!blkiocg_del_blkio_group(&cfqg->blkg))
+                        cfq_destroy_cfqg(cfqd, cfqg);
+        }
 }
 
+/*
+ * Blk cgroup controller notification saying that the blkio_group object is
+ * being delinked as the associated cgroup object is going away. That also
+ * means that no new IO will come in this group. So get rid of this group as
+ * soon as any pending IO in the group is finished.
+ *
+ * This function is called under rcu_read_lock(). "key" is the rcu protected
+ * pointer. That means "key" is a valid cfq_data pointer as long as we are
+ * under the rcu read lock.
+ *
+ * "key" was fetched from blkio_group under blkio_cgroup->lock. That means
+ * it should not be NULL as even if the elevator was exiting, the cgroup
+ * deletion path got to it first.
+ */
+void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
+{
+        unsigned long flags;
+        struct cfq_data *cfqd = key;
+
+        spin_lock_irqsave(cfqd->queue->queue_lock, flags);
+        cfq_destroy_cfqg(cfqd, cfqg_of_blkg(blkg));
+        spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
+}
+
 #else /* GROUP_IOSCHED */
 static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
 {
@@ -971,6 +1050,9 @@ cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) {
         cfqq->cfqg = cfqg;
 }
 
+static void cfq_release_cfq_groups(struct cfq_data *cfqd) {}
+static inline void cfq_put_cfqg(struct cfq_group *cfqg) {}
+
 #endif /* GROUP_IOSCHED */
 
 /*
@@ -2172,11 +2254,13 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
  * task holds one reference to the queue, dropped when task exits. each rq
  * in-flight on this queue also holds a reference, dropped when rq is freed.
  *
+ * Each cfq queue took a reference on the parent group. Drop it now.
  * queue lock must be held here.
  */
 static void cfq_put_queue(struct cfq_queue *cfqq)
 {
         struct cfq_data *cfqd = cfqq->cfqd;
+        struct cfq_group *cfqg;
 
         BUG_ON(atomic_read(&cfqq->ref) <= 0);
 
@@ -2186,6 +2270,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
         cfq_log_cfqq(cfqd, cfqq, "put_queue");
         BUG_ON(rb_first(&cfqq->sort_list));
         BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
+        cfqg = cfqq->cfqg;
 
         if (unlikely(cfqd->active_queue == cfqq)) {
                 __cfq_slice_expired(cfqd, cfqq, 0);
@@ -2194,6 +2279,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 
         BUG_ON(cfq_cfqq_on_rr(cfqq));
         kmem_cache_free(cfq_pool, cfqq);
+        cfq_put_cfqg(cfqg);
 }
 
 /*
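Note the ordering in cfq_put_queue(): cfqg is read out of the queue earlier (before kmem_cache_free() releases the queue's memory), so the final put never touches freed memory. A small userspace illustration of the hazard being avoided, with hypothetical names:

    #include <stdlib.h>

    struct group { int ref; };
    struct queue { struct group *grp; };

    static void put_group(struct group *g)
    {
            if (--g->ref == 0)
                    free(g);
    }

    static void put_queue(struct queue *q)
    {
            struct group *g = q->grp;       /* cache before freeing q */

            free(q);
            put_group(g);   /* reading q->grp here would be a use-after-free */
    }

    int main(void)
    {
            struct group *g = calloc(1, sizeof(*g));
            struct queue *q = calloc(1, sizeof(*q));

            g->ref = 1;
            q->grp = g;
            put_queue(q);
            return 0;
    }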
@@ -3369,11 +3455,15 @@ static void cfq_exit_queue(struct elevator_queue *e)
         }
 
         cfq_put_async_queues(cfqd);
+        cfq_release_cfq_groups(cfqd);
+        blkiocg_del_blkio_group(&cfqd->root_group.blkg);
 
         spin_unlock_irq(q->queue_lock);
 
         cfq_shutdown_timer_wq(cfqd);
 
+        /* Wait for cfqg->blkg->key accessors to exit their grace periods. */
+        synchronize_rcu();
         kfree(cfqd);
 }
 
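The synchronize_rcu() before kfree(cfqd) is the classic RCU teardown ordering: first unlink the object from every list readers use to find it, then wait out a grace period, then free. A userspace analogue, assuming liburcu is available (compile with -lurcu); the names are illustrative, but the call shapes match the kernel's:

    #define _LGPL_SOURCE
    #include <urcu.h>
    #include <stdlib.h>

    struct obj {
            int data;
    };

    static struct obj *global_obj;

    static void teardown(void)
    {
            struct obj *o = global_obj;

            rcu_assign_pointer(global_obj, NULL);   /* unpublish first */
            synchronize_rcu();      /* wait out readers that still see o */
            free(o);                /* no rcu_read_lock() section holds it now */
    }

    int main(void)
    {
            rcu_register_thread();
            global_obj = calloc(1, sizeof(*global_obj));
            teardown();
            rcu_unregister_thread();
            return 0;
    }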
@@ -3401,6 +3491,11 @@ static void *cfq_init_queue(struct request_queue *q)
         cfqg->weight = 2*BLKIO_WEIGHT_DEFAULT;
 
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
+        /*
+         * Take a reference to the root group which we never drop. This makes
+         * sure that cfq_put_cfqg() does not try to kfree the root group.
+         */
+        atomic_set(&cfqg->ref, 1);
         blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg, (void *)cfqd);
 #endif
         /*
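The never-dropped reference on the root group is a common convention for objects embedded in a larger allocation: pin them with one reference so the generic put path never tries to kfree them; they go away with their container instead. A userspace sketch of the same convention, with hypothetical names:

    #include <stdatomic.h>
    #include <stdlib.h>

    struct group {
            atomic_int ref;
    };

    struct sched_data {
            struct group root_group;        /* embedded, freed with sched_data */
    };

    static struct sched_data *sched_init(void)
    {
            struct sched_data *sd = calloc(1, sizeof(*sd));

            /* pin the embedded group so a generic put never frees it */
            atomic_init(&sd->root_group.ref, 1);
            return sd;
    }

    int main(void)
    {
            struct sched_data *sd = sched_init();

            free(sd);       /* root_group goes away with its container */
            return 0;
    }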