author     Vivek Goyal <vgoyal@redhat.com>       2009-12-03 12:59:47 -0500
committer  Jens Axboe <jens.axboe@oracle.com>    2009-12-03 13:28:52 -0500
commit     b1c3576961847da26c91b1e97f226bb66be5fa3f
tree       e228525220031232463f9cbbe017bad67807e6d4
parent     25fb5169d4c9d4255107abbb7c08ab712434efc8
blkio: Take care of cgroup deletion and cfq group reference counting
o One can choose to change the elevator or delete a cgroup. Implement group
  reference counting so that both elevator exit and cgroup deletion can
  take place gracefully.
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
Signed-off-by: Nauman Rafique <nauman@google.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
-rw-r--r--   block/blk-cgroup.c  | 66
-rw-r--r--   block/blk-cgroup.h  |  1
-rw-r--r--   block/cfq-iosched.c | 95
3 files changed, 160 insertions(+), 2 deletions(-)
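Before the patch itself, a note on the scheme it implements: a cfq_group now carries an atomic reference count. The group is created with one reference jointly owned by the cgroup and the elevator, each cfq queue linked to the group takes an additional reference, and the group is freed only when the last reference is dropped, so cgroup deletion and elevator exit can happen in either order. The following userspace sketch is purely illustrative and is not kernel code; struct group, group_get() and group_put() are simplified stand-ins for cfq_group, atomic_inc(&cfqg->ref) and cfq_put_cfqg().

/*
 * Standalone sketch of the joint-reference pattern, using C11 atomics.
 * The group starts with one reference shared by the two teardown paths'
 * owner; each user (queue) takes another reference.  Whoever drops the
 * last reference frees the group, regardless of teardown order.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct group {
        atomic_int ref;                 /* analogous to cfqg->ref */
};

static struct group *group_alloc(void)
{
        struct group *g = malloc(sizeof(*g));
        if (!g)
                abort();
        /* initial reference, dropped by whichever teardown path runs */
        atomic_init(&g->ref, 1);
        return g;
}

static void group_get(struct group *g)
{
        /* e.g. a queue linking itself to the group */
        atomic_fetch_add(&g->ref, 1);
}

static void group_put(struct group *g)
{
        /* the last put frees the group, mirroring cfq_put_cfqg() */
        if (atomic_fetch_sub(&g->ref, 1) == 1) {
                free(g);
                printf("group freed\n");
        }
}

int main(void)
{
        struct group *g = group_alloc();

        group_get(g);   /* a queue takes its reference */
        group_put(g);   /* teardown drops the initial reference */
        group_put(g);   /* the queue goes away last and frees the group */
        return 0;
}

In the patch below, cfq_find_alloc_cfqg() takes the initial reference, cfq_link_cfqq_cfqg() plays the role of group_get(), and cfq_put_cfqg()/cfq_destroy_cfqg() implement the put side.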
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 4f6afd76ec59..0426ab692fd5 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -13,6 +13,8 @@
 #include <linux/ioprio.h>
 #include "blk-cgroup.h"
 
+extern void cfq_unlink_blkio_group(void *, struct blkio_group *);
+
 struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
 
 struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
@@ -28,14 +30,43 @@ void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
 
        spin_lock_irqsave(&blkcg->lock, flags);
        rcu_assign_pointer(blkg->key, key);
+       blkg->blkcg_id = css_id(&blkcg->css);
        hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
        spin_unlock_irqrestore(&blkcg->lock, flags);
 }
 
+static void __blkiocg_del_blkio_group(struct blkio_group *blkg)
+{
+       hlist_del_init_rcu(&blkg->blkcg_node);
+       blkg->blkcg_id = 0;
+}
+
+/*
+ * returns 0 if blkio_group was still on cgroup list. Otherwise returns 1
+ * indicating that blk_group was unhashed by the time we got to it.
+ */
 int blkiocg_del_blkio_group(struct blkio_group *blkg)
 {
-       /* Implemented later */
-       return 0;
+       struct blkio_cgroup *blkcg;
+       unsigned long flags;
+       struct cgroup_subsys_state *css;
+       int ret = 1;
+
+       rcu_read_lock();
+       css = css_lookup(&blkio_subsys, blkg->blkcg_id);
+       if (!css)
+               goto out;
+
+       blkcg = container_of(css, struct blkio_cgroup, css);
+       spin_lock_irqsave(&blkcg->lock, flags);
+       if (!hlist_unhashed(&blkg->blkcg_node)) {
+               __blkiocg_del_blkio_group(blkg);
+               ret = 0;
+       }
+       spin_unlock_irqrestore(&blkcg->lock, flags);
+out:
+       rcu_read_unlock();
+       return ret;
 }
 
 /* called under rcu_read_lock(). */
@@ -97,8 +128,39 @@ static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
 static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
 {
        struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
+       unsigned long flags;
+       struct blkio_group *blkg;
+       void *key;
 
+       rcu_read_lock();
+remove_entry:
+       spin_lock_irqsave(&blkcg->lock, flags);
+
+       if (hlist_empty(&blkcg->blkg_list)) {
+               spin_unlock_irqrestore(&blkcg->lock, flags);
+               goto done;
+       }
+
+       blkg = hlist_entry(blkcg->blkg_list.first, struct blkio_group,
+                               blkcg_node);
+       key = rcu_dereference(blkg->key);
+       __blkiocg_del_blkio_group(blkg);
+
+       spin_unlock_irqrestore(&blkcg->lock, flags);
+
+       /*
+        * This blkio_group is being unlinked as associated cgroup is going
+        * away. Let all the IO controlling policies know about this event.
+        *
+        * Currently this is static call to one io controlling policy. Once
+        * we have more policies in place, we need some dynamic registration
+        * of callback function.
+        */
+       cfq_unlink_blkio_group(key, blkg);
+       goto remove_entry;
+done:
        free_css_id(&blkio_subsys, &blkcg->css);
+       rcu_read_unlock();
        kfree(blkcg);
 }
 
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index ba5703f69b42..cd50a2f8733e 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -26,6 +26,7 @@ struct blkio_group {
        /* An rcu protected unique identifier for the group */
        void *key;
        struct hlist_node blkcg_node;
+       unsigned short blkcg_id;
 };
 
 #define BLKIO_WEIGHT_MIN       100
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
index a877eeee80af..8bc31a50a57f 100644
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -192,6 +192,7 @@ struct cfq_group {
        struct blkio_group blkg;
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
        struct hlist_node cfqd_node;
+       atomic_t ref;
 #endif
 };
 
@@ -924,6 +925,14 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
                *st = CFQ_RB_ROOT;
        RB_CLEAR_NODE(&cfqg->rb_node);
 
+       /*
+        * Take the initial reference that will be released on destroy
+        * This can be thought of a joint reference by cgroup and
+        * elevator which will be dropped by either elevator exit
+        * or cgroup deletion path depending on who is exiting first.
+        */
+       atomic_set(&cfqg->ref, 1);
+
        /* Add group onto cgroup list */
        blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd);
 
@@ -960,7 +969,77 @@ static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
                cfqg = &cfqq->cfqd->root_group;
 
        cfqq->cfqg = cfqg;
+       /* cfqq reference on cfqg */
+       atomic_inc(&cfqq->cfqg->ref);
+}
+
+static void cfq_put_cfqg(struct cfq_group *cfqg)
+{
+       struct cfq_rb_root *st;
+       int i, j;
+
+       BUG_ON(atomic_read(&cfqg->ref) <= 0);
+       if (!atomic_dec_and_test(&cfqg->ref))
+               return;
+       for_each_cfqg_st(cfqg, i, j, st)
+               BUG_ON(!RB_EMPTY_ROOT(&st->rb) || st->active != NULL);
+       kfree(cfqg);
+}
+
+static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
+{
+       /* Something wrong if we are trying to remove same group twice */
+       BUG_ON(hlist_unhashed(&cfqg->cfqd_node));
+
+       hlist_del_init(&cfqg->cfqd_node);
+
+       /*
+        * Put the reference taken at the time of creation so that when all
+        * queues are gone, group can be destroyed.
+        */
+       cfq_put_cfqg(cfqg);
+}
+
+static void cfq_release_cfq_groups(struct cfq_data *cfqd)
+{
+       struct hlist_node *pos, *n;
+       struct cfq_group *cfqg;
+
+       hlist_for_each_entry_safe(cfqg, pos, n, &cfqd->cfqg_list, cfqd_node) {
+               /*
+                * If cgroup removal path got to blk_group first and removed
+                * it from cgroup list, then it will take care of destroying
+                * cfqg also.
+                */
+               if (!blkiocg_del_blkio_group(&cfqg->blkg))
+                       cfq_destroy_cfqg(cfqd, cfqg);
+       }
 }
+
+/*
+ * Blk cgroup controller notification saying that blkio_group object is being
+ * delinked as associated cgroup object is going away. That also means that
+ * no new IO will come in this group. So get rid of this group as soon as
+ * any pending IO in the group is finished.
+ *
+ * This function is called under rcu_read_lock(). key is the rcu protected
+ * pointer. That means "key" is a valid cfq_data pointer as long as we are rcu
+ * read lock.
+ *
+ * "key" was fetched from blkio_group under blkio_cgroup->lock. That means
+ * it should not be NULL as even if elevator was exiting, cgroup deltion
+ * path got to it first.
+ */
+void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
+{
+       unsigned long flags;
+       struct cfq_data *cfqd = key;
+
+       spin_lock_irqsave(cfqd->queue->queue_lock, flags);
+       cfq_destroy_cfqg(cfqd, cfqg_of_blkg(blkg));
+       spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
+}
+
 #else /* GROUP_IOSCHED */
 static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
 {
@@ -971,6 +1050,9 @@ cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) {
        cfqq->cfqg = cfqg;
 }
 
+static void cfq_release_cfq_groups(struct cfq_data *cfqd) {}
+static inline void cfq_put_cfqg(struct cfq_group *cfqg) {}
+
 #endif /* GROUP_IOSCHED */
 
 /*
@@ -2172,11 +2254,13 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
  * task holds one reference to the queue, dropped when task exits. each rq
  * in-flight on this queue also holds a reference, dropped when rq is freed.
  *
+ * Each cfq queue took a reference on the parent group. Drop it now.
  * queue lock must be held here.
  */
 static void cfq_put_queue(struct cfq_queue *cfqq)
 {
        struct cfq_data *cfqd = cfqq->cfqd;
+       struct cfq_group *cfqg;
 
        BUG_ON(atomic_read(&cfqq->ref) <= 0);
 
@@ -2186,6 +2270,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
        cfq_log_cfqq(cfqd, cfqq, "put_queue");
        BUG_ON(rb_first(&cfqq->sort_list));
        BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
+       cfqg = cfqq->cfqg;
 
        if (unlikely(cfqd->active_queue == cfqq)) {
                __cfq_slice_expired(cfqd, cfqq, 0);
@@ -2194,6 +2279,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 
        BUG_ON(cfq_cfqq_on_rr(cfqq));
        kmem_cache_free(cfq_pool, cfqq);
+       cfq_put_cfqg(cfqg);
 }
 
 /*
@@ -3369,11 +3455,15 @@ static void cfq_exit_queue(struct elevator_queue *e)
        }
 
        cfq_put_async_queues(cfqd);
+       cfq_release_cfq_groups(cfqd);
+       blkiocg_del_blkio_group(&cfqd->root_group.blkg);
 
        spin_unlock_irq(q->queue_lock);
 
        cfq_shutdown_timer_wq(cfqd);
 
+       /* Wait for cfqg->blkg->key accessors to exit their grace periods. */
+       synchronize_rcu();
        kfree(cfqd);
 }
 
@@ -3401,6 +3491,11 @@ static void *cfq_init_queue(struct request_queue *q)
        cfqg->weight = 2*BLKIO_WEIGHT_DEFAULT;
 
 #ifdef CONFIG_CFQ_GROUP_IOSCHED
+       /*
+        * Take a reference to root group which we never drop. This is just
+        * to make sure that cfq_put_cfqg() does not try to kfree root group
+        */
+       atomic_set(&cfqg->ref, 1);
        blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg, (void *)cfqd);
 #endif
        /*