aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tejun Heo <tj@kernel.org> 2013-01-09 11:05:10 -0500
committer Tejun Heo <tj@kernel.org> 2013-01-09 11:05:10 -0500
commit 3c547865902e9fc30dc15941f326fd8039c6628d (patch)
tree e86d926e86258cfc69d6665e28551fa14267634e
parent 93e6d5d8f5c909479623c6ab4427f038c6c3f63f (diff)
blkcg: make blkcg_gq's hierarchical
Currently a child blkg (blkcg_gq) can be created even if its parent doesn't exist, i.e. given a blkg, it's not guaranteed that its ancestors will exist. This makes it difficult to implement proper hierarchy support for blkcg policies.

Always create blkgs recursively and make a child blkg hold a reference to its parent. blkg->parent is added so that finding the parent is easy. blkcg_parent() is also added in the process.

This change can be visible to userland. For example, whereas issuing IO in a nested cgroup previously didn't affect the ancestors at all, it will now initialize all ancestor blkgs, and zero stats for the request_queue will always appear on them. While this is userland visible, this shouldn't cause any functional difference.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Vivek Goyal <vgoyal@redhat.com>
-rw-r--r-- block/blk-cgroup.c 42
-rw-r--r-- block/blk-cgroup.h 18
2 files changed, 55 insertions, 5 deletions
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 18ae48083f4a..942f344fdfa7 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -201,7 +201,16 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
201 } 201 }
202 blkg = new_blkg; 202 blkg = new_blkg;
203 203
204 /* insert */ 204 /* link parent and insert */
205 if (blkcg_parent(blkcg)) {
206 blkg->parent = __blkg_lookup(blkcg_parent(blkcg), q, false);
207 if (WARN_ON_ONCE(!blkg->parent)) {
208 blkg = ERR_PTR(-EINVAL);
209 goto err_put_css;
210 }
211 blkg_get(blkg->parent);
212 }
213
205 spin_lock(&blkcg->lock); 214 spin_lock(&blkcg->lock);
206 ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg); 215 ret = radix_tree_insert(&blkcg->blkg_tree, q->id, blkg);
207 if (likely(!ret)) { 216 if (likely(!ret)) {
@@ -213,6 +222,10 @@ static struct blkcg_gq *blkg_create(struct blkcg *blkcg,
213 if (!ret) 222 if (!ret)
214 return blkg; 223 return blkg;
215 224
225 /* @blkg failed after being fully initialized, use the usual release path */
226 blkg_put(blkg);
227 return ERR_PTR(ret);
228
216err_put_css: 229err_put_css:
217 css_put(&blkcg->css); 230 css_put(&blkcg->css);
218err_free_blkg: 231err_free_blkg:
@@ -226,8 +239,9 @@ err_free_blkg:
226 * @q: request_queue of interest 239 * @q: request_queue of interest
227 * 240 *
228 * Lookup blkg for the @blkcg - @q pair. If it doesn't exist, try to 241 * Lookup blkg for the @blkcg - @q pair. If it doesn't exist, try to
229 * create one. This function should be called under RCU read lock and 242 * create one. blkg creation is performed recursively from blkcg_root such
230 * @q->queue_lock. 243 * that all non-root blkg's have access to the parent blkg. This function
244 * should be called under RCU read lock and @q->queue_lock.
231 * 245 *
232 * Returns pointer to the looked up or created blkg on success, ERR_PTR() 246 * Returns pointer to the looked up or created blkg on success, ERR_PTR()
233 * value on error. If @q is dead, returns ERR_PTR(-EINVAL). If @q is not 247 * value on error. If @q is dead, returns ERR_PTR(-EINVAL). If @q is not
@@ -252,7 +266,23 @@ struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg,
252 if (blkg) 266 if (blkg)
253 return blkg; 267 return blkg;
254 268
255 return blkg_create(blkcg, q, NULL); 269 /*
270 * Create blkgs walking down from blkcg_root to @blkcg, so that all
271 * non-root blkgs have access to their parents.
272 */
273 while (true) {
274 struct blkcg *pos = blkcg;
275 struct blkcg *parent = blkcg_parent(blkcg);
276
277 while (parent && !__blkg_lookup(parent, q, false)) {
278 pos = parent;
279 parent = blkcg_parent(parent);
280 }
281
282 blkg = blkg_create(pos, q, NULL);
283 if (pos == blkcg || IS_ERR(blkg))
284 return blkg;
285 }
256} 286}
257EXPORT_SYMBOL_GPL(blkg_lookup_create); 287EXPORT_SYMBOL_GPL(blkg_lookup_create);
258 288
@@ -321,8 +351,10 @@ static void blkg_rcu_free(struct rcu_head *rcu_head)
321 351
322void __blkg_release(struct blkcg_gq *blkg) 352void __blkg_release(struct blkcg_gq *blkg)
323{ 353{
324 /* release the extra blkcg reference this blkg has been holding */ 354 /* release the blkcg and parent blkg refs this blkg has been holding */
325 css_put(&blkg->blkcg->css); 355 css_put(&blkg->blkcg->css);
356 if (blkg->parent)
357 blkg_put(blkg->parent);
326 358
327 /* 359 /*
328 * A group is freed in rcu manner. But having an rcu lock does not 360 * A group is freed in rcu manner. But having an rcu lock does not
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h
index 24597309e23d..b26ed58899fe 100644
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -94,8 +94,13 @@ struct blkcg_gq {
94 struct list_head q_node; 94 struct list_head q_node;
95 struct hlist_node blkcg_node; 95 struct hlist_node blkcg_node;
96 struct blkcg *blkcg; 96 struct blkcg *blkcg;
97
98 /* all non-root blkcg_gq's are guaranteed to have access to parent */
99 struct blkcg_gq *parent;
100
97 /* request allocation list for this blkcg-q pair */ 101 /* request allocation list for this blkcg-q pair */
98 struct request_list rl; 102 struct request_list rl;
103
99 /* reference count */ 104 /* reference count */
100 int refcnt; 105 int refcnt;
101 106
@@ -181,6 +186,19 @@ static inline struct blkcg *bio_blkcg(struct bio *bio)
181} 186}
182 187
183/** 188/**
189 * blkcg_parent - get the parent of a blkcg
190 * @blkcg: blkcg of interest
191 *
192 * Return the parent blkcg of @blkcg, or %NULL if the cgroup backing @blkcg has no parent. Can be called anytime.
193 */
194static inline struct blkcg *blkcg_parent(struct blkcg *blkcg)
195{
196 struct cgroup *pcg = blkcg->css.cgroup->parent;
197
198 return pcg ? cgroup_to_blkcg(pcg) : NULL;
199}
200
201/**
184 * blkg_to_pdata - get policy private data 202 * blkg_to_pdata - get policy private data
185 * @blkg: blkg of interest 203 * @blkg: blkg of interest
186 * @pol: policy of interest 204 * @pol: policy of interest