aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author: Tejun Heo <tj@kernel.org> 2013-08-13 20:22:51 -0400
committer: Tejun Heo <tj@kernel.org> 2013-08-13 20:22:51 -0400
commit: 0c21ead136a900c36f1ab74fd7d09a306dc31324 (patch)
tree: 38be4611e1f6ebd1c7ea0c9f61dd2c6aed30fc79 /kernel
parent: 3c14f8b44fafaa60519440bea1591e495b928327 (diff)
cgroup: RCU protect each cgroup_subsys_state release
With the planned unified hierarchy, individual css's will be created and destroyed dynamically across the lifetime of a cgroup. To enable such usages, css destruction is being decoupled from cgroup destruction. Most of the destruction path has been decoupled but the actual free of css still depends on cgroup free path.

When all css refs are drained, css_release() kicks off css_free_work_fn() which puts the cgroup. When the cgroup refcnt reaches zero, cgroup_diput() is invoked which in turn schedules RCU free of the cgroup. After a grace period, all css's are freed along with the cgroup itself.

This patch moves the RCU grace period and css freeing from cgroup release path to css release path. css_release(), instead of kicking off css_free_work_fn() directly, schedules RCU callback css_free_rcu_fn() which in turn kicks off css_free_work_fn() after an RCU grace period. css_free_work_fn() is updated to free the css directly.

The five-way punting - percpu ref kill confirmation, a work item, percpu ref release, RCU grace period, and again a work item - is quite hairy but the work items are there only to provide process context and the actual sequence is kill confirm -> release -> RCU free, which isn't simple but not too crazy.

This removes cgroup_css() usage after offline_css() allowing clearing cgroup->subsys[] from offline_css(), which makes it consistent with online_css() and brings it closer to proper lifetime management for individual css's.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Li Zefan <lizefan@huawei.com>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/cgroup.c53
1 files changed, 37 insertions, 16 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 3137e38995b0..66d01078eebe 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -869,18 +869,8 @@ static struct cgroup_name *cgroup_alloc_name(struct dentry *dentry)
869static void cgroup_free_fn(struct work_struct *work) 869static void cgroup_free_fn(struct work_struct *work)
870{ 870{
871 struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work); 871 struct cgroup *cgrp = container_of(work, struct cgroup, destroy_work);
872 struct cgroup_subsys *ss;
873 872
874 mutex_lock(&cgroup_mutex); 873 mutex_lock(&cgroup_mutex);
875 /*
876 * Release the subsystem state objects.
877 */
878 for_each_root_subsys(cgrp->root, ss) {
879 struct cgroup_subsys_state *css = cgroup_css(cgrp, ss->subsys_id);
880
881 ss->css_free(css);
882 }
883
884 cgrp->root->number_of_cgroups--; 874 cgrp->root->number_of_cgroups--;
885 mutex_unlock(&cgroup_mutex); 875 mutex_unlock(&cgroup_mutex);
886 876
@@ -4281,32 +4271,62 @@ err:
4281 return ret; 4271 return ret;
4282} 4272}
4283 4273
4274/*
4275 * css destruction is a four-stage process.
4276 *
4277 * 1. Destruction starts. Killing of the percpu_ref is initiated.
4278 * Implemented in kill_css().
4279 *
4280 * 2. When the percpu_ref is confirmed to be visible as killed on all CPUs
4281 * and thus css_tryget() is guaranteed to fail, the css can be offlined
4282 * by invoking offline_css(). After offlining, the base ref is put.
4283 * Implemented in css_killed_work_fn().
4284 *
4285 * 3. When the percpu_ref reaches zero, the only possible remaining
4286 * accessors are inside RCU read sections. css_release() schedules the
4287 * RCU callback.
4288 *
4289 * 4. After the grace period, the css can be freed. Implemented in
4290 * css_free_work_fn().
4291 *
4292 * It is actually hairier because both steps 2 and 4 require process context
4293 * and thus involve punting to css->destroy_work, adding two additional
4294 * steps to the already complex sequence.
4295 */
4284static void css_free_work_fn(struct work_struct *work) 4296static void css_free_work_fn(struct work_struct *work)
4285{ 4297{
4286 struct cgroup_subsys_state *css = 4298 struct cgroup_subsys_state *css =
4287 container_of(work, struct cgroup_subsys_state, destroy_work); 4299 container_of(work, struct cgroup_subsys_state, destroy_work);
4300 struct cgroup *cgrp = css->cgroup;
4288 4301
4289 if (css->parent) 4302 if (css->parent)
4290 css_put(css->parent); 4303 css_put(css->parent);
4291 4304
4292 cgroup_dput(css->cgroup); 4305 css->ss->css_free(css);
4306 cgroup_dput(cgrp);
4293} 4307}
4294 4308
4295static void css_release(struct percpu_ref *ref) 4309static void css_free_rcu_fn(struct rcu_head *rcu_head)
4296{ 4310{
4297 struct cgroup_subsys_state *css = 4311 struct cgroup_subsys_state *css =
4298 container_of(ref, struct cgroup_subsys_state, refcnt); 4312 container_of(rcu_head, struct cgroup_subsys_state, rcu_head);
4299 4313
4300 /* 4314 /*
4301 * css holds an extra ref to @cgrp->dentry which is put on the last 4315 * css holds an extra ref to @cgrp->dentry which is put on the last
4302 * css_put(). dput() requires process context, which css_put() may 4316 * css_put(). dput() requires process context which we don't have.
4303 * be called without. @css->destroy_work will be used to invoke
4304 * dput() asynchronously from css_put().
4305 */ 4317 */
4306 INIT_WORK(&css->destroy_work, css_free_work_fn); 4318 INIT_WORK(&css->destroy_work, css_free_work_fn);
4307 schedule_work(&css->destroy_work); 4319 schedule_work(&css->destroy_work);
4308} 4320}
4309 4321
4322static void css_release(struct percpu_ref *ref)
4323{
4324 struct cgroup_subsys_state *css =
4325 container_of(ref, struct cgroup_subsys_state, refcnt);
4326
4327 call_rcu(&css->rcu_head, css_free_rcu_fn);
4328}
4329
4310static void init_css(struct cgroup_subsys_state *css, struct cgroup_subsys *ss, 4330static void init_css(struct cgroup_subsys_state *css, struct cgroup_subsys *ss,
4311 struct cgroup *cgrp) 4331 struct cgroup *cgrp)
4312{ 4332{
@@ -4356,6 +4376,7 @@ static void offline_css(struct cgroup_subsys_state *css)
4356 4376
4357 css->flags &= ~CSS_ONLINE; 4377 css->flags &= ~CSS_ONLINE;
4358 css->cgroup->nr_css--; 4378 css->cgroup->nr_css--;
4379 RCU_INIT_POINTER(css->cgroup->subsys[ss->subsys_id], css);
4359} 4380}
4360 4381
4361/* 4382/*