about | summary | refs | log | tree | commit | diff | stats
path: root/kernel/cgroup.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/cgroup.c')
-rw-r--r-- kernel/cgroup.c 85
1 files changed, 63 insertions, 22 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4c62513fe19f..bc1dcabe9217 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -90,6 +90,14 @@ static DEFINE_MUTEX(cgroup_mutex);
90static DEFINE_MUTEX(cgroup_root_mutex); 90static DEFINE_MUTEX(cgroup_root_mutex);
91 91
92/* 92/*
93 * cgroup destruction makes heavy use of work items and there can be a lot
94 * of concurrent destructions. Use a separate workqueue so that cgroup
95 * destruction work items don't end up filling up max_active of system_wq
96 * which may lead to deadlock.
97 */
98static struct workqueue_struct *cgroup_destroy_wq;
99
100/*
93 * Generate an array of cgroup subsystem pointers. At boot time, this is 101 * Generate an array of cgroup subsystem pointers. At boot time, this is
94 * populated with the built in subsystems, and modular subsystems are 102 * populated with the built in subsystems, and modular subsystems are
95 * registered after that. The mutable section of this array is protected by 103 * registered after that. The mutable section of this array is protected by
@@ -191,6 +199,7 @@ static void cgroup_destroy_css_killed(struct cgroup *cgrp);
191static int cgroup_destroy_locked(struct cgroup *cgrp); 199static int cgroup_destroy_locked(struct cgroup *cgrp);
192static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], 200static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
193 bool is_add); 201 bool is_add);
202static int cgroup_file_release(struct inode *inode, struct file *file);
194 203
195/** 204/**
196 * cgroup_css - obtain a cgroup's css for the specified subsystem 205 * cgroup_css - obtain a cgroup's css for the specified subsystem
@@ -871,7 +880,7 @@ static void cgroup_free_rcu(struct rcu_head *head)
871 struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head); 880 struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head);
872 881
873 INIT_WORK(&cgrp->destroy_work, cgroup_free_fn); 882 INIT_WORK(&cgrp->destroy_work, cgroup_free_fn);
874 schedule_work(&cgrp->destroy_work); 883 queue_work(cgroup_destroy_wq, &cgrp->destroy_work);
875} 884}
876 885
877static void cgroup_diput(struct dentry *dentry, struct inode *inode) 886static void cgroup_diput(struct dentry *dentry, struct inode *inode)
@@ -881,6 +890,16 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)
881 struct cgroup *cgrp = dentry->d_fsdata; 890 struct cgroup *cgrp = dentry->d_fsdata;
882 891
883 BUG_ON(!(cgroup_is_dead(cgrp))); 892 BUG_ON(!(cgroup_is_dead(cgrp)));
893
894 /*
895 * XXX: cgrp->id is only used to look up css's. As cgroup
896 * and css's lifetimes will be decoupled, it should be made
897 * per-subsystem and moved to css->id so that lookups are
898 * successful until the target css is released.
899 */
900 idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
901 cgrp->id = -1;
902
884 call_rcu(&cgrp->rcu_head, cgroup_free_rcu); 903 call_rcu(&cgrp->rcu_head, cgroup_free_rcu);
885 } else { 904 } else {
886 struct cfent *cfe = __d_cfe(dentry); 905 struct cfent *cfe = __d_cfe(dentry);
@@ -2421,7 +2440,7 @@ static const struct file_operations cgroup_seqfile_operations = {
2421 .read = seq_read, 2440 .read = seq_read,
2422 .write = cgroup_file_write, 2441 .write = cgroup_file_write,
2423 .llseek = seq_lseek, 2442 .llseek = seq_lseek,
2424 .release = single_release, 2443 .release = cgroup_file_release,
2425}; 2444};
2426 2445
2427static int cgroup_file_open(struct inode *inode, struct file *file) 2446static int cgroup_file_open(struct inode *inode, struct file *file)
@@ -2482,6 +2501,8 @@ static int cgroup_file_release(struct inode *inode, struct file *file)
2482 ret = cft->release(inode, file); 2501 ret = cft->release(inode, file);
2483 if (css->ss) 2502 if (css->ss)
2484 css_put(css); 2503 css_put(css);
2504 if (file->f_op == &cgroup_seqfile_operations)
2505 single_release(inode, file);
2485 return ret; 2506 return ret;
2486} 2507}
2487 2508
@@ -4249,7 +4270,7 @@ static void css_free_rcu_fn(struct rcu_head *rcu_head)
4249 * css_put(). dput() requires process context which we don't have. 4270 * css_put(). dput() requires process context which we don't have.
4250 */ 4271 */
4251 INIT_WORK(&css->destroy_work, css_free_work_fn); 4272 INIT_WORK(&css->destroy_work, css_free_work_fn);
4252 schedule_work(&css->destroy_work); 4273 queue_work(cgroup_destroy_wq, &css->destroy_work);
4253} 4274}
4254 4275
4255static void css_release(struct percpu_ref *ref) 4276static void css_release(struct percpu_ref *ref)
@@ -4257,6 +4278,7 @@ static void css_release(struct percpu_ref *ref)
4257 struct cgroup_subsys_state *css = 4278 struct cgroup_subsys_state *css =
4258 container_of(ref, struct cgroup_subsys_state, refcnt); 4279 container_of(ref, struct cgroup_subsys_state, refcnt);
4259 4280
4281 rcu_assign_pointer(css->cgroup->subsys[css->ss->subsys_id], NULL);
4260 call_rcu(&css->rcu_head, css_free_rcu_fn); 4282 call_rcu(&css->rcu_head, css_free_rcu_fn);
4261} 4283}
4262 4284
@@ -4415,14 +4437,6 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
4415 list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children); 4437 list_add_tail_rcu(&cgrp->sibling, &cgrp->parent->children);
4416 root->number_of_cgroups++; 4438 root->number_of_cgroups++;
4417 4439
4418 /* each css holds a ref to the cgroup's dentry and the parent css */
4419 for_each_root_subsys(root, ss) {
4420 struct cgroup_subsys_state *css = css_ar[ss->subsys_id];
4421
4422 dget(dentry);
4423 css_get(css->parent);
4424 }
4425
4426 /* hold a ref to the parent's dentry */ 4440 /* hold a ref to the parent's dentry */
4427 dget(parent->dentry); 4441 dget(parent->dentry);
4428 4442
@@ -4434,6 +4448,13 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,
4434 if (err) 4448 if (err)
4435 goto err_destroy; 4449 goto err_destroy;
4436 4450
4451 /* each css holds a ref to the cgroup's dentry and parent css */
4452 dget(dentry);
4453 css_get(css->parent);
4454
4455 /* mark it consumed for error path */
4456 css_ar[ss->subsys_id] = NULL;
4457
4437 if (ss->broken_hierarchy && !ss->warned_broken_hierarchy && 4458 if (ss->broken_hierarchy && !ss->warned_broken_hierarchy &&
4438 parent->parent) { 4459 parent->parent) {
4439 pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n", 4460 pr_warning("cgroup: %s (%d) created nested cgroup for controller \"%s\" which has incomplete hierarchy support. Nested cgroups may change behavior in the future.\n",
@@ -4480,6 +4501,14 @@ err_free_cgrp:
4480 return err; 4501 return err;
4481 4502
4482err_destroy: 4503err_destroy:
4504 for_each_root_subsys(root, ss) {
4505 struct cgroup_subsys_state *css = css_ar[ss->subsys_id];
4506
4507 if (css) {
4508 percpu_ref_cancel_init(&css->refcnt);
4509 ss->css_free(css);
4510 }
4511 }
4483 cgroup_destroy_locked(cgrp); 4512 cgroup_destroy_locked(cgrp);
4484 mutex_unlock(&cgroup_mutex); 4513 mutex_unlock(&cgroup_mutex);
4485 mutex_unlock(&dentry->d_inode->i_mutex); 4514 mutex_unlock(&dentry->d_inode->i_mutex);
@@ -4539,7 +4568,7 @@ static void css_killed_ref_fn(struct percpu_ref *ref)
4539 container_of(ref, struct cgroup_subsys_state, refcnt); 4568 container_of(ref, struct cgroup_subsys_state, refcnt);
4540 4569
4541 INIT_WORK(&css->destroy_work, css_killed_work_fn); 4570 INIT_WORK(&css->destroy_work, css_killed_work_fn);
4542 schedule_work(&css->destroy_work); 4571 queue_work(cgroup_destroy_wq, &css->destroy_work);
4543} 4572}
4544 4573
4545/** 4574/**
@@ -4641,8 +4670,12 @@ static int cgroup_destroy_locked(struct cgroup *cgrp)
4641 * will be invoked to perform the rest of destruction once the 4670 * will be invoked to perform the rest of destruction once the
4642 * percpu refs of all css's are confirmed to be killed. 4671 * percpu refs of all css's are confirmed to be killed.
4643 */ 4672 */
4644 for_each_root_subsys(cgrp->root, ss) 4673 for_each_root_subsys(cgrp->root, ss) {
4645 kill_css(cgroup_css(cgrp, ss)); 4674 struct cgroup_subsys_state *css = cgroup_css(cgrp, ss);
4675
4676 if (css)
4677 kill_css(css);
4678 }
4646 4679
4647 /* 4680 /*
4648 * Mark @cgrp dead. This prevents further task migration and child 4681 * Mark @cgrp dead. This prevents further task migration and child
@@ -4711,14 +4744,6 @@ static void cgroup_destroy_css_killed(struct cgroup *cgrp)
4711 /* delete this cgroup from parent->children */ 4744 /* delete this cgroup from parent->children */
4712 list_del_rcu(&cgrp->sibling); 4745 list_del_rcu(&cgrp->sibling);
4713 4746
4714 /*
4715 * We should remove the cgroup object from idr before its grace
4716 * period starts, so we won't be looking up a cgroup while the
4717 * cgroup is being freed.
4718 */
4719 idr_remove(&cgrp->root->cgroup_idr, cgrp->id);
4720 cgrp->id = -1;
4721
4722 dput(d); 4747 dput(d);
4723 4748
4724 set_bit(CGRP_RELEASABLE, &parent->flags); 4749 set_bit(CGRP_RELEASABLE, &parent->flags);
@@ -5063,6 +5088,22 @@ out:
5063 return err; 5088 return err;
5064} 5089}
5065 5090
5091static int __init cgroup_wq_init(void)
5092{
5093 /*
5094 * Allocate cgroup_destroy_wq. There isn't much point in executing
5095 * the destruction path in parallel, as a good chunk of it is
5096 * serialized by cgroup_mutex anyway, so use 1 for @max_active.
5097 *
5098 * We would prefer to do this in cgroup_init() above, but that is
5099 * called before init_workqueues(), so it is deferred to an initcall.
5100 */
5101 cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
5102 BUG_ON(!cgroup_destroy_wq);
5103 return 0;
5104}
5105core_initcall(cgroup_wq_init);
5106
5066/* 5107/*
5067 * proc_cgroup_show() 5108 * proc_cgroup_show()
5068 * - Print task's cgroup paths into seq_file, one line for each hierarchy 5109 * - Print task's cgroup paths into seq_file, one line for each hierarchy