author		Linus Torvalds <torvalds@linux-foundation.org>	2013-11-29 12:47:06 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2013-11-29 12:47:06 -0500
commit		2855987d13d2de99eb337cae98f5656e93452617
tree		8b43b58d2778ff26c71a0a2c06cde9ca66149174
parent		b8495995dd8ad425ec1b78f7182586d5a004d8ec
parent		e605b36575e896edd8161534550c9ea021b03bc0
Merge branch 'for-3.13-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup fixes from Tejun Heo:
 "Fixes for three issues.

   - cgroup destruction path could swamp system_wq possibly leading to
     deadlock.  This actually seems to happen in the wild with memcg
     because memcg destruction path adds nested dependency on system_wq.
     Resolved by isolating cgroup destruction work items on its
     dedicated workqueue.

   - Possible locking context deadlock through seqcount reported by
     lockdep

   - Memory leak under certain conditions"

* 'for-3.13-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
  cgroup: fix cgroup_subsys_state leak for seq_files
  cpuset: Fix memory allocator deadlock
  cgroup: use a dedicated workqueue for cgroup destruction
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/cgroup.c	35
-rw-r--r--	kernel/cpuset.c	8
2 files changed, 37 insertions(+), 6 deletions(-)
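
For context on the first fix: every schedule_work() call shares system_wq, and a burst of cgroup destructions can consume its max_active slots; if a destruction work item then waits on other work queued behind it (as memcg's path does), nothing makes progress. The patch moves destruction onto a private queue. Below is a minimal sketch of that pattern, not the patch itself; the demo_* names are hypothetical, while alloc_workqueue(), queue_work(), and core_initcall() are the real APIs the patch uses.

/* Sketch only: hypothetical demo_* names mirroring the patch's workqueue use. */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

static struct workqueue_struct *demo_destroy_wq;

struct demo_obj {
	struct work_struct destroy_work;
	/* ... payload ... */
};

static void demo_destroy_fn(struct work_struct *work)
{
	struct demo_obj *obj = container_of(work, struct demo_obj, destroy_work);

	/* Teardown may block or wait on other work, so keep it off system_wq. */
	kfree(obj);
}

/* Called when the last reference to the object is dropped. */
static void demo_release(struct demo_obj *obj)
{
	INIT_WORK(&obj->destroy_work, demo_destroy_fn);
	/* queue_work() on the private queue instead of schedule_work(). */
	queue_work(demo_destroy_wq, &obj->destroy_work);
}

static int __init demo_wq_init(void)
{
	/* max_active = 1: destruction is mostly serialized anyway. */
	demo_destroy_wq = alloc_workqueue("demo_destroy", 0, 1);
	BUG_ON(!demo_destroy_wq);
	return 0;
}
core_initcall(demo_wq_init);

The core_initcall() placement mirrors the patch: cgroup_init() runs before init_workqueues(), so the queue has to be allocated in a later initcall.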
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4c62513fe19f..8b729c278b64 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -90,6 +90,14 @@ static DEFINE_MUTEX(cgroup_mutex);
 static DEFINE_MUTEX(cgroup_root_mutex);
 
 /*
+ * cgroup destruction makes heavy use of work items and there can be a lot
+ * of concurrent destructions. Use a separate workqueue so that cgroup
+ * destruction work items don't end up filling up max_active of system_wq
+ * which may lead to deadlock.
+ */
+static struct workqueue_struct *cgroup_destroy_wq;
+
+/*
  * Generate an array of cgroup subsystem pointers. At boot time, this is
  * populated with the built in subsystems, and modular subsystems are
  * registered after that. The mutable section of this array is protected by
@@ -191,6 +199,7 @@ static void cgroup_destroy_css_killed(struct cgroup *cgrp);
 static int cgroup_destroy_locked(struct cgroup *cgrp);
 static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[],
 			      bool is_add);
+static int cgroup_file_release(struct inode *inode, struct file *file);
 
 /**
  * cgroup_css - obtain a cgroup's css for the specified subsystem
@@ -871,7 +880,7 @@ static void cgroup_free_rcu(struct rcu_head *head)
 	struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head);
 
 	INIT_WORK(&cgrp->destroy_work, cgroup_free_fn);
-	schedule_work(&cgrp->destroy_work);
+	queue_work(cgroup_destroy_wq, &cgrp->destroy_work);
 }
 
 static void cgroup_diput(struct dentry *dentry, struct inode *inode)
@@ -2421,7 +2430,7 @@ static const struct file_operations cgroup_seqfile_operations = {
 	.read = seq_read,
 	.write = cgroup_file_write,
 	.llseek = seq_lseek,
-	.release = single_release,
+	.release = cgroup_file_release,
 };
 
 static int cgroup_file_open(struct inode *inode, struct file *file)
@@ -2482,6 +2491,8 @@ static int cgroup_file_release(struct inode *inode, struct file *file)
 		ret = cft->release(inode, file);
 	if (css->ss)
 		css_put(css);
+	if (file->f_op == &cgroup_seqfile_operations)
+		single_release(inode, file);
 	return ret;
 }
 
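The two cgroup.c hunks above are the seq_file leak fix: cgroup seq_files released through bare single_release(), so the css reference pinned at open time was never dropped; routing .release through cgroup_file_release() performs the css_put() and then chains to single_release(). A minimal sketch of that chaining pattern under stated assumptions: the demo_* names are hypothetical, and a kref stands in for the css refcount.

/* Sketch only: hypothetical demo_* names; a kref models the pinned reference. */
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/kref.h>
#include <linux/seq_file.h>
#include <linux/slab.h>

struct demo_state {
	struct kref ref;	/* stands in for the css refcount */
};

static void demo_state_free(struct kref *ref)
{
	kfree(container_of(ref, struct demo_state, ref));
}

static int demo_show(struct seq_file *sf, void *v)
{
	return 0;		/* placeholder show callback */
}

static int demo_file_open(struct inode *inode, struct file *file)
{
	/* single_open() stores its third argument in seq_file->private. */
	return single_open(file, demo_show, inode->i_private);
}

static int demo_file_release(struct inode *inode, struct file *file)
{
	struct seq_file *sf = file->private_data;
	struct demo_state *st = sf->private;

	kref_put(&st->ref, demo_state_free);	/* the drop bare single_release() skipped */
	return single_release(inode, file);	/* then free the seq_file itself */
}

static const struct file_operations demo_fops = {
	.open		= demo_file_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= demo_file_release,	/* instead of bare single_release */
};

The design point: single_release() only frees the seq_file, so any reference taken at open time needs an explicit drop before delegating to it.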
@@ -4249,7 +4260,7 @@ static void css_free_rcu_fn(struct rcu_head *rcu_head)
 	 * css_put(). dput() requires process context which we don't have.
 	 */
 	INIT_WORK(&css->destroy_work, css_free_work_fn);
-	schedule_work(&css->destroy_work);
+	queue_work(cgroup_destroy_wq, &css->destroy_work);
 }
 
 static void css_release(struct percpu_ref *ref)
@@ -4539,7 +4550,7 @@ static void css_killed_ref_fn(struct percpu_ref *ref)
 		container_of(ref, struct cgroup_subsys_state, refcnt);
 
 	INIT_WORK(&css->destroy_work, css_killed_work_fn);
-	schedule_work(&css->destroy_work);
+	queue_work(cgroup_destroy_wq, &css->destroy_work);
 }
 
 /**
@@ -5063,6 +5074,22 @@ out:
 	return err;
 }
 
+static int __init cgroup_wq_init(void)
+{
+	/*
+	 * There isn't much point in executing destruction path in
+	 * parallel. Good chunk is serialized with cgroup_mutex anyway.
+	 * Use 1 for @max_active.
+	 *
+	 * We would prefer to do this in cgroup_init() above, but that
+	 * is called before init_workqueues(): so leave this until after.
+	 */
+	cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1);
+	BUG_ON(!cgroup_destroy_wq);
+	return 0;
+}
+core_initcall(cgroup_wq_init);
+
 /*
  * proc_cgroup_show()
  * - Print task's cgroup paths into seq_file, one line for each hierarchy
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 6bf981e13c43..4772034b4b17 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1033,8 +1033,10 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk,
 	need_loop = task_has_mempolicy(tsk) ||
 			!nodes_intersects(*newmems, tsk->mems_allowed);
 
-	if (need_loop)
+	if (need_loop) {
+		local_irq_disable();
 		write_seqcount_begin(&tsk->mems_allowed_seq);
+	}
 
 	nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems);
 	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1);
@@ -1042,8 +1044,10 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk,
 	mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2);
 	tsk->mems_allowed = *newmems;
 
-	if (need_loop)
+	if (need_loop) {
 		write_seqcount_end(&tsk->mems_allowed_seq);
+		local_irq_enable();
+	}
 
 	task_unlock(tsk);
 }
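
The cpuset hunks close the lockdep-reported deadlock: with interrupts enabled, an IRQ can arrive between write_seqcount_begin() and write_seqcount_end(); if the handler then allocates memory and the allocator reads mems_allowed_seq on the same CPU, it spins forever on the odd sequence count. Disabling IRQs across the write section removes that window. A minimal sketch of the writer/reader discipline, with hypothetical demo_* names:

/* Sketch only: hypothetical demo_* names; IRQ-safe seqcount writer and reader. */
#include <linux/irqflags.h>
#include <linux/seqlock.h>

static seqcount_t demo_seq = SEQCNT_ZERO(demo_seq);
static unsigned long demo_value;

/* Writer: IRQs must be off, or an IRQ-context reader on this CPU could spin forever. */
static void demo_update(unsigned long v)
{
	local_irq_disable();
	write_seqcount_begin(&demo_seq);
	demo_value = v;
	write_seqcount_end(&demo_seq);
	local_irq_enable();
}

/* Reader: retries while the writer holds the section (odd sequence count). */
static unsigned long demo_read(void)
{
	unsigned long v;
	unsigned seq;

	do {
		seq = read_seqcount_begin(&demo_seq);
		v = demo_value;
	} while (read_seqcount_retry(&demo_seq, seq));

	return v;
}

A reader that can run in interrupt context on the writer's CPU is exactly the case that makes a plain write_seqcount_begin()/write_seqcount_end() pairing unsafe.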