diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-11-29 12:47:06 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-11-29 12:47:06 -0500 |
commit | 2855987d13d2de99eb337cae98f5656e93452617 (patch) | |
tree | 8b43b58d2778ff26c71a0a2c06cde9ca66149174 /kernel | |
parent | b8495995dd8ad425ec1b78f7182586d5a004d8ec (diff) | |
parent | e605b36575e896edd8161534550c9ea021b03bc0 (diff) |
Merge branch 'for-3.13-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup
Pull cgroup fixes from Tejun Heo:
"Fixes for three issues.
- The cgroup destruction path could swamp system_wq, possibly leading
to deadlock. This actually seems to happen in the wild with memcg
because the memcg destruction path adds a nested dependency on
system_wq. Resolved by isolating cgroup destruction work items on
their own dedicated workqueue.
- Possible locking context deadlock through seqcount (cpuset's
mems_allowed_seq), reported by lockdep
- Memory leak of cgroup_subsys_state for seq_files under certain conditions"
* 'for-3.13-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup:
cgroup: fix cgroup_subsys_state leak for seq_files
cpuset: Fix memory allocator deadlock
cgroup: use a dedicated workqueue for cgroup destruction
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cgroup.c | 35 | ||||
-rw-r--r-- | kernel/cpuset.c | 8 |
2 files changed, 37 insertions, 6 deletions
diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 4c62513fe19f..8b729c278b64 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c | |||
@@ -90,6 +90,14 @@ static DEFINE_MUTEX(cgroup_mutex); | |||
90 | static DEFINE_MUTEX(cgroup_root_mutex); | 90 | static DEFINE_MUTEX(cgroup_root_mutex); |
91 | 91 | ||
92 | /* | 92 | /* |
93 | * cgroup destruction makes heavy use of work items and there can be a lot | ||
94 | * of concurrent destructions. Use a separate workqueue so that cgroup | ||
95 | * destruction work items don't end up filling up max_active of system_wq | ||
96 | * which may lead to deadlock. | ||
97 | */ | ||
98 | static struct workqueue_struct *cgroup_destroy_wq; | ||
99 | |||
100 | /* | ||
93 | * Generate an array of cgroup subsystem pointers. At boot time, this is | 101 | * Generate an array of cgroup subsystem pointers. At boot time, this is |
94 | * populated with the built in subsystems, and modular subsystems are | 102 | * populated with the built in subsystems, and modular subsystems are |
95 | * registered after that. The mutable section of this array is protected by | 103 | * registered after that. The mutable section of this array is protected by |
@@ -191,6 +199,7 @@ static void cgroup_destroy_css_killed(struct cgroup *cgrp); | |||
191 | static int cgroup_destroy_locked(struct cgroup *cgrp); | 199 | static int cgroup_destroy_locked(struct cgroup *cgrp); |
192 | static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], | 200 | static int cgroup_addrm_files(struct cgroup *cgrp, struct cftype cfts[], |
193 | bool is_add); | 201 | bool is_add); |
202 | static int cgroup_file_release(struct inode *inode, struct file *file); | ||
194 | 203 | ||
195 | /** | 204 | /** |
196 | * cgroup_css - obtain a cgroup's css for the specified subsystem | 205 | * cgroup_css - obtain a cgroup's css for the specified subsystem |
@@ -871,7 +880,7 @@ static void cgroup_free_rcu(struct rcu_head *head) | |||
871 | struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head); | 880 | struct cgroup *cgrp = container_of(head, struct cgroup, rcu_head); |
872 | 881 | ||
873 | INIT_WORK(&cgrp->destroy_work, cgroup_free_fn); | 882 | INIT_WORK(&cgrp->destroy_work, cgroup_free_fn); |
874 | schedule_work(&cgrp->destroy_work); | 883 | queue_work(cgroup_destroy_wq, &cgrp->destroy_work); |
875 | } | 884 | } |
876 | 885 | ||
877 | static void cgroup_diput(struct dentry *dentry, struct inode *inode) | 886 | static void cgroup_diput(struct dentry *dentry, struct inode *inode) |
@@ -2421,7 +2430,7 @@ static const struct file_operations cgroup_seqfile_operations = { | |||
2421 | .read = seq_read, | 2430 | .read = seq_read, |
2422 | .write = cgroup_file_write, | 2431 | .write = cgroup_file_write, |
2423 | .llseek = seq_lseek, | 2432 | .llseek = seq_lseek, |
2424 | .release = single_release, | 2433 | .release = cgroup_file_release, |
2425 | }; | 2434 | }; |
2426 | 2435 | ||
2427 | static int cgroup_file_open(struct inode *inode, struct file *file) | 2436 | static int cgroup_file_open(struct inode *inode, struct file *file) |
@@ -2482,6 +2491,8 @@ static int cgroup_file_release(struct inode *inode, struct file *file) | |||
2482 | ret = cft->release(inode, file); | 2491 | ret = cft->release(inode, file); |
2483 | if (css->ss) | 2492 | if (css->ss) |
2484 | css_put(css); | 2493 | css_put(css); |
2494 | if (file->f_op == &cgroup_seqfile_operations) | ||
2495 | single_release(inode, file); | ||
2485 | return ret; | 2496 | return ret; |
2486 | } | 2497 | } |
2487 | 2498 | ||
@@ -4249,7 +4260,7 @@ static void css_free_rcu_fn(struct rcu_head *rcu_head) | |||
4249 | * css_put(). dput() requires process context which we don't have. | 4260 | * css_put(). dput() requires process context which we don't have. |
4250 | */ | 4261 | */ |
4251 | INIT_WORK(&css->destroy_work, css_free_work_fn); | 4262 | INIT_WORK(&css->destroy_work, css_free_work_fn); |
4252 | schedule_work(&css->destroy_work); | 4263 | queue_work(cgroup_destroy_wq, &css->destroy_work); |
4253 | } | 4264 | } |
4254 | 4265 | ||
4255 | static void css_release(struct percpu_ref *ref) | 4266 | static void css_release(struct percpu_ref *ref) |
@@ -4539,7 +4550,7 @@ static void css_killed_ref_fn(struct percpu_ref *ref) | |||
4539 | container_of(ref, struct cgroup_subsys_state, refcnt); | 4550 | container_of(ref, struct cgroup_subsys_state, refcnt); |
4540 | 4551 | ||
4541 | INIT_WORK(&css->destroy_work, css_killed_work_fn); | 4552 | INIT_WORK(&css->destroy_work, css_killed_work_fn); |
4542 | schedule_work(&css->destroy_work); | 4553 | queue_work(cgroup_destroy_wq, &css->destroy_work); |
4543 | } | 4554 | } |
4544 | 4555 | ||
4545 | /** | 4556 | /** |
@@ -5063,6 +5074,22 @@ out: | |||
5063 | return err; | 5074 | return err; |
5064 | } | 5075 | } |
5065 | 5076 | ||
5077 | static int __init cgroup_wq_init(void) | ||
5078 | { | ||
5079 | /* | ||
5080 | * There isn't much point in executing destruction path in | ||
5081 | * parallel. Good chunk is serialized with cgroup_mutex anyway. | ||
5082 | * Use 1 for @max_active. | ||
5083 | * | ||
5084 | * We would prefer to do this in cgroup_init() above, but that | ||
5085 | * is called before init_workqueues(): so leave this until after. | ||
5086 | */ | ||
5087 | cgroup_destroy_wq = alloc_workqueue("cgroup_destroy", 0, 1); | ||
5088 | BUG_ON(!cgroup_destroy_wq); | ||
5089 | return 0; | ||
5090 | } | ||
5091 | core_initcall(cgroup_wq_init); | ||
5092 | |||
5066 | /* | 5093 | /* |
5067 | * proc_cgroup_show() | 5094 | * proc_cgroup_show() |
5068 | * - Print task's cgroup paths into seq_file, one line for each hierarchy | 5095 | * - Print task's cgroup paths into seq_file, one line for each hierarchy |
diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 6bf981e13c43..4772034b4b17 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c | |||
@@ -1033,8 +1033,10 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk, | |||
1033 | need_loop = task_has_mempolicy(tsk) || | 1033 | need_loop = task_has_mempolicy(tsk) || |
1034 | !nodes_intersects(*newmems, tsk->mems_allowed); | 1034 | !nodes_intersects(*newmems, tsk->mems_allowed); |
1035 | 1035 | ||
1036 | if (need_loop) | 1036 | if (need_loop) { |
1037 | local_irq_disable(); | ||
1037 | write_seqcount_begin(&tsk->mems_allowed_seq); | 1038 | write_seqcount_begin(&tsk->mems_allowed_seq); |
1039 | } | ||
1038 | 1040 | ||
1039 | nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); | 1041 | nodes_or(tsk->mems_allowed, tsk->mems_allowed, *newmems); |
1040 | mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1); | 1042 | mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP1); |
@@ -1042,8 +1044,10 @@ static void cpuset_change_task_nodemask(struct task_struct *tsk, | |||
1042 | mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2); | 1044 | mpol_rebind_task(tsk, newmems, MPOL_REBIND_STEP2); |
1043 | tsk->mems_allowed = *newmems; | 1045 | tsk->mems_allowed = *newmems; |
1044 | 1046 | ||
1045 | if (need_loop) | 1047 | if (need_loop) { |
1046 | write_seqcount_end(&tsk->mems_allowed_seq); | 1048 | write_seqcount_end(&tsk->mems_allowed_seq); |
1049 | local_irq_enable(); | ||
1050 | } | ||
1047 | 1051 | ||
1048 | task_unlock(tsk); | 1052 | task_unlock(tsk); |
1049 | } | 1053 | } |