aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> 2009-01-07 21:08:29 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2009-01-08 11:31:09 -0500
commit: 7f4d454dee2e0bdd21bafd413d1c53e443a26540 (patch)
tree: abf54c2bd7c91fe09685e42b3a92d84679403058
parent: a5e924f5f8abf97944e625d74967cc9452cfbce8 (diff)
memcg: avoid deadlock caused by race between oom and cpuset_attach
mpol_rebind_mm(), which can be called from cpuset_attach(), does down_write(mm->mmap_sem). This means down_write(mm->mmap_sem) can be called under cgroup_mutex.

On the other hand, the page fault path does down_read(mm->mmap_sem) and calls mem_cgroup_try_charge_xxx(), which may eventually call mem_cgroup_out_of_memory(). And mem_cgroup_out_of_memory() calls cgroup_lock(). This means cgroup_lock() can be called under down_read(mm->mmap_sem).

If those two paths race, a deadlock can happen.

This patch avoids this deadlock by:
- removing cgroup_lock() from mem_cgroup_out_of_memory().
- defining a new mutex (memcg_tasklist) and using it to serialize mem_cgroup_move_task() (the ->attach handler of the memory cgroup) and mem_cgroup_out_of_memory().

Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--  mm/memcontrol.c  5
-rw-r--r--  mm/oom_kill.c    2
2 files changed, 5 insertions, 2 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 435f08dac8bf..861037070f66 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -51,6 +51,7 @@ static int really_do_swap_account __initdata = 1; /* for remember boot option*/
51#define do_swap_account (0) 51#define do_swap_account (0)
52#endif 52#endif
53 53
54static DEFINE_MUTEX(memcg_tasklist); /* can be hold under cgroup_mutex */
54 55
55/* 56/*
56 * Statistics for memory cgroup. 57 * Statistics for memory cgroup.
@@ -827,7 +828,9 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
827 828
828 if (!nr_retries--) { 829 if (!nr_retries--) {
829 if (oom) { 830 if (oom) {
831 mutex_lock(&memcg_tasklist);
830 mem_cgroup_out_of_memory(mem_over_limit, gfp_mask); 832 mem_cgroup_out_of_memory(mem_over_limit, gfp_mask);
833 mutex_unlock(&memcg_tasklist);
831 mem_over_limit->last_oom_jiffies = jiffies; 834 mem_over_limit->last_oom_jiffies = jiffies;
832 } 835 }
833 goto nomem; 836 goto nomem;
@@ -2211,10 +2214,12 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
2211 struct cgroup *old_cont, 2214 struct cgroup *old_cont,
2212 struct task_struct *p) 2215 struct task_struct *p)
2213{ 2216{
2217 mutex_lock(&memcg_tasklist);
2214 /* 2218 /*
2215 * FIXME: It's better to move charges of this process from old 2219 * FIXME: It's better to move charges of this process from old
2216 * memcg to new memcg. But it's just on TODO-List now. 2220 * memcg to new memcg. But it's just on TODO-List now.
2217 */ 2221 */
2222 mutex_unlock(&memcg_tasklist);
2218} 2223}
2219 2224
2220struct cgroup_subsys mem_cgroup_subsys = { 2225struct cgroup_subsys mem_cgroup_subsys = {
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index fd150e3a2567..40ba05061a4f 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -429,7 +429,6 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask)
429 unsigned long points = 0; 429 unsigned long points = 0;
430 struct task_struct *p; 430 struct task_struct *p;
431 431
432 cgroup_lock();
433 read_lock(&tasklist_lock); 432 read_lock(&tasklist_lock);
434retry: 433retry:
435 p = select_bad_process(&points, mem); 434 p = select_bad_process(&points, mem);
@@ -444,7 +443,6 @@ retry:
444 goto retry; 443 goto retry;
445out: 444out:
446 read_unlock(&tasklist_lock); 445 read_unlock(&tasklist_lock);
447 cgroup_unlock();
448} 446}
449#endif 447#endif
450 448