diff options
author | Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> | 2009-01-07 21:08:29 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-01-08 11:31:09 -0500 |
commit | 7f4d454dee2e0bdd21bafd413d1c53e443a26540 (patch) | |
tree | abf54c2bd7c91fe09685e42b3a92d84679403058 /mm | |
parent | a5e924f5f8abf97944e625d74967cc9452cfbce8 (diff) |
memcg: avoid deadlock caused by race between oom and cpuset_attach
mpol_rebind_mm(), which can be called from cpuset_attach(), does
down_write(mm->mmap_sem). This means down_write(mm->mmap_sem) can be
called under cgroup_mutex.
OTOH, the page fault path does down_read(mm->mmap_sem) and calls
mem_cgroup_try_charge_xxx(), which may eventually call
mem_cgroup_out_of_memory(). And mem_cgroup_out_of_memory() calls
cgroup_lock(). This means cgroup_lock() can be called under
down_read(mm->mmap_sem).
If those two paths race, a deadlock can happen.
This patch avoids this deadlock by:
- removing cgroup_lock() from mem_cgroup_out_of_memory().
- defining a new mutex (memcg_tasklist) and serializing mem_cgroup_move_task()
(->attach handler of memory cgroup) and mem_cgroup_out_of_memory().
Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Reviewed-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Acked-by: Balbir Singh <balbir@linux.vnet.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memcontrol.c | 5 | ||||
-rw-r--r-- | mm/oom_kill.c | 2 |
2 files changed, 5 insertions, 2 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 435f08dac8b..861037070f6 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -51,6 +51,7 @@ static int really_do_swap_account __initdata = 1; /* for remember boot option*/ | |||
51 | #define do_swap_account (0) | 51 | #define do_swap_account (0) |
52 | #endif | 52 | #endif |
53 | 53 | ||
54 | static DEFINE_MUTEX(memcg_tasklist); /* can be hold under cgroup_mutex */ | ||
54 | 55 | ||
55 | /* | 56 | /* |
56 | * Statistics for memory cgroup. | 57 | * Statistics for memory cgroup. |
@@ -827,7 +828,9 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, | |||
827 | 828 | ||
828 | if (!nr_retries--) { | 829 | if (!nr_retries--) { |
829 | if (oom) { | 830 | if (oom) { |
831 | mutex_lock(&memcg_tasklist); | ||
830 | mem_cgroup_out_of_memory(mem_over_limit, gfp_mask); | 832 | mem_cgroup_out_of_memory(mem_over_limit, gfp_mask); |
833 | mutex_unlock(&memcg_tasklist); | ||
831 | mem_over_limit->last_oom_jiffies = jiffies; | 834 | mem_over_limit->last_oom_jiffies = jiffies; |
832 | } | 835 | } |
833 | goto nomem; | 836 | goto nomem; |
@@ -2211,10 +2214,12 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss, | |||
2211 | struct cgroup *old_cont, | 2214 | struct cgroup *old_cont, |
2212 | struct task_struct *p) | 2215 | struct task_struct *p) |
2213 | { | 2216 | { |
2217 | mutex_lock(&memcg_tasklist); | ||
2214 | /* | 2218 | /* |
2215 | * FIXME: It's better to move charges of this process from old | 2219 | * FIXME: It's better to move charges of this process from old |
2216 | * memcg to new memcg. But it's just on TODO-List now. | 2220 | * memcg to new memcg. But it's just on TODO-List now. |
2217 | */ | 2221 | */ |
2222 | mutex_unlock(&memcg_tasklist); | ||
2218 | } | 2223 | } |
2219 | 2224 | ||
2220 | struct cgroup_subsys mem_cgroup_subsys = { | 2225 | struct cgroup_subsys mem_cgroup_subsys = { |
diff --git a/mm/oom_kill.c b/mm/oom_kill.c index fd150e3a256..40ba05061a4 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c | |||
@@ -429,7 +429,6 @@ void mem_cgroup_out_of_memory(struct mem_cgroup *mem, gfp_t gfp_mask) | |||
429 | unsigned long points = 0; | 429 | unsigned long points = 0; |
430 | struct task_struct *p; | 430 | struct task_struct *p; |
431 | 431 | ||
432 | cgroup_lock(); | ||
433 | read_lock(&tasklist_lock); | 432 | read_lock(&tasklist_lock); |
434 | retry: | 433 | retry: |
435 | p = select_bad_process(&points, mem); | 434 | p = select_bad_process(&points, mem); |
@@ -444,7 +443,6 @@ retry: | |||
444 | goto retry; | 443 | goto retry; |
445 | out: | 444 | out: |
446 | read_unlock(&tasklist_lock); | 445 | read_unlock(&tasklist_lock); |
447 | cgroup_unlock(); | ||
448 | } | 446 | } |
449 | #endif | 447 | #endif |
450 | 448 | ||