aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/exit.c
diff options
context:
space:
mode:
authorKAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>2011-06-15 18:08:43 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2011-06-15 23:04:01 -0400
commit733eda7ac316cd4e550fa096e4ed42356dc546e7 (patch)
tree6522f47608fb97314a7b615b1cb1d6e2c58952c8 /kernel/exit.c
parent37573e8c718277103f61f03741bdc5606d31b07e (diff)
memcg: clear mm->owner when last possible owner leaves
The following crash was reported: > Call Trace: > [<ffffffff81139792>] mem_cgroup_from_task+0x15/0x17 > [<ffffffff8113a75a>] __mem_cgroup_try_charge+0x148/0x4b4 > [<ffffffff810493f3>] ? need_resched+0x23/0x2d > [<ffffffff814cbf43>] ? preempt_schedule+0x46/0x4f > [<ffffffff8113afe8>] mem_cgroup_charge_common+0x9a/0xce > [<ffffffff8113b6d1>] mem_cgroup_newpage_charge+0x5d/0x5f > [<ffffffff81134024>] khugepaged+0x5da/0xfaf > [<ffffffff81078ea0>] ? __init_waitqueue_head+0x4b/0x4b > [<ffffffff81133a4a>] ? add_mm_counter.constprop.5+0x13/0x13 > [<ffffffff81078625>] kthread+0xa8/0xb0 > [<ffffffff814d13e8>] ? sub_preempt_count+0xa1/0xb4 > [<ffffffff814d5664>] kernel_thread_helper+0x4/0x10 > [<ffffffff814ce858>] ? retint_restore_args+0x13/0x13 > [<ffffffff8107857d>] ? __init_kthread_worker+0x5a/0x5a What happens is that khugepaged tries to charge a huge page against an mm whose last possible owner has already exited, and the memory controller crashes when the stale mm->owner is used to look up the cgroup to charge. mm->owner has never been set to NULL with the last owner going away, but nobody cared until khugepaged came along. Even then it wasn't a problem because the final mmput() on an mm was forced to acquire and release mmap_sem in write-mode, preventing an exiting owner to go away while the mmap_sem was held, and until "692e0b3 mm: thp: optimize memcg charge in khugepaged", the memory cgroup charge was protected by mmap_sem in read-mode. Instead of going back to relying on the mmap_sem to enforce lifetime of a task, this patch ensures that mm->owner is properly set to NULL when the last possible owner is exiting, which the memory controller can handle just fine. [akpm@linux-foundation.org: tweak comments] Signed-off-by: Hugh Dickins <hughd@google.com> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> Reported-by: Hugh Dickins <hughd@google.com> Reported-by: Dave Jones <davej@redhat.com> Reviewed-by: Andrea Arcangeli <aarcange@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel/exit.c')
-rw-r--r--kernel/exit.c31
1 files changed, 15 insertions, 16 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 20a406471525..f2b321bae440 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -561,29 +561,28 @@ void exit_files(struct task_struct *tsk)
561 561
562#ifdef CONFIG_MM_OWNER 562#ifdef CONFIG_MM_OWNER
563/* 563/*
564 * Task p is exiting and it owned mm, lets find a new owner for it 564 * A task is exiting. If it owned this mm, find a new owner for the mm.
565 */ 565 */
566static inline int
567mm_need_new_owner(struct mm_struct *mm, struct task_struct *p)
568{
569 /*
570 * If there are other users of the mm and the owner (us) is exiting
571 * we need to find a new owner to take on the responsibility.
572 */
573 if (atomic_read(&mm->mm_users) <= 1)
574 return 0;
575 if (mm->owner != p)
576 return 0;
577 return 1;
578}
579
580void mm_update_next_owner(struct mm_struct *mm) 566void mm_update_next_owner(struct mm_struct *mm)
581{ 567{
582 struct task_struct *c, *g, *p = current; 568 struct task_struct *c, *g, *p = current;
583 569
584retry: 570retry:
585 if (!mm_need_new_owner(mm, p)) 571 /*
572 * If the exiting or execing task is not the owner, it's
573 * someone else's problem.
574 */
575 if (mm->owner != p)
586 return; 576 return;
577 /*
578 * The current owner is exiting/execing and there are no other
579 * candidates. Do not leave the mm pointing to a possibly
580 * freed task structure.
581 */
582 if (atomic_read(&mm->mm_users) <= 1) {
583 mm->owner = NULL;
584 return;
585 }
587 586
588 read_lock(&tasklist_lock); 587 read_lock(&tasklist_lock);
589 /* 588 /*