aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
author: Konstantin Khlebnikov <khlebnikov@openvz.org> 2012-06-07 17:21:14 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-06-07 17:43:55 -0400
commit: 40af1bbdca47e5c8a2044039bb78ca8fd8b20f94 (patch)
tree: 70b5a21579f8b0137c09e54d523246d84be93430 /kernel
parent: 39caa0916ef27cf1da5026eb708a2b8413156f75 (diff)
mm: correctly synchronize rss-counters at exit/exec
mm->rss_stat counters have per-task delta: task->rss_stat. Before changing task->mm pointer the kernel must flush this delta with sync_mm_rss().

do_exit() already calls sync_mm_rss() to flush the rss-counters before committing the rss statistics into task->signal->maxrss, taskstats, audit and other stuff. Unfortunately the kernel does this before calling mm_release(), which can call put_user() for processing task->clear_child_tid. So at this point we can trigger page-faults and task->rss_stat becomes non-zero again. As a result mm->rss_stat becomes inconsistent and check_mm() will print something like this:

| BUG: Bad rss-counter state mm:ffff88020813c380 idx:1 val:-1
| BUG: Bad rss-counter state mm:ffff88020813c380 idx:2 val:1

This patch moves sync_mm_rss() into mm_release(), and moves mm_release() out of do_exit() and calls it earlier. After mm_release() there should be no pagefaults.

[akpm@linux-foundation.org: tweak comment]
Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Reported-by: Markus Trippelsdorf <markus@trippelsdorf.de>
Cc: Hugh Dickins <hughd@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: <stable@vger.kernel.org> [3.4.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/exit.c | 13
-rw-r--r-- kernel/fork.c | 8
2 files changed, 16 insertions, 5 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 34867cc5b42a..804fb6bb8161 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -423,6 +423,7 @@ void daemonize(const char *name, ...)
423 * user space pages. We don't need them, and if we didn't close them 423 * user space pages. We don't need them, and if we didn't close them
424 * they would be locked into memory. 424 * they would be locked into memory.
425 */ 425 */
426 mm_release(current, current->mm);
426 exit_mm(current); 427 exit_mm(current);
427 /* 428 /*
428 * We don't want to get frozen, in case system-wide hibernation 429 * We don't want to get frozen, in case system-wide hibernation
@@ -640,7 +641,6 @@ static void exit_mm(struct task_struct * tsk)
640 struct mm_struct *mm = tsk->mm; 641 struct mm_struct *mm = tsk->mm;
641 struct core_state *core_state; 642 struct core_state *core_state;
642 643
643 mm_release(tsk, mm);
644 if (!mm) 644 if (!mm)
645 return; 645 return;
646 /* 646 /*
@@ -960,9 +960,13 @@ void do_exit(long code)
960 preempt_count()); 960 preempt_count());
961 961
962 acct_update_integrals(tsk); 962 acct_update_integrals(tsk);
963 /* sync mm's RSS info before statistics gathering */ 963
964 if (tsk->mm) 964 /* Set exit_code before complete_vfork_done() in mm_release() */
965 sync_mm_rss(tsk->mm); 965 tsk->exit_code = code;
966
967 /* Release mm and sync mm's RSS info before statistics gathering */
968 mm_release(tsk, tsk->mm);
969
966 group_dead = atomic_dec_and_test(&tsk->signal->live); 970 group_dead = atomic_dec_and_test(&tsk->signal->live);
967 if (group_dead) { 971 if (group_dead) {
968 hrtimer_cancel(&tsk->signal->real_timer); 972 hrtimer_cancel(&tsk->signal->real_timer);
@@ -975,7 +979,6 @@ void do_exit(long code)
975 tty_audit_exit(); 979 tty_audit_exit();
976 audit_free(tsk); 980 audit_free(tsk);
977 981
978 tsk->exit_code = code;
979 taskstats_exit(tsk, group_dead); 982 taskstats_exit(tsk, group_dead);
980 983
981 exit_mm(tsk); 984 exit_mm(tsk);
diff --git a/kernel/fork.c b/kernel/fork.c
index ab5211b9e622..0560781c6904 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -619,6 +619,14 @@ void mmput(struct mm_struct *mm)
619 module_put(mm->binfmt->module); 619 module_put(mm->binfmt->module);
620 mmdrop(mm); 620 mmdrop(mm);
621 } 621 }
622
623 /*
624 * Final rss-counter synchronization. After this point there must be
625 * no pagefaults into this mm from the current context. Otherwise
626 * mm->rss_stat will be inconsistent.
627 */
628 if (mm)
629 sync_mm_rss(mm);
622} 630}
623EXPORT_SYMBOL_GPL(mmput); 631EXPORT_SYMBOL_GPL(mmput);
624 632