diff options
author | Konstantin Khlebnikov <khlebnikov@openvz.org> | 2012-06-07 17:21:14 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-06-07 17:43:55 -0400 |
commit | 40af1bbdca47e5c8a2044039bb78ca8fd8b20f94 (patch) | |
tree | 70b5a21579f8b0137c09e54d523246d84be93430 /kernel | |
parent | 39caa0916ef27cf1da5026eb708a2b8413156f75 (diff) |
mm: correctly synchronize rss-counters at exit/exec
mm->rss_stat counters have per-task delta: task->rss_stat. Before
changing task->mm pointer the kernel must flush this delta with
sync_mm_rss().
do_exit() already calls sync_mm_rss() to flush the rss-counters before
committing the rss statistics into task->signal->maxrss, taskstats,
audit and other consumers. Unfortunately the kernel does this before
calling mm_release(), which can call put_user() for processing
task->clear_child_tid. So at this point we can trigger page-faults and
task->rss_stat becomes non-zero again. As a result mm->rss_stat becomes
inconsistent and check_mm() will print something like this:
| BUG: Bad rss-counter state mm:ffff88020813c380 idx:1 val:-1
| BUG: Bad rss-counter state mm:ffff88020813c380 idx:2 val:1
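This patch moves sync_mm_rss() into mm_release(), moves the mm_release()
call out of exit_mm(), and calls it earlier in do_exit() (and explicitly in
daemonize() before exit_mm()). After mm_release() there should be no pagefaults.
[Review note: the kernel/fork.c hunk below is labelled mmput(), not
mm_release(), and adds sync_mm_rss() after the block that calls mmdrop(mm);
confirm that the placement matches this description.]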
[akpm@linux-foundation.org: tweak comment]
Signed-off-by: Konstantin Khlebnikov <khlebnikov@openvz.org>
Reported-by: Markus Trippelsdorf <markus@trippelsdorf.de>
Cc: Hugh Dickins <hughd@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: <stable@vger.kernel.org> [3.4.x]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/exit.c | 13 | ||||
-rw-r--r-- | kernel/fork.c | 8 |
2 files changed, 16 insertions, 5 deletions
diff --git a/kernel/exit.c b/kernel/exit.c index 34867cc5b42a..804fb6bb8161 100644 --- a/kernel/exit.c +++ b/kernel/exit.c | |||
@@ -423,6 +423,7 @@ void daemonize(const char *name, ...) | |||
423 | * user space pages. We don't need them, and if we didn't close them | 423 | * user space pages. We don't need them, and if we didn't close them |
424 | * they would be locked into memory. | 424 | * they would be locked into memory. |
425 | */ | 425 | */ |
426 | mm_release(current, current->mm); | ||
426 | exit_mm(current); | 427 | exit_mm(current); |
427 | /* | 428 | /* |
428 | * We don't want to get frozen, in case system-wide hibernation | 429 | * We don't want to get frozen, in case system-wide hibernation |
@@ -640,7 +641,6 @@ static void exit_mm(struct task_struct * tsk) | |||
640 | struct mm_struct *mm = tsk->mm; | 641 | struct mm_struct *mm = tsk->mm; |
641 | struct core_state *core_state; | 642 | struct core_state *core_state; |
642 | 643 | ||
643 | mm_release(tsk, mm); | ||
644 | if (!mm) | 644 | if (!mm) |
645 | return; | 645 | return; |
646 | /* | 646 | /* |
@@ -960,9 +960,13 @@ void do_exit(long code) | |||
960 | preempt_count()); | 960 | preempt_count()); |
961 | 961 | ||
962 | acct_update_integrals(tsk); | 962 | acct_update_integrals(tsk); |
963 | /* sync mm's RSS info before statistics gathering */ | 963 | |
964 | if (tsk->mm) | 964 | /* Set exit_code before complete_vfork_done() in mm_release() */ |
965 | sync_mm_rss(tsk->mm); | 965 | tsk->exit_code = code; |
966 | |||
967 | /* Release mm and sync mm's RSS info before statistics gathering */ | ||
968 | mm_release(tsk, tsk->mm); | ||
969 | |||
966 | group_dead = atomic_dec_and_test(&tsk->signal->live); | 970 | group_dead = atomic_dec_and_test(&tsk->signal->live); |
967 | if (group_dead) { | 971 | if (group_dead) { |
968 | hrtimer_cancel(&tsk->signal->real_timer); | 972 | hrtimer_cancel(&tsk->signal->real_timer); |
@@ -975,7 +979,6 @@ void do_exit(long code) | |||
975 | tty_audit_exit(); | 979 | tty_audit_exit(); |
976 | audit_free(tsk); | 980 | audit_free(tsk); |
977 | 981 | ||
978 | tsk->exit_code = code; | ||
979 | taskstats_exit(tsk, group_dead); | 982 | taskstats_exit(tsk, group_dead); |
980 | 983 | ||
981 | exit_mm(tsk); | 984 | exit_mm(tsk); |
diff --git a/kernel/fork.c b/kernel/fork.c index ab5211b9e622..0560781c6904 100644 --- a/kernel/fork.c +++ b/kernel/fork.c | |||
@@ -619,6 +619,14 @@ void mmput(struct mm_struct *mm) | |||
619 | module_put(mm->binfmt->module); | 619 | module_put(mm->binfmt->module); |
620 | mmdrop(mm); | 620 | mmdrop(mm); |
621 | } | 621 | } |
622 | |||
623 | /* | ||
624 | * Final rss-counter synchronization. After this point there must be | ||
625 | * no pagefaults into this mm from the current context. Otherwise | ||
626 | * mm->rss_stat will be inconsistent. | ||
627 | */ | ||
628 | if (mm) | ||
629 | sync_mm_rss(mm); | ||
622 | } | 630 | } |
623 | EXPORT_SYMBOL_GPL(mmput); | 631 | EXPORT_SYMBOL_GPL(mmput); |
624 | 632 | ||