aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Michal Hocko <mhocko@suse.cz> 2015-02-11 18:24:56 -0500
committer Linus Torvalds <torvalds@linux-foundation.org> 2015-02-11 20:06:00 -0500
commit 83363b917a2982dd509a5e2125e905b6873505a3 (patch)
tree b654d0a1ddd9fa54d94d0587864658d789e1df2e
parent d7a94e7e11badf8404d40b41e008c3131a3cebe3 (diff)
oom: make sure that TIF_MEMDIE is set under task_lock
The OOM killer tries to exclude tasks which do not have an mm_struct associated, because killing such a task wouldn't help much. The OOM victim gets TIF_MEMDIE set to disable the OOM killer while the current victim releases its memory, and then enables the OOM killer again by dropping the flag. oom_kill_process is currently prone to a race condition when the OOM victim is already exiting and TIF_MEMDIE is set after the task releases its address space. This might theoretically lead to an OOM livelock if the OOM victim blocks on an allocation later during exit, because the OOM killer wouldn't kill any other process and the exiting one won't be able to exit. The situation is highly unlikely because the OOM victim is expected to release some memory, which should help to sort out the OOM situation. Fix this by checking task->mm and setting the TIF_MEMDIE flag under task_lock, which will serialize the OOM killer with exit_mm, which sets task->mm to NULL. Taking task_lock when setting the flag for current is not necessary because the check and set are not racy in that case. Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> Signed-off-by: Michal Hocko <mhocko@suse.cz> Cc: David Rientjes <rientjes@google.com> Cc: Oleg Nesterov <oleg@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- mm/oom_kill.c 7
1 files changed, 5 insertions, 2 deletions
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index f82dd13cca68..294493a7ae4b 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -438,11 +438,14 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
438 * If the task is already exiting, don't alarm the sysadmin or kill 438 * If the task is already exiting, don't alarm the sysadmin or kill
439 * its children or threads, just set TIF_MEMDIE so it can die quickly 439 * its children or threads, just set TIF_MEMDIE so it can die quickly
440 */ 440 */
441 if (task_will_free_mem(p)) { 441 task_lock(p);
442 if (p->mm && task_will_free_mem(p)) {
442 set_tsk_thread_flag(p, TIF_MEMDIE); 443 set_tsk_thread_flag(p, TIF_MEMDIE);
444 task_unlock(p);
443 put_task_struct(p); 445 put_task_struct(p);
444 return; 446 return;
445 } 447 }
448 task_unlock(p);
446 449
447 if (__ratelimit(&oom_rs)) 450 if (__ratelimit(&oom_rs))
448 dump_header(p, gfp_mask, order, memcg, nodemask); 451 dump_header(p, gfp_mask, order, memcg, nodemask);
@@ -492,6 +495,7 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
492 495
493 /* mm cannot safely be dereferenced after task_unlock(victim) */ 496 /* mm cannot safely be dereferenced after task_unlock(victim) */
494 mm = victim->mm; 497 mm = victim->mm;
498 set_tsk_thread_flag(victim, TIF_MEMDIE);
495 pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n", 499 pr_err("Killed process %d (%s) total-vm:%lukB, anon-rss:%lukB, file-rss:%lukB\n",
496 task_pid_nr(victim), victim->comm, K(victim->mm->total_vm), 500 task_pid_nr(victim), victim->comm, K(victim->mm->total_vm),
497 K(get_mm_counter(victim->mm, MM_ANONPAGES)), 501 K(get_mm_counter(victim->mm, MM_ANONPAGES)),
@@ -522,7 +526,6 @@ void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order,
522 } 526 }
523 rcu_read_unlock(); 527 rcu_read_unlock();
524 528
525 set_tsk_thread_flag(victim, TIF_MEMDIE);
526 do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true); 529 do_send_sig_info(SIGKILL, SEND_SIG_FORCED, victim, true);
527 put_task_struct(victim); 530 put_task_struct(victim);
528} 531}