aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorMichal Hocko <mhocko@suse.com>2016-05-27 17:27:35 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-05-27 17:49:37 -0400
commite2fe14564d3316d1625ed20bf1083995f4960893 (patch)
tree6839d455f54a74288b13b1d95c3f39594a6e1638 /mm
parentf65e91df25aa426289cbcb580ca3183e24979fb1 (diff)
oom_reaper: close race with exiting task
Tetsuo has reported: Out of memory: Kill process 443 (oleg's-test) score 855 or sacrifice child Killed process 443 (oleg's-test) total-vm:493248kB, anon-rss:423880kB, file-rss:4kB, shmem-rss:0kB sh invoked oom-killer: gfp_mask=0x24201ca(GFP_HIGHUSER_MOVABLE|__GFP_COLD), order=0, oom_score_adj=0 sh cpuset=/ mems_allowed=0 CPU: 2 PID: 1 Comm: sh Not tainted 4.6.0-rc7+ #51 Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 07/31/2013 Call Trace: dump_stack+0x85/0xc8 dump_header+0x5b/0x394 oom_reaper: reaped process 443 (oleg's-test), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB In other words: __oom_reap_task exit_mm atomic_inc_not_zero tsk->mm = NULL mmput atomic_dec_and_test # > 0 exit_oom_victim # New victim will be # selected <OOM killer invoked> # no TIF_MEMDIE task so we can select a new one unmap_page_range # to release the memory The race exists even without the oom_reaper because anybody who pins the address space and gets preempted might race with exit_mm but oom_reaper made this race more probable. We can address the oom_reaper part by using oom_lock for __oom_reap_task because this would guarantee that a new oom victim will not be selected if the oom reaper might race with the exit path. This doesn't solve the original issue, though, because somebody else still might be pinning mm_users and so __mmput won't be called to release the memory but that is not really realiably solvable because the task will get away from the oom sight as soon as it is unhashed from the task_list and so we cannot guarantee a new victim won't be selected. [akpm@linux-foundation.org: fix use of unused `mm', Per Stephen] [akpm@linux-foundation.org: coding-style fixes] Fixes: aac453635549 ("mm, oom: introduce oom reaper") Link: http://lkml.kernel.org/r/1464271493-20008-1-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko <mhocko@suse.com> Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/oom_kill.c30
1 files changed, 24 insertions, 6 deletions
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 326dd14938f0..dfb1ab61fb23 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -443,13 +443,29 @@ static bool __oom_reap_task(struct task_struct *tsk)
443{ 443{
444 struct mmu_gather tlb; 444 struct mmu_gather tlb;
445 struct vm_area_struct *vma; 445 struct vm_area_struct *vma;
446 struct mm_struct *mm; 446 struct mm_struct *mm = NULL;
447 struct task_struct *p; 447 struct task_struct *p;
448 struct zap_details details = {.check_swap_entries = true, 448 struct zap_details details = {.check_swap_entries = true,
449 .ignore_dirty = true}; 449 .ignore_dirty = true};
450 bool ret = true; 450 bool ret = true;
451 451
452 /* 452 /*
453 * We have to make sure to not race with the victim exit path
454 * and cause premature new oom victim selection:
455 * __oom_reap_task exit_mm
456 * atomic_inc_not_zero
457 * mmput
458 * atomic_dec_and_test
459 * exit_oom_victim
460 * [...]
461 * out_of_memory
462 * select_bad_process
463 * # no TIF_MEMDIE task selects new victim
464 * unmap_page_range # frees some memory
465 */
466 mutex_lock(&oom_lock);
467
468 /*
453 * Make sure we find the associated mm_struct even when the particular 469 * Make sure we find the associated mm_struct even when the particular
454 * thread has already terminated and cleared its mm. 470 * thread has already terminated and cleared its mm.
455 * We might have race with exit path so consider our work done if there 471 * We might have race with exit path so consider our work done if there
@@ -457,19 +473,19 @@ static bool __oom_reap_task(struct task_struct *tsk)
457 */ 473 */
458 p = find_lock_task_mm(tsk); 474 p = find_lock_task_mm(tsk);
459 if (!p) 475 if (!p)
460 return true; 476 goto unlock_oom;
461 477
462 mm = p->mm; 478 mm = p->mm;
463 if (!atomic_inc_not_zero(&mm->mm_users)) { 479 if (!atomic_inc_not_zero(&mm->mm_users)) {
464 task_unlock(p); 480 task_unlock(p);
465 return true; 481 goto unlock_oom;
466 } 482 }
467 483
468 task_unlock(p); 484 task_unlock(p);
469 485
470 if (!down_read_trylock(&mm->mmap_sem)) { 486 if (!down_read_trylock(&mm->mmap_sem)) {
471 ret = false; 487 ret = false;
472 goto out; 488 goto unlock_oom;
473 } 489 }
474 490
475 tlb_gather_mmu(&tlb, mm, 0, -1); 491 tlb_gather_mmu(&tlb, mm, 0, -1);
@@ -511,13 +527,15 @@ static bool __oom_reap_task(struct task_struct *tsk)
511 * to release its memory. 527 * to release its memory.
512 */ 528 */
513 set_bit(MMF_OOM_REAPED, &mm->flags); 529 set_bit(MMF_OOM_REAPED, &mm->flags);
514out: 530unlock_oom:
531 mutex_unlock(&oom_lock);
515 /* 532 /*
516 * Drop our reference but make sure the mmput slow path is called from a 533 * Drop our reference but make sure the mmput slow path is called from a
517 * different context because we shouldn't risk we get stuck there and 534 * different context because we shouldn't risk we get stuck there and
518 * put the oom_reaper out of the way. 535 * put the oom_reaper out of the way.
519 */ 536 */
520 mmput_async(mm); 537 if (mm)
538 mmput_async(mm);
521 return ret; 539 return ret;
522} 540}
523 541