aboutsummaryrefslogtreecommitdiffstats
path: root/mm/oom_kill.c
diff options
context:
space:
mode:
authorMichal Hocko <mhocko@suse.com>2016-07-28 18:44:52 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-07-28 19:07:41 -0400
commit1af8bb43269563e458ebcf0ece812e9a970864b3 (patch)
treea254acd789b61f852f1c6817393e2acf253a1e8b /mm/oom_kill.c
parent97fd49c2355ffdede6526afc0c72bc314d05f42a (diff)
mm, oom: fortify task_will_free_mem()
task_will_free_mem is rather weak. It doesn't really tell whether the task has chance to drop its mm. 98748bd72200 ("oom: consider multi-threaded tasks in task_will_free_mem") made a first step into making it more robust for multi-threaded applications so now we know that the whole process is going down and probably drop the mm. This patch builds on top for more complex scenarios where mm is shared between different processes - CLONE_VM without CLONE_SIGHAND, or in kernel use_mm(). Make sure that all processes sharing the mm are killed or exiting. This will allow us to replace try_oom_reaper by wake_oom_reaper because task_will_free_mem implies the task is reapable now. Therefore all paths which bypass the oom killer are now reapable and so they shouldn't lock up the oom killer. Link: http://lkml.kernel.org/r/1466426628-15074-8-git-send-email-mhocko@kernel.org Signed-off-by: Michal Hocko <mhocko@suse.com> Acked-by: Oleg Nesterov <oleg@redhat.com> Cc: Vladimir Davydov <vdavydov@virtuozzo.com> Cc: David Rientjes <rientjes@google.com> Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/oom_kill.c')
-rw-r--r--mm/oom_kill.c133
1 file changed, 80 insertions, 53 deletions
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 38f89ac2df7f..8ee92fb76968 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -596,7 +596,7 @@ static int oom_reaper(void *unused)
596 return 0; 596 return 0;
597} 597}
598 598
599static void wake_oom_reaper(struct task_struct *tsk) 599void wake_oom_reaper(struct task_struct *tsk)
600{ 600{
601 if (!oom_reaper_th) 601 if (!oom_reaper_th)
602 return; 602 return;
@@ -614,46 +614,6 @@ static void wake_oom_reaper(struct task_struct *tsk)
614 wake_up(&oom_reaper_wait); 614 wake_up(&oom_reaper_wait);
615} 615}
616 616
617/* Check if we can reap the given task. This has to be called with stable
618 * tsk->mm
619 */
620void try_oom_reaper(struct task_struct *tsk)
621{
622 struct mm_struct *mm = tsk->mm;
623 struct task_struct *p;
624
625 if (!mm)
626 return;
627
628 /*
629 * There might be other threads/processes which are either not
630 * dying or even not killable.
631 */
632 if (atomic_read(&mm->mm_users) > 1) {
633 rcu_read_lock();
634 for_each_process(p) {
635 if (!process_shares_mm(p, mm))
636 continue;
637 if (fatal_signal_pending(p))
638 continue;
639
640 /*
641 * If the task is exiting make sure the whole thread group
642 * is exiting and cannot access mm anymore.
643 */
644 if (signal_group_exit(p->signal))
645 continue;
646
647 /* Give up */
648 rcu_read_unlock();
649 return;
650 }
651 rcu_read_unlock();
652 }
653
654 wake_oom_reaper(tsk);
655}
656
657static int __init oom_init(void) 617static int __init oom_init(void)
658{ 618{
659 oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper"); 619 oom_reaper_th = kthread_run(oom_reaper, NULL, "oom_reaper");
@@ -665,10 +625,6 @@ static int __init oom_init(void)
665 return 0; 625 return 0;
666} 626}
667subsys_initcall(oom_init) 627subsys_initcall(oom_init)
668#else
669static void wake_oom_reaper(struct task_struct *tsk)
670{
671}
672#endif 628#endif
673 629
674/** 630/**
@@ -745,6 +701,81 @@ void oom_killer_enable(void)
745 oom_killer_disabled = false; 701 oom_killer_disabled = false;
746} 702}
747 703
704static inline bool __task_will_free_mem(struct task_struct *task)
705{
706 struct signal_struct *sig = task->signal;
707
708 /*
709 * A coredumping process may sleep for an extended period in exit_mm(),
710 * so the oom killer cannot assume that the process will promptly exit
711 * and release memory.
712 */
713 if (sig->flags & SIGNAL_GROUP_COREDUMP)
714 return false;
715
716 if (sig->flags & SIGNAL_GROUP_EXIT)
717 return true;
718
719 if (thread_group_empty(task) && (task->flags & PF_EXITING))
720 return true;
721
722 return false;
723}
724
725/*
726 * Checks whether the given task is dying or exiting and likely to
727 * release its address space. This means that all threads and processes
728 * sharing the same mm have to be killed or exiting.
729 */
730bool task_will_free_mem(struct task_struct *task)
731{
732 struct mm_struct *mm;
733 struct task_struct *p;
734 bool ret;
735
736 if (!__task_will_free_mem(task))
737 return false;
738
739 /*
740 * If the process has passed exit_mm we have to skip it because
741 * we have lost a link to other tasks sharing this mm, we do not
742 * have anything to reap and the task might then get stuck waiting
743 * for parent as zombie and we do not want it to hold TIF_MEMDIE
744 */
745 p = find_lock_task_mm(task);
746 if (!p)
747 return false;
748
749 mm = p->mm;
750 if (atomic_read(&mm->mm_users) <= 1) {
751 task_unlock(p);
752 return true;
753 }
754
755 /* pin the mm to not get freed and reused */
756 atomic_inc(&mm->mm_count);
757 task_unlock(p);
758
759 /*
760 * This is really pessimistic but we do not have any reliable way
761 * to check that external processes share with our mm
762 */
763 rcu_read_lock();
764 for_each_process(p) {
765 if (!process_shares_mm(p, mm))
766 continue;
767 if (same_thread_group(task, p))
768 continue;
769 ret = __task_will_free_mem(p);
770 if (!ret)
771 break;
772 }
773 rcu_read_unlock();
774 mmdrop(mm);
775
776 return ret;
777}
778
748/* 779/*
749 * Must be called while holding a reference to p, which will be released upon 780 * Must be called while holding a reference to p, which will be released upon
750 * returning. 781 * returning.
@@ -766,15 +797,12 @@ void oom_kill_process(struct oom_control *oc, struct task_struct *p,
766 * If the task is already exiting, don't alarm the sysadmin or kill 797 * If the task is already exiting, don't alarm the sysadmin or kill
767 * its children or threads, just set TIF_MEMDIE so it can die quickly 798 * its children or threads, just set TIF_MEMDIE so it can die quickly
768 */ 799 */
769 task_lock(p); 800 if (task_will_free_mem(p)) {
770 if (p->mm && task_will_free_mem(p)) {
771 mark_oom_victim(p); 801 mark_oom_victim(p);
772 try_oom_reaper(p); 802 wake_oom_reaper(p);
773 task_unlock(p);
774 put_task_struct(p); 803 put_task_struct(p);
775 return; 804 return;
776 } 805 }
777 task_unlock(p);
778 806
779 if (__ratelimit(&oom_rs)) 807 if (__ratelimit(&oom_rs))
780 dump_header(oc, p); 808 dump_header(oc, p);
@@ -944,10 +972,9 @@ bool out_of_memory(struct oom_control *oc)
944 * But don't select if current has already released its mm and cleared 972 * But don't select if current has already released its mm and cleared
945 * TIF_MEMDIE flag at exit_mm(), otherwise an OOM livelock may occur. 973 * TIF_MEMDIE flag at exit_mm(), otherwise an OOM livelock may occur.
946 */ 974 */
947 if (current->mm && 975 if (current->mm && task_will_free_mem(current)) {
948 (fatal_signal_pending(current) || task_will_free_mem(current))) {
949 mark_oom_victim(current); 976 mark_oom_victim(current);
950 try_oom_reaper(current); 977 wake_oom_reaper(current);
951 return true; 978 return true;
952 } 979 }
953 980