aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMichal Hocko <mhocko@suse.com>2016-05-20 19:57:21 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-05-20 20:58:30 -0400
commitec8d7c14ea14922fe21945b458a75e39f11dd832 (patch)
treefe45cbd94518218d2be1288a812dbca8c1e01d95
parentbb8a4b7fd1266ef888b3a80aa5f266062b224ef4 (diff)
mm, oom_reaper: do not mmput synchronously from the oom reaper context
Tetsuo has properly noted that the mmput slow path might get blocked waiting for another party (e.g. exit_aio waits for an IO). If that happens the oom_reaper would be put out of the way and will not be able to process the next oom victim. We should strive for making this context as reliable and independent of other subsystems as much as possible. Introduce mmput_async which will perform the slow path from an async (WQ) context. This will delay the operation but that shouldn't be a problem because in most cases the oom_reaper has already reclaimed the victim's address space as much as possible and the remaining context shouldn't bind too much memory anymore. The only exception is when the mmap_sem trylock has failed, which shouldn't happen too often. The issue is only theoretical but not impossible. Signed-off-by: Michal Hocko <mhocko@suse.com> Reported-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp> Cc: David Rientjes <rientjes@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/mm_types.h2
-rw-r--r--include/linux/sched.h5
-rw-r--r--kernel/fork.c50
-rw-r--r--mm/oom_kill.c8
4 files changed, 48 insertions, 17 deletions
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 1fda9c99ef95..d553855503e6 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -12,6 +12,7 @@
12#include <linux/cpumask.h> 12#include <linux/cpumask.h>
13#include <linux/uprobes.h> 13#include <linux/uprobes.h>
14#include <linux/page-flags-layout.h> 14#include <linux/page-flags-layout.h>
15#include <linux/workqueue.h>
15#include <asm/page.h> 16#include <asm/page.h>
16#include <asm/mmu.h> 17#include <asm/mmu.h>
17 18
@@ -513,6 +514,7 @@ struct mm_struct {
513#ifdef CONFIG_HUGETLB_PAGE 514#ifdef CONFIG_HUGETLB_PAGE
514 atomic_long_t hugetlb_usage; 515 atomic_long_t hugetlb_usage;
515#endif 516#endif
517 struct work_struct async_put_work;
516}; 518};
517 519
518static inline void mm_init_cpumask(struct mm_struct *mm) 520static inline void mm_init_cpumask(struct mm_struct *mm)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 40eabf176ce2..479e3cade7e9 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2730,6 +2730,11 @@ static inline void mmdrop(struct mm_struct * mm)
2730 2730
2731/* mmput gets rid of the mappings and all user-space */ 2731/* mmput gets rid of the mappings and all user-space */
2732extern void mmput(struct mm_struct *); 2732extern void mmput(struct mm_struct *);
2733/* same as above but performs the slow path from the async context. Can
2734 * be called from the atomic context as well
2735 */
2736extern void mmput_async(struct mm_struct *);
2737
2733/* Grab a reference to a task's mm, if it is not already going away */ 2738/* Grab a reference to a task's mm, if it is not already going away */
2734extern struct mm_struct *get_task_mm(struct task_struct *task); 2739extern struct mm_struct *get_task_mm(struct task_struct *task);
2735/* 2740/*
diff --git a/kernel/fork.c b/kernel/fork.c
index 3e8451527cbe..8fbed7194af1 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -699,6 +699,26 @@ void __mmdrop(struct mm_struct *mm)
699} 699}
700EXPORT_SYMBOL_GPL(__mmdrop); 700EXPORT_SYMBOL_GPL(__mmdrop);
701 701
702static inline void __mmput(struct mm_struct *mm)
703{
704 VM_BUG_ON(atomic_read(&mm->mm_users));
705
706 uprobe_clear_state(mm);
707 exit_aio(mm);
708 ksm_exit(mm);
709 khugepaged_exit(mm); /* must run before exit_mmap */
710 exit_mmap(mm);
711 set_mm_exe_file(mm, NULL);
712 if (!list_empty(&mm->mmlist)) {
713 spin_lock(&mmlist_lock);
714 list_del(&mm->mmlist);
715 spin_unlock(&mmlist_lock);
716 }
717 if (mm->binfmt)
718 module_put(mm->binfmt->module);
719 mmdrop(mm);
720}
721
702/* 722/*
703 * Decrement the use count and release all resources for an mm. 723 * Decrement the use count and release all resources for an mm.
704 */ 724 */
@@ -706,24 +726,24 @@ void mmput(struct mm_struct *mm)
706{ 726{
707 might_sleep(); 727 might_sleep();
708 728
729 if (atomic_dec_and_test(&mm->mm_users))
730 __mmput(mm);
731}
732EXPORT_SYMBOL_GPL(mmput);
733
734static void mmput_async_fn(struct work_struct *work)
735{
736 struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work);
737 __mmput(mm);
738}
739
740void mmput_async(struct mm_struct *mm)
741{
709 if (atomic_dec_and_test(&mm->mm_users)) { 742 if (atomic_dec_and_test(&mm->mm_users)) {
710 uprobe_clear_state(mm); 743 INIT_WORK(&mm->async_put_work, mmput_async_fn);
711 exit_aio(mm); 744 schedule_work(&mm->async_put_work);
712 ksm_exit(mm);
713 khugepaged_exit(mm); /* must run before exit_mmap */
714 exit_mmap(mm);
715 set_mm_exe_file(mm, NULL);
716 if (!list_empty(&mm->mmlist)) {
717 spin_lock(&mmlist_lock);
718 list_del(&mm->mmlist);
719 spin_unlock(&mmlist_lock);
720 }
721 if (mm->binfmt)
722 module_put(mm->binfmt->module);
723 mmdrop(mm);
724 } 745 }
725} 746}
726EXPORT_SYMBOL_GPL(mmput);
727 747
728/** 748/**
729 * set_mm_exe_file - change a reference to the mm's executable file 749 * set_mm_exe_file - change a reference to the mm's executable file
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index c0376efa79ec..c0e37dd1422f 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -446,7 +446,6 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait);
446static struct task_struct *oom_reaper_list; 446static struct task_struct *oom_reaper_list;
447static DEFINE_SPINLOCK(oom_reaper_lock); 447static DEFINE_SPINLOCK(oom_reaper_lock);
448 448
449
450static bool __oom_reap_task(struct task_struct *tsk) 449static bool __oom_reap_task(struct task_struct *tsk)
451{ 450{
452 struct mmu_gather tlb; 451 struct mmu_gather tlb;
@@ -520,7 +519,12 @@ static bool __oom_reap_task(struct task_struct *tsk)
520 */ 519 */
521 set_bit(MMF_OOM_REAPED, &mm->flags); 520 set_bit(MMF_OOM_REAPED, &mm->flags);
522out: 521out:
523 mmput(mm); 522 /*
523 * Drop our reference but make sure the mmput slow path is called from a
524 * different context because we shouldn't risk we get stuck there and
525 * put the oom_reaper out of the way.
526 */
527 mmput_async(mm);
524 return ret; 528 return ret;
525} 529}
526 530