author     Johannes Weiner <hannes@cmpxchg.org>          2013-09-12 18:13:42 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2013-09-12 18:38:01 -0400
commit     519e52473ebe9db5cdef44670d5a97f1fd53d721 (patch)
tree       635fce64ff3658250745b9c8dfebd47e981a5b16 /mm
parent     3a13c4d761b4b979ba8767f42345fed3274991b0 (diff)
mm: memcg: enable memcg OOM killer only for user faults
System calls and kernel faults (uaccess, gup) can handle an out-of-memory
situation gracefully and just return -ENOMEM.
Enable the memcg OOM killer only for user faults, where it's really the
only option available.
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: David Rientjes <rientjes@google.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: azurIt <azurit@pobox.sk>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
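
The pattern the patch implements is a per-task flag (current->memcg_oom.may_oom)
that the memcg charge path consults before falling back to the OOM killer, and
that the fault handler sets only around user-triggered faults (FAULT_FLAG_USER).
Below is a minimal user-space sketch of that gating idea; struct task,
charge_memory(), and handle_fault() are illustrative stand-ins, not the kernel's
interfaces:

```c
#include <stdbool.h>
#include <stdio.h>

/* Illustrative stand-in for the per-task state (current->memcg_oom.may_oom). */
struct task {
	bool memcg_may_oom;
};

static struct task current_task;	/* stands in for the kernel's "current" */

/* Stand-in for the charge path: only invoke the OOM killer if the task
 * opted in; otherwise report failure to the caller. */
static int charge_memory(struct task *task, bool over_limit)
{
	if (!over_limit)
		return 0;
	if (!task->memcg_may_oom)
		return -1;	/* kernel fault path: caller just sees -ENOMEM */
	printf("invoking OOM killer as a last resort\n");
	return 0;
}

/* Stand-in for the fault entry point: enable OOM handling only around
 * user faults, and always clear it again on the way out. */
static int handle_fault(struct task *task, bool user_fault)
{
	int ret;

	if (user_fault)
		task->memcg_may_oom = true;
	ret = charge_memory(task, true);
	if (user_fault)
		task->memcg_may_oom = false;
	return ret;
}

int main(void)
{
	printf("kernel fault -> %d\n", handle_fault(&current_task, false));
	printf("user fault   -> %d\n", handle_fault(&current_task, true));
	return 0;
}
```

Run this way, the kernel-fault call fails with an error code while the
user-fault call reaches the OOM path, mirroring the check the patch adds
to mm/memcontrol.c below.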
Diffstat (limited to 'mm')
 mm/filemap.c    | 11 ++++++++++-
 mm/memcontrol.c |  2 +-
 mm/memory.c     | 40 ++++++++++++++++++++++++++++++++++----------
 3 files changed, 41 insertions(+), 12 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index e607728db4a8..e3b6fc8c0b7b 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1614,6 +1614,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	struct inode *inode = mapping->host;
 	pgoff_t offset = vmf->pgoff;
 	struct page *page;
+	bool memcg_oom;
 	pgoff_t size;
 	int ret = 0;
 
@@ -1622,7 +1623,11 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		return VM_FAULT_SIGBUS;
 
 	/*
-	 * Do we have something in the page cache already?
+	 * Do we have something in the page cache already? Either
+	 * way, try readahead, but disable the memcg OOM killer for it
+	 * as readahead is optional and no errors are propagated up
+	 * the fault stack. The OOM killer is enabled while trying to
+	 * instantiate the faulting page individually below.
 	 */
 	page = find_get_page(mapping, offset);
 	if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
@@ -1630,10 +1635,14 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 		 * We found the page, so try async readahead before
 		 * waiting for the lock.
 		 */
+		memcg_oom = mem_cgroup_toggle_oom(false);
 		do_async_mmap_readahead(vma, ra, file, page, offset);
+		mem_cgroup_toggle_oom(memcg_oom);
 	} else if (!page) {
 		/* No page in the page cache at all */
+		memcg_oom = mem_cgroup_toggle_oom(false);
 		do_sync_mmap_readahead(vma, ra, file, offset);
+		mem_cgroup_toggle_oom(memcg_oom);
 		count_vm_event(PGMAJFAULT);
 		mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT);
 		ret = VM_FAULT_MAJOR;
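
Note the shape of mem_cgroup_toggle_oom() in the hunks above: it returns the
previous state, so filemap_fault() restores whatever setting was already in
effect rather than unconditionally re-enabling OOM handling. A small sketch of
that save/restore idiom; toggle_oom and task_may_oom are made-up names, not the
kernel API:

```c
#include <stdbool.h>

static bool task_may_oom = true;	/* illustrative per-task flag */

/* Set the new state and return the previous one, mirroring the
 * save/restore contract of mem_cgroup_toggle_oom() (illustrative only). */
static bool toggle_oom(bool new_state)
{
	bool old_state = task_may_oom;

	task_may_oom = new_state;
	return old_state;
}

static void do_optional_readahead(void)
{
	bool saved = toggle_oom(false);	/* readahead must not OOM-kill */

	/* ... best-effort work; failures are not propagated ... */

	toggle_oom(saved);		/* restore the caller's setting */
}

int main(void)
{
	do_optional_readahead();
	return 0;
}
```

Because the old value is handed back, sections that disable OOM handling can
nest safely: an inner section restores its caller's state, not a hard-coded
"enabled".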
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index c4524458b7d0..0980bbf6438d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2454,7 +2454,7 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 		return CHARGE_RETRY;
 
 	/* If we don't need to call oom-killer at el, return immediately */
-	if (!oom_check)
+	if (!oom_check || !current->memcg_oom.may_oom)
 		return CHARGE_NOMEM;
 	/* check OOM */
 	if (!mem_cgroup_handle_oom(mem_over_limit, gfp_mask, get_order(csize)))
diff --git a/mm/memory.c b/mm/memory.c
index 2b73dbde2274..a8f9deab8719 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3754,22 +3754,14 @@ unlock:
 /*
  * By the time we get here, we already hold the mm semaphore
  */
-int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
-		    unsigned long address, unsigned int flags)
+static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+			     unsigned long address, unsigned int flags)
 {
 	pgd_t *pgd;
 	pud_t *pud;
 	pmd_t *pmd;
 	pte_t *pte;
 
-	__set_current_state(TASK_RUNNING);
-
-	count_vm_event(PGFAULT);
-	mem_cgroup_count_vm_event(mm, PGFAULT);
-
-	/* do counter updates before entering really critical section. */
-	check_sync_rss_stat(current);
-
 	if (unlikely(is_vm_hugetlb_page(vma)))
 		return hugetlb_fault(mm, vma, address, flags);
 
@@ -3850,6 +3842,34 @@ retry:
 	return handle_pte_fault(mm, vma, address, pte, pmd, flags);
 }
 
+int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+		    unsigned long address, unsigned int flags)
+{
+	int ret;
+
+	__set_current_state(TASK_RUNNING);
+
+	count_vm_event(PGFAULT);
+	mem_cgroup_count_vm_event(mm, PGFAULT);
+
+	/* do counter updates before entering really critical section. */
+	check_sync_rss_stat(current);
+
+	/*
+	 * Enable the memcg OOM handling for faults triggered in user
+	 * space. Kernel faults are handled more gracefully.
+	 */
+	if (flags & FAULT_FLAG_USER)
+		mem_cgroup_enable_oom();
+
+	ret = __handle_mm_fault(mm, vma, address, flags);
+
+	if (flags & FAULT_FLAG_USER)
+		mem_cgroup_disable_oom();
+
+	return ret;
+}
+
 #ifndef __PAGETABLE_PUD_FOLDED
 /*
  * Allocate page upper directory.
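
One design note on the mm/memory.c split: the statistics and task-state
updates move out of the page-table walk and into the new handle_mm_fault()
wrapper, so they run exactly once per fault, while __handle_mm_fault() can
return early (for example into hugetlb_fault()) without bypassing the
mem_cgroup_disable_oom() on the way out. The FAULT_FLAG_USER check on both
sides of the call is what keeps kernel-initiated faults (uaccess, gup) on
the plain -ENOMEM path described in the changelog.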