diff options
author | Johannes Weiner <hannes@cmpxchg.org> | 2013-10-16 16:46:59 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-10-17 00:35:53 -0400 |
commit | 4942642080ea82d99ab5b653abb9a12b7ba31f4a (patch) | |
tree | 7ec12b61f0bfdd1f1466e5233b67432828b25c33 /mm/filemap.c | |
parent | c88b05b2cd07221cdefd56f7f7422c1459eb60c9 (diff) |
mm: memcg: handle non-error OOM situations more gracefully
Commit 3812c8c8f395 ("mm: memcg: do not trap chargers with full
callstack on OOM") assumed that only a few places that can trigger a
memcg OOM situation do not return VM_FAULT_OOM, like optional page cache
readahead. But there are many more and it's impractical to annotate
them all.
First of all, we don't want to invoke the OOM killer when the failed
allocation is gracefully handled, so defer the actual kill to the end of
the fault handling as well. This simplifies the code quite a bit for
added bonus.
Second, since a failed allocation might not be the abrupt end of the
fault, the memcg OOM handler needs to be re-entrant until the fault
finishes for subsequent allocation attempts. If an allocation is
attempted after the task already OOMed, allow it to bypass the limit so
that it can quickly finish the fault and invoke the OOM killer.
Reported-by: azurIt <azurit@pobox.sk>
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: <stable@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/filemap.c')
-rw-r--r-- | mm/filemap.c | 11 |
1 files changed, 1 insertions, 10 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 1e6aec4a2d2e..ae4846ff4849 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -1616,7 +1616,6 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1616 | struct inode *inode = mapping->host; | 1616 | struct inode *inode = mapping->host; |
1617 | pgoff_t offset = vmf->pgoff; | 1617 | pgoff_t offset = vmf->pgoff; |
1618 | struct page *page; | 1618 | struct page *page; |
1619 | bool memcg_oom; | ||
1620 | pgoff_t size; | 1619 | pgoff_t size; |
1621 | int ret = 0; | 1620 | int ret = 0; |
1622 | 1621 | ||
@@ -1625,11 +1624,7 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1625 | return VM_FAULT_SIGBUS; | 1624 | return VM_FAULT_SIGBUS; |
1626 | 1625 | ||
1627 | /* | 1626 | /* |
1628 | * Do we have something in the page cache already? Either | 1627 | * Do we have something in the page cache already? |
1629 | * way, try readahead, but disable the memcg OOM killer for it | ||
1630 | * as readahead is optional and no errors are propagated up | ||
1631 | * the fault stack. The OOM killer is enabled while trying to | ||
1632 | * instantiate the faulting page individually below. | ||
1633 | */ | 1628 | */ |
1634 | page = find_get_page(mapping, offset); | 1629 | page = find_get_page(mapping, offset); |
1635 | if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) { | 1630 | if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) { |
@@ -1637,14 +1632,10 @@ int filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
1637 | * We found the page, so try async readahead before | 1632 | * We found the page, so try async readahead before |
1638 | * waiting for the lock. | 1633 | * waiting for the lock. |
1639 | */ | 1634 | */ |
1640 | memcg_oom = mem_cgroup_toggle_oom(false); | ||
1641 | do_async_mmap_readahead(vma, ra, file, page, offset); | 1635 | do_async_mmap_readahead(vma, ra, file, page, offset); |
1642 | mem_cgroup_toggle_oom(memcg_oom); | ||
1643 | } else if (!page) { | 1636 | } else if (!page) { |
1644 | /* No page in the page cache at all */ | 1637 | /* No page in the page cache at all */ |
1645 | memcg_oom = mem_cgroup_toggle_oom(false); | ||
1646 | do_sync_mmap_readahead(vma, ra, file, offset); | 1638 | do_sync_mmap_readahead(vma, ra, file, offset); |
1647 | mem_cgroup_toggle_oom(memcg_oom); | ||
1648 | count_vm_event(PGMAJFAULT); | 1639 | count_vm_event(PGMAJFAULT); |
1649 | mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT); | 1640 | mem_cgroup_count_vm_event(vma->vm_mm, PGMAJFAULT); |
1650 | ret = VM_FAULT_MAJOR; | 1641 | ret = VM_FAULT_MAJOR; |