aboutsummaryrefslogtreecommitdiffstats
path: root/mm/memory.c
diff options
context:
space:
mode:
author: David Rientjes <rientjes@google.com> 2012-05-29 18:06:23 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-05-29 19:22:19 -0400
commit: 1f1d06c34f7675026326cd9f39ff91e4555cf355 (patch)
tree: b2493685179e3b222c915002648c3baba56318d2 /mm/memory.c
parent: bde8bd8a1d5242589ddcaef8e017b48b207c4729 (diff)
thp, memcg: split hugepage for memcg oom on cow
On COW, a new hugepage is allocated and charged to the memcg. If the system is oom or the charge to the memcg fails, however, the fault handler will return VM_FAULT_OOM which results in an oom kill. Instead, it's possible to fall back to splitting the hugepage so that the COW results only in an order-0 page being allocated and charged to the memcg, which has a higher likelihood of succeeding. This is expensive because the hugepage must be split in the page fault handler, but it is much better than unnecessarily oom killing a process.
Signed-off-by: David Rientjes <rientjes@google.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <jweiner@redhat.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- mm/memory.c 18
1 files changed, 15 insertions, 3 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 2bf9e110437c..1b7dc662bf9f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3486,6 +3486,7 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3486 if (unlikely(is_vm_hugetlb_page(vma))) 3486 if (unlikely(is_vm_hugetlb_page(vma)))
3487 return hugetlb_fault(mm, vma, address, flags); 3487 return hugetlb_fault(mm, vma, address, flags);
3488 3488
3489retry:
3489 pgd = pgd_offset(mm, address); 3490 pgd = pgd_offset(mm, address);
3490 pud = pud_alloc(mm, pgd, address); 3491 pud = pud_alloc(mm, pgd, address);
3491 if (!pud) 3492 if (!pud)
@@ -3499,13 +3500,24 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
3499 pmd, flags); 3500 pmd, flags);
3500 } else { 3501 } else {
3501 pmd_t orig_pmd = *pmd; 3502 pmd_t orig_pmd = *pmd;
3503 int ret;
3504
3502 barrier(); 3505 barrier();
3503 if (pmd_trans_huge(orig_pmd)) { 3506 if (pmd_trans_huge(orig_pmd)) {
3504 if (flags & FAULT_FLAG_WRITE && 3507 if (flags & FAULT_FLAG_WRITE &&
3505 !pmd_write(orig_pmd) && 3508 !pmd_write(orig_pmd) &&
3506 !pmd_trans_splitting(orig_pmd)) 3509 !pmd_trans_splitting(orig_pmd)) {
3507 return do_huge_pmd_wp_page(mm, vma, address, 3510 ret = do_huge_pmd_wp_page(mm, vma, address, pmd,
3508 pmd, orig_pmd); 3511 orig_pmd);
3512 /*
3513 * If COW results in an oom, the huge pmd will
3514 * have been split, so retry the fault on the
3515 * pte for a smaller charge.
3516 */
3517 if (unlikely(ret & VM_FAULT_OOM))
3518 goto retry;
3519 return ret;
3520 }
3509 return 0; 3521 return 0;
3510 } 3522 }
3511 } 3523 }