summaryrefslogtreecommitdiffstats
path: root/mm/memory.c
diff options
context:
space:
mode:
authorHugh Dickins <hughd@google.com>2017-01-07 18:37:31 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2017-01-07 20:49:33 -0500
commitb0b9b3df27d100a975b4e8818f35382b64a5e35c (patch)
tree8d900d2be3801a7dd10c86c594b37e421167392c /mm/memory.c
parent87bc610730a944b49f1c53ab9f4230d85f35df0c (diff)
mm: stop leaking PageTables
4.10-rc loadtest (even on x86, and even without THPCache) fails with "fork: Cannot allocate memory" or some such; and /proc/meminfo shows PageTables growing.

Commit 953c66c2b22a ("mm: THP page cache support for ppc64") that got merged in rc1 removed the freeing of an unused preallocated pagetable after do_fault_around() has called map_pages().

This is usually a good optimization, so that the followup doesn't have to reallocate one; but it's not sufficient to shift the freeing into alloc_set_pte(), since there are failure cases (most commonly VM_FAULT_RETRY) which never reach finish_fault().

Check and free it at the outer level in do_fault(), then we don't need to worry in alloc_set_pte(), and can restore that to how it was (I cannot find any reason to pte_free() under lock as it was doing).

And fix a separate pagetable leak, or crash, introduced by the same change, that could only show up on some ppc64: why does do_set_pmd()'s failure case attempt to withdraw a pagetable when it never deposited one, at the same time overwriting (so leaking) the vmf->prealloc_pte? Residue of an earlier implementation, perhaps? Delete it.

Fixes: 953c66c2b22a ("mm: THP page cache support for ppc64")
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memory.c')
-rw-r--r--mm/memory.c47
1 files changed, 20 insertions, 27 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 7d23b5050248..9f2c15cdb32c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3008,13 +3008,6 @@ static int do_set_pmd(struct vm_fault *vmf, struct page *page)
 	ret = 0;
 	count_vm_event(THP_FILE_MAPPED);
 out:
-	/*
-	 * If we are going to fallback to pte mapping, do a
-	 * withdraw with pmd lock held.
-	 */
-	if (arch_needs_pgtable_deposit() && ret == VM_FAULT_FALLBACK)
-		vmf->prealloc_pte = pgtable_trans_huge_withdraw(vma->vm_mm,
-								vmf->pmd);
 	spin_unlock(vmf->ptl);
 	return ret;
 }
@@ -3055,20 +3048,18 @@ int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
 
 		ret = do_set_pmd(vmf, page);
 		if (ret != VM_FAULT_FALLBACK)
-			goto fault_handled;
+			return ret;
 	}
 
 	if (!vmf->pte) {
 		ret = pte_alloc_one_map(vmf);
 		if (ret)
-			goto fault_handled;
+			return ret;
 	}
 
 	/* Re-check under ptl */
-	if (unlikely(!pte_none(*vmf->pte))) {
-		ret = VM_FAULT_NOPAGE;
-		goto fault_handled;
-	}
+	if (unlikely(!pte_none(*vmf->pte)))
+		return VM_FAULT_NOPAGE;
 
 	flush_icache_page(vma, page);
 	entry = mk_pte(page, vma->vm_page_prot);
@@ -3088,15 +3079,8 @@ int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg,
 
 	/* no need to invalidate: a not-present page won't be cached */
 	update_mmu_cache(vma, vmf->address, vmf->pte);
-	ret = 0;
 
-fault_handled:
-	/* preallocated pagetable is unused: free it */
-	if (vmf->prealloc_pte) {
-		pte_free(vmf->vma->vm_mm, vmf->prealloc_pte);
-		vmf->prealloc_pte = 0;
-	}
-	return ret;
+	return 0;
 }
 
 
@@ -3360,15 +3344,24 @@ static int do_shared_fault(struct vm_fault *vmf)
 static int do_fault(struct vm_fault *vmf)
 {
 	struct vm_area_struct *vma = vmf->vma;
+	int ret;
 
 	/* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */
 	if (!vma->vm_ops->fault)
-		return VM_FAULT_SIGBUS;
-	if (!(vmf->flags & FAULT_FLAG_WRITE))
-		return do_read_fault(vmf);
-	if (!(vma->vm_flags & VM_SHARED))
-		return do_cow_fault(vmf);
-	return do_shared_fault(vmf);
+		ret = VM_FAULT_SIGBUS;
+	else if (!(vmf->flags & FAULT_FLAG_WRITE))
+		ret = do_read_fault(vmf);
+	else if (!(vma->vm_flags & VM_SHARED))
+		ret = do_cow_fault(vmf);
+	else
+		ret = do_shared_fault(vmf);
+
+	/* preallocated pagetable is unused: free it */
+	if (vmf->prealloc_pte) {
+		pte_free(vma->vm_mm, vmf->prealloc_pte);
+		vmf->prealloc_pte = 0;
+	}
+	return ret;
 }
 
 static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma,