diff options
| | | |
|---|---|---|
| author | Hugh Dickins <hughd@google.com> | 2017-01-07 18:37:31 -0500 |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-01-07 20:49:33 -0500 |
| commit | b0b9b3df27d100a975b4e8818f35382b64a5e35c (patch) | |
| tree | 8d900d2be3801a7dd10c86c594b37e421167392c /mm/memory.c | |
| parent | 87bc610730a944b49f1c53ab9f4230d85f35df0c (diff) | |
mm: stop leaking PageTables
4.10-rc loadtest (even on x86, and even without THPCache) fails with
"fork: Cannot allocate memory" or some such; and /proc/meminfo shows
PageTables growing.
Commit 953c66c2b22a ("mm: THP page cache support for ppc64") that got
merged in rc1 removed the freeing of an unused preallocated pagetable
after do_fault_around() has called map_pages().
This is usually a good optimization, so that the followup doesn't have
to reallocate one; but it's not sufficient to shift the freeing into
alloc_set_pte(), since there are failure cases (most commonly
VM_FAULT_RETRY) which never reach finish_fault().
Check and free it at the outer level in do_fault(), then we don't need
to worry in alloc_set_pte(), and can restore that to how it was (I
cannot find any reason to pte_free() under lock as it was doing).
And fix a separate pagetable leak, or crash, introduced by the same
change, that could only show up on some ppc64: why does do_set_pmd()'s
failure case attempt to withdraw a pagetable when it never deposited
one, at the same time overwriting (so leaking) the vmf->prealloc_pte?
Residue of an earlier implementation, perhaps? Delete it.
Fixes: 953c66c2b22a ("mm: THP page cache support for ppc64")
Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- | mm/memory.c | 47 |
1 files changed, 20 insertions, 27 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 7d23b5050248..9f2c15cdb32c 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3008,13 +3008,6 @@ static int do_set_pmd(struct vm_fault *vmf, struct page *page) | |||
3008 | ret = 0; | 3008 | ret = 0; |
3009 | count_vm_event(THP_FILE_MAPPED); | 3009 | count_vm_event(THP_FILE_MAPPED); |
3010 | out: | 3010 | out: |
3011 | /* | ||
3012 | * If we are going to fallback to pte mapping, do a | ||
3013 | * withdraw with pmd lock held. | ||
3014 | */ | ||
3015 | if (arch_needs_pgtable_deposit() && ret == VM_FAULT_FALLBACK) | ||
3016 | vmf->prealloc_pte = pgtable_trans_huge_withdraw(vma->vm_mm, | ||
3017 | vmf->pmd); | ||
3018 | spin_unlock(vmf->ptl); | 3011 | spin_unlock(vmf->ptl); |
3019 | return ret; | 3012 | return ret; |
3020 | } | 3013 | } |
@@ -3055,20 +3048,18 @@ int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, | |||
3055 | 3048 | ||
3056 | ret = do_set_pmd(vmf, page); | 3049 | ret = do_set_pmd(vmf, page); |
3057 | if (ret != VM_FAULT_FALLBACK) | 3050 | if (ret != VM_FAULT_FALLBACK) |
3058 | goto fault_handled; | 3051 | return ret; |
3059 | } | 3052 | } |
3060 | 3053 | ||
3061 | if (!vmf->pte) { | 3054 | if (!vmf->pte) { |
3062 | ret = pte_alloc_one_map(vmf); | 3055 | ret = pte_alloc_one_map(vmf); |
3063 | if (ret) | 3056 | if (ret) |
3064 | goto fault_handled; | 3057 | return ret; |
3065 | } | 3058 | } |
3066 | 3059 | ||
3067 | /* Re-check under ptl */ | 3060 | /* Re-check under ptl */ |
3068 | if (unlikely(!pte_none(*vmf->pte))) { | 3061 | if (unlikely(!pte_none(*vmf->pte))) |
3069 | ret = VM_FAULT_NOPAGE; | 3062 | return VM_FAULT_NOPAGE; |
3070 | goto fault_handled; | ||
3071 | } | ||
3072 | 3063 | ||
3073 | flush_icache_page(vma, page); | 3064 | flush_icache_page(vma, page); |
3074 | entry = mk_pte(page, vma->vm_page_prot); | 3065 | entry = mk_pte(page, vma->vm_page_prot); |
@@ -3088,15 +3079,8 @@ int alloc_set_pte(struct vm_fault *vmf, struct mem_cgroup *memcg, | |||
3088 | 3079 | ||
3089 | /* no need to invalidate: a not-present page won't be cached */ | 3080 | /* no need to invalidate: a not-present page won't be cached */ |
3090 | update_mmu_cache(vma, vmf->address, vmf->pte); | 3081 | update_mmu_cache(vma, vmf->address, vmf->pte); |
3091 | ret = 0; | ||
3092 | 3082 | ||
3093 | fault_handled: | 3083 | return 0; |
3094 | /* preallocated pagetable is unused: free it */ | ||
3095 | if (vmf->prealloc_pte) { | ||
3096 | pte_free(vmf->vma->vm_mm, vmf->prealloc_pte); | ||
3097 | vmf->prealloc_pte = 0; | ||
3098 | } | ||
3099 | return ret; | ||
3100 | } | 3084 | } |
3101 | 3085 | ||
3102 | 3086 | ||
@@ -3360,15 +3344,24 @@ static int do_shared_fault(struct vm_fault *vmf) | |||
3360 | static int do_fault(struct vm_fault *vmf) | 3344 | static int do_fault(struct vm_fault *vmf) |
3361 | { | 3345 | { |
3362 | struct vm_area_struct *vma = vmf->vma; | 3346 | struct vm_area_struct *vma = vmf->vma; |
3347 | int ret; | ||
3363 | 3348 | ||
3364 | /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ | 3349 | /* The VMA was not fully populated on mmap() or missing VM_DONTEXPAND */ |
3365 | if (!vma->vm_ops->fault) | 3350 | if (!vma->vm_ops->fault) |
3366 | return VM_FAULT_SIGBUS; | 3351 | ret = VM_FAULT_SIGBUS; |
3367 | if (!(vmf->flags & FAULT_FLAG_WRITE)) | 3352 | else if (!(vmf->flags & FAULT_FLAG_WRITE)) |
3368 | return do_read_fault(vmf); | 3353 | ret = do_read_fault(vmf); |
3369 | if (!(vma->vm_flags & VM_SHARED)) | 3354 | else if (!(vma->vm_flags & VM_SHARED)) |
3370 | return do_cow_fault(vmf); | 3355 | ret = do_cow_fault(vmf); |
3371 | return do_shared_fault(vmf); | 3356 | else |
3357 | ret = do_shared_fault(vmf); | ||
3358 | |||
3359 | /* preallocated pagetable is unused: free it */ | ||
3360 | if (vmf->prealloc_pte) { | ||
3361 | pte_free(vma->vm_mm, vmf->prealloc_pte); | ||
3362 | vmf->prealloc_pte = 0; | ||
3363 | } | ||
3364 | return ret; | ||
3372 | } | 3365 | } |
3373 | 3366 | ||
3374 | static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, | 3367 | static int numa_migrate_prep(struct page *page, struct vm_area_struct *vma, |