path: root/mm/memory.c
author: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>  2016-12-12 19:44:32 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org>  2016-12-12 21:55:08 -0500
commit: 953c66c2b22a304dbc3c3d7fc8e8c25cd97a03d8 (patch)
tree: c851d72f1be182bc4ae4857ff720e34835e68371 /mm/memory.c
parent: 1dd38b6c27d59414e89c08dd1ae9677a8e12cbc4 (diff)
mm: THP page cache support for ppc64
Add an arch-specific callback in the generic THP page cache code that will deposit and withdraw the preallocated page table. Archs like ppc64 use this preallocated table to store the hash pte slot information.

Testing: kernel build of the patch series on tmpfs mounted with option huge=always.

The related thp stats:
  thp_fault_alloc              72939
  thp_fault_fallback           60547
  thp_collapse_alloc             603
  thp_collapse_alloc_failed        0
  thp_file_alloc              253763
  thp_file_mapped               4251
  thp_split_page               51518
  thp_split_page_failed            1
  thp_deferred_split_page      73566
  thp_split_pmd                  665
  thp_zero_page_alloc              3
  thp_zero_page_alloc_failed       0

[akpm@linux-foundation.org: remove unneeded parentheses, per Kirill]
Link: http://lkml.kernel.org/r/20161113150025.17942-2-aneesh.kumar@linux.vnet.ibm.com
Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Michael Neuling <mikey@neuling.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
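Editor's note: the deposit/withdraw mechanism described above hinges on a single arch hook checked by the generic fault path. The standalone C program below is a rough model of that control flow only; every type and helper body is a simplified stand-in, not a real kernel definition (only the names arch_needs_pgtable_deposit(), the deposit helper, and the fallback/free path correspond to things in the diff).

/*
 * Userspace-only model of the preallocate/deposit/fallback flow that this
 * patch adds to do_set_pmd().  Illustration only; not kernel code.
 */
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct fault_env {
        void *prealloc_pte;     /* preallocated page table, if any */
};

/* Stand-in for the arch hook: ppc64 would return true, most arches false. */
static bool arch_needs_pgtable_deposit(void) { return true; }

static void *pte_alloc_one_model(void) { return calloc(1, 64); }

static void deposit_prealloc_pte_model(struct fault_env *fe)
{
        /* The real code stashes the table under the PMD and bumps nr_ptes. */
        printf("deposited prealloc table %p\n", fe->prealloc_pte);
        fe->prealloc_pte = NULL;
}

/* Models do_set_pmd(): 0 = huge mapping installed, 1 = VM_FAULT_FALLBACK. */
static int do_set_pmd_model(struct fault_env *fe, bool pmd_slot_free)
{
        /* Preallocate before taking the PMD lock, as in the patch. */
        if (arch_needs_pgtable_deposit() && !fe->prealloc_pte) {
                fe->prealloc_pte = pte_alloc_one_model();
                if (!fe->prealloc_pte)
                        return -1;      /* VM_FAULT_OOM in the kernel */
        }
        if (!pmd_slot_free)
                return 1;               /* fall back to mapping with PTEs */
        if (arch_needs_pgtable_deposit())
                deposit_prealloc_pte_model(fe); /* consumed under pmd lock */
        return 0;
}

int main(void)
{
        struct fault_env fe = { NULL };
        int ret = do_set_pmd_model(&fe, true);

        /* Mirrors the new fault_handled: label: free an unused preallocation. */
        if (fe.prealloc_pte) {
                free(fe.prealloc_pte);
                fe.prealloc_pte = NULL;
        }
        printf("ret=%d\n", ret);
        return 0;
}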
Diffstat (limited to 'mm/memory.c')
-rw-r--r--  mm/memory.c  60
1 file changed, 50 insertions(+), 10 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 0a72f821ccdc..32e9b7aec366 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2935,6 +2935,19 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
 	return true;
 }
 
+static void deposit_prealloc_pte(struct fault_env *fe)
+{
+	struct vm_area_struct *vma = fe->vma;
+
+	pgtable_trans_huge_deposit(vma->vm_mm, fe->pmd, fe->prealloc_pte);
+	/*
+	 * We are going to consume the prealloc table,
+	 * count that as nr_ptes.
+	 */
+	atomic_long_inc(&vma->vm_mm->nr_ptes);
+	fe->prealloc_pte = 0;
+}
+
 static int do_set_pmd(struct fault_env *fe, struct page *page)
 {
 	struct vm_area_struct *vma = fe->vma;
@@ -2949,6 +2962,17 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
 	ret = VM_FAULT_FALLBACK;
 	page = compound_head(page);
 
+	/*
+	 * Archs like ppc64 need additonal space to store information
+	 * related to pte entry. Use the preallocated table for that.
+	 */
+	if (arch_needs_pgtable_deposit() && !fe->prealloc_pte) {
+		fe->prealloc_pte = pte_alloc_one(vma->vm_mm, fe->address);
+		if (!fe->prealloc_pte)
+			return VM_FAULT_OOM;
+		smp_wmb(); /* See comment in __pte_alloc() */
+	}
+
 	fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
 	if (unlikely(!pmd_none(*fe->pmd)))
 		goto out;
@@ -2962,6 +2986,11 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
 
 	add_mm_counter(vma->vm_mm, MM_FILEPAGES, HPAGE_PMD_NR);
 	page_add_file_rmap(page, true);
+	/*
+	 * deposit and withdraw with pmd lock held
+	 */
+	if (arch_needs_pgtable_deposit())
+		deposit_prealloc_pte(fe);
 
 	set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry);
 
@@ -2971,6 +3000,13 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
 	ret = 0;
 	count_vm_event(THP_FILE_MAPPED);
 out:
+	/*
+	 * If we are going to fallback to pte mapping, do a
+	 * withdraw with pmd lock held.
+	 */
+	if (arch_needs_pgtable_deposit() && ret == VM_FAULT_FALLBACK)
+		fe->prealloc_pte = pgtable_trans_huge_withdraw(vma->vm_mm,
+							       fe->pmd);
 	spin_unlock(fe->ptl);
 	return ret;
 }
@@ -3010,18 +3046,20 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg,
 
 		ret = do_set_pmd(fe, page);
 		if (ret != VM_FAULT_FALLBACK)
-			return ret;
+			goto fault_handled;
 	}
 
 	if (!fe->pte) {
 		ret = pte_alloc_one_map(fe);
 		if (ret)
-			return ret;
+			goto fault_handled;
 	}
 
 	/* Re-check under ptl */
-	if (unlikely(!pte_none(*fe->pte)))
-		return VM_FAULT_NOPAGE;
+	if (unlikely(!pte_none(*fe->pte))) {
+		ret = VM_FAULT_NOPAGE;
+		goto fault_handled;
+	}
 
 	flush_icache_page(vma, page);
 	entry = mk_pte(page, vma->vm_page_prot);
@@ -3041,8 +3079,15 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg,
 
 	/* no need to invalidate: a not-present page won't be cached */
 	update_mmu_cache(vma, fe->address, fe->pte);
+	ret = 0;
 
-	return 0;
+fault_handled:
+	/* preallocated pagetable is unused: free it */
+	if (fe->prealloc_pte) {
+		pte_free(fe->vma->vm_mm, fe->prealloc_pte);
+		fe->prealloc_pte = 0;
+	}
+	return ret;
 }
 
 static unsigned long fault_around_bytes __read_mostly =
@@ -3141,11 +3186,6 @@ static int do_fault_around(struct fault_env *fe, pgoff_t start_pgoff)
 
 	fe->vma->vm_ops->map_pages(fe, start_pgoff, end_pgoff);
 
-	/* preallocated pagetable is unused: free it */
-	if (fe->prealloc_pte) {
-		pte_free(fe->vma->vm_mm, fe->prealloc_pte);
-		fe->prealloc_pte = 0;
-	}
 	/* Huge page is mapped? Page fault is solved */
 	if (pmd_trans_huge(*fe->pmd)) {
 		ret = VM_FAULT_NOPAGE;