Diffstat (limited to 'mm/memory.c')
 mm/memory.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 50 insertions(+), 10 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 0a72f821ccdc..32e9b7aec366 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2935,6 +2935,19 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
 	return true;
 }
 
+static void deposit_prealloc_pte(struct fault_env *fe)
+{
+	struct vm_area_struct *vma = fe->vma;
+
+	pgtable_trans_huge_deposit(vma->vm_mm, fe->pmd, fe->prealloc_pte);
+	/*
+	 * We are going to consume the prealloc table,
+	 * count that as nr_ptes.
+	 */
+	atomic_long_inc(&vma->vm_mm->nr_ptes);
+	fe->prealloc_pte = 0;
+}
+
 static int do_set_pmd(struct fault_env *fe, struct page *page)
 {
 	struct vm_area_struct *vma = fe->vma;
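The helper added above follows the pgtable deposit/withdraw convention: an architecture such as ppc64 parks a preallocated PTE page behind the huge PMD so that a later split or fallback can take it back without allocating under the PMD lock. As a rough mental model only — a hedged userspace sketch, not kernel code; struct mm, deposit() and withdraw() here are invented for illustration, and the real kernel adjusts nr_ptes at different points than this symmetric toy does — the deposited tables behave like a per-PMD LIFO with a bookkeeping counter:

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

/* Illustrative stand-ins only; none of these are kernel types. */
struct pgtable {
	struct pgtable *next;		/* link in the deposited LIFO */
};

struct mm {
	struct pgtable *deposited;	/* head of the per-PMD deposit list */
	long nr_ptes;			/* analogue of mm->nr_ptes */
};

/* Park a preallocated table; the kernel does this under the pmd lock. */
static void deposit(struct mm *mm, struct pgtable *pt)
{
	pt->next = mm->deposited;
	mm->deposited = pt;
	mm->nr_ptes++;		/* table is consumed, count it */
}

/* Take back the most recently deposited table (LIFO order). */
static struct pgtable *withdraw(struct mm *mm)
{
	struct pgtable *pt = mm->deposited;

	if (pt) {
		mm->deposited = pt->next;
		mm->nr_ptes--;
	}
	return pt;
}

int main(void)
{
	struct mm mm = { 0 };
	struct pgtable *prealloc = calloc(1, sizeof(*prealloc));

	if (!prealloc)
		return 1;
	deposit(&mm, prealloc);			/* huge PMD mapped: park it */
	assert(mm.nr_ptes == 1);

	struct pgtable *back = withdraw(&mm);	/* fallback/split path */
	assert(back == prealloc && mm.nr_ptes == 0);

	free(back);	/* unused table is freed, like the fault_handled: path */
	printf("deposit/withdraw balanced, nr_ptes=%ld\n", mm.nr_ptes);
	return 0;
}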
@@ -2949,6 +2962,17 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
 	ret = VM_FAULT_FALLBACK;
 	page = compound_head(page);
 
+	/*
+	 * Archs like ppc64 need additional space to store information
+	 * related to pte entry. Use the preallocated table for that.
+	 */
+	if (arch_needs_pgtable_deposit() && !fe->prealloc_pte) {
+		fe->prealloc_pte = pte_alloc_one(vma->vm_mm, fe->address);
+		if (!fe->prealloc_pte)
+			return VM_FAULT_OOM;
+		smp_wmb(); /* See comment in __pte_alloc() */
+	}
+
 	fe->ptl = pmd_lock(vma->vm_mm, fe->pmd);
 	if (unlikely(!pmd_none(*fe->pmd)))
 		goto out;
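The smp_wmb() after pte_alloc_one() mirrors the comment in __pte_alloc(): the freshly zeroed table must become visible to other CPUs before any pointer to it is published. A minimal sketch of that initialize-then-publish ordering in portable C11 (publish(), lookup() and struct pte_table are invented names; release/acquire is used only because userspace C has no direct equivalent of smp_wmb(), which is a weaker store-only barrier):

#include <stdatomic.h>
#include <string.h>

struct pte_table { unsigned long slots[512]; };

static _Atomic(struct pte_table *) published_pmd;

/* Writer: fully initialize the table, then make the pointer visible. */
static void publish(struct pte_table *pt)
{
	memset(pt->slots, 0, sizeof(pt->slots));	/* init before publish */
	/* Release ordering plays the role of smp_wmb() here. */
	atomic_store_explicit(&published_pmd, pt, memory_order_release);
}

/* Reader: acquire pairs with the release, so the zeroed slots are seen. */
static struct pte_table *lookup(void)
{
	return atomic_load_explicit(&published_pmd, memory_order_acquire);
}

int main(void)
{
	static struct pte_table table;

	publish(&table);
	return lookup() == &table ? 0 : 1;
}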
@@ -2962,6 +2986,11 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
 
 	add_mm_counter(vma->vm_mm, MM_FILEPAGES, HPAGE_PMD_NR);
 	page_add_file_rmap(page, true);
+	/*
+	 * deposit and withdraw with pmd lock held
+	 */
+	if (arch_needs_pgtable_deposit())
+		deposit_prealloc_pte(fe);
 
 	set_pmd_at(vma->vm_mm, haddr, fe->pmd, entry);
 
@@ -2971,6 +3000,13 @@ static int do_set_pmd(struct fault_env *fe, struct page *page)
 	ret = 0;
 	count_vm_event(THP_FILE_MAPPED);
 out:
+	/*
+	 * If we are going to fallback to pte mapping, do a
+	 * withdraw with pmd lock held.
+	 */
+	if (arch_needs_pgtable_deposit() && ret == VM_FAULT_FALLBACK)
+		fe->prealloc_pte = pgtable_trans_huge_withdraw(vma->vm_mm,
+							       fe->pmd);
 	spin_unlock(fe->ptl);
 	return ret;
 }
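Note that both the deposit in the previous hunk and this fallback withdraw run with fe->ptl, the pmd lock, held, so a concurrent split cannot race with them. A hedged pthreads sketch of that locking discipline (pmd_lock, deposit_locked() and withdraw_locked() are invented; the kernel holds its spinlock across a wider critical section rather than taking it inside the helpers):

#include <pthread.h>
#include <stddef.h>

static pthread_mutex_t pmd_lock = PTHREAD_MUTEX_INITIALIZER;
static void *deposited_table;		/* protected by pmd_lock */

/* Deposit only while the lock is held, as deposit_prealloc_pte() requires. */
static void deposit_locked(void *table)
{
	pthread_mutex_lock(&pmd_lock);
	deposited_table = table;
	pthread_mutex_unlock(&pmd_lock);
}

/* Withdraw under the same lock, as the VM_FAULT_FALLBACK path does. */
static void *withdraw_locked(void)
{
	void *table;

	pthread_mutex_lock(&pmd_lock);
	table = deposited_table;
	deposited_table = NULL;
	pthread_mutex_unlock(&pmd_lock);
	return table;
}

int main(void)
{
	int dummy;

	deposit_locked(&dummy);
	return withdraw_locked() == &dummy ? 0 : 1;
}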
@@ -3010,18 +3046,20 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg,
 
 		ret = do_set_pmd(fe, page);
 		if (ret != VM_FAULT_FALLBACK)
-			return ret;
+			goto fault_handled;
 	}
 
 	if (!fe->pte) {
 		ret = pte_alloc_one_map(fe);
 		if (ret)
-			return ret;
+			goto fault_handled;
 	}
 
 	/* Re-check under ptl */
-	if (unlikely(!pte_none(*fe->pte)))
-		return VM_FAULT_NOPAGE;
+	if (unlikely(!pte_none(*fe->pte))) {
+		ret = VM_FAULT_NOPAGE;
+		goto fault_handled;
+	}
 
 	flush_icache_page(vma, page);
 	entry = mk_pte(page, vma->vm_page_prot);
@@ -3041,8 +3079,15 @@ int alloc_set_pte(struct fault_env *fe, struct mem_cgroup *memcg,
 
 	/* no need to invalidate: a not-present page won't be cached */
 	update_mmu_cache(vma, fe->address, fe->pte);
+	ret = 0;
 
-	return 0;
+fault_handled:
+	/* preallocated pagetable is unused: free it */
+	if (fe->prealloc_pte) {
+		pte_free(fe->vma->vm_mm, fe->prealloc_pte);
+		fe->prealloc_pte = 0;
+	}
+	return ret;
 }
 
 static unsigned long fault_around_bytes __read_mostly =
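The new fault_handled: label funnels the early returns of alloc_set_pte() through one exit that frees a still-unused preallocation — which is also why the equivalent free block can be dropped from do_fault_around() in the next hunk. Reduced to a self-contained sketch (map_something(), try_fast_path() and the deposited slot are invented for illustration), the idiom looks like this:

#include <stdbool.h>
#include <stdlib.h>

static void *deposited;	/* stand-in for the pgtable deposit slot */

/* Pretend the huge mapping consumes the preallocated table. */
static bool try_fast_path(void *table)
{
	deposited = table;	/* ownership moved, like deposit_prealloc_pte() */
	return true;
}

/* Centralized-exit idiom: every path funnels through one cleanup site,
 * so an unused preallocation can never leak. */
static int map_something(void)
{
	void *prealloc = malloc(64);	/* stands in for fe->prealloc_pte */
	int ret;

	if (!prealloc)
		return -1;	/* nothing allocated yet: a plain return is safe */

	if (try_fast_path(prealloc)) {
		prealloc = NULL;	/* consumed: don't free it below */
		ret = 0;
		goto out;
	}

	ret = 0;	/* slow path would map with PTEs here */
out:
	free(prealloc);		/* free(NULL) is a harmless no-op */
	return ret;
}

int main(void)
{
	int ret = map_something();

	free(deposited);	/* whoever holds the deposit frees it eventually */
	return ret;
}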
@@ -3141,11 +3186,6 @@ static int do_fault_around(struct fault_env *fe, pgoff_t start_pgoff)
 
 	fe->vma->vm_ops->map_pages(fe, start_pgoff, end_pgoff);
 
-	/* preallocated pagetable is unused: free it */
-	if (fe->prealloc_pte) {
-		pte_free(fe->vma->vm_mm, fe->prealloc_pte);
-		fe->prealloc_pte = 0;
-	}
 	/* Huge page is mapped? Page fault is solved */
 	if (pmd_trans_huge(*fe->pmd)) {
 		ret = VM_FAULT_NOPAGE;