Diffstat (limited to 'mm/memory.c')
-rw-r--r--  mm/memory.c  84
1 file changed, 75 insertions(+), 9 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 60e1c68d8218..c50a195041ec 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -726,9 +726,9 @@ out_set_pte:
         return 0;
 }
 
-static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                 pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
                 unsigned long addr, unsigned long end)
 {
         pte_t *orig_src_pte, *orig_dst_pte;
         pte_t *src_pte, *dst_pte;
@@ -802,6 +802,16 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src
         src_pmd = pmd_offset(src_pud, addr);
         do {
                 next = pmd_addr_end(addr, end);
+                if (pmd_trans_huge(*src_pmd)) {
+                        int err;
+                        err = copy_huge_pmd(dst_mm, src_mm,
+                                            dst_pmd, src_pmd, addr, vma);
+                        if (err == -ENOMEM)
+                                return -ENOMEM;
+                        if (!err)
+                                continue;
+                        /* fall through */
+                }
                 if (pmd_none_or_clear_bad(src_pmd))
                         continue;
                 if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
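
In this fork path, copy_huge_pmd() is used with a three-way return convention: -ENOMEM aborts the copy, 0 means the huge pmd was fully handled and the loop moves on, and any other non-zero value means the entry was not (or is no longer) huge, so control falls through to the ordinary copy_pte_range() path. A minimal userspace sketch of that convention, with try_copy_huge() as a hypothetical stand-in for copy_huge_pmd(), might look like this:

    /*
     * Sketch of the return convention copy_pmd_range() relies on above.
     * try_copy_huge() is illustrative only: -ENOMEM on allocation failure,
     * 0 when the huge entry was fully handled, positive when the caller
     * should fall through to the per-pte path.
     */
    #include <errno.h>
    #include <stdio.h>

    static int try_copy_huge(int entry_is_huge, int out_of_memory)
    {
        if (!entry_is_huge)
            return 1;           /* not huge: caller falls through */
        if (out_of_memory)
            return -ENOMEM;     /* abort the whole copy */
        /* ... copy the huge entry here ... */
        return 0;               /* handled: caller skips the pte loop */
    }

    int main(void)
    {
        int err = try_copy_huge(1, 0);

        if (err == -ENOMEM)
            return 1;           /* propagate the failure, like copy_pmd_range() */
        if (!err)
            printf("huge entry copied, move to the next pmd\n");
        else
            printf("fall through to the per-pte copy loop\n");
        return 0;
    }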
@@ -1004,6 +1014,15 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
         pmd = pmd_offset(pud, addr);
         do {
                 next = pmd_addr_end(addr, end);
+                if (pmd_trans_huge(*pmd)) {
+                        if (next-addr != HPAGE_PMD_SIZE)
+                                split_huge_page_pmd(vma->vm_mm, pmd);
+                        else if (zap_huge_pmd(tlb, vma, pmd)) {
+                                (*zap_work)--;
+                                continue;
+                        }
+                        /* fall through */
+                }
                 if (pmd_none_or_clear_bad(pmd)) {
                         (*zap_work)--;
                         continue;
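
The zap side makes a size decision: if the range being unmapped covers only part of the huge pmd (next - addr != HPAGE_PMD_SIZE), the huge page is split first so the normal pte loop below can zap just the covered portion; if the range spans the whole huge pmd, zap_huge_pmd() tears it down in one step and the loop continues. A small standalone sketch of that boundary check, assuming a 2MB huge page size and a simplified pmd_addr_end():

    /*
     * Sketch of the "whole huge pmd or only part of it?" test in
     * zap_pmd_range() above.  HPAGE_SIZE and this pmd_addr_end() are
     * illustrative; the kernel uses HPAGE_PMD_SIZE and the real
     * pmd_addr_end(), whose values depend on the architecture.
     */
    #include <stdint.h>
    #include <stdio.h>

    #define HPAGE_SIZE ((uint64_t)2 << 20)      /* assume 2MB huge pages */

    static uint64_t pmd_addr_end(uint64_t addr, uint64_t end)
    {
        uint64_t boundary = (addr + HPAGE_SIZE) & ~(HPAGE_SIZE - 1);
        return boundary < end ? boundary : end; /* clamp to the range end */
    }

    int main(void)
    {
        uint64_t addr = 0x200000, end = 0x300000;   /* unmaps half a huge pmd */
        uint64_t next = pmd_addr_end(addr, end);

        if (next - addr != HPAGE_SIZE)
            printf("partial cover: split the huge page, then zap ptes\n");
        else
            printf("full cover: zap the huge pmd in one shot\n");
        return 0;
    }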
@@ -1280,11 +1299,27 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
         pmd = pmd_offset(pud, address);
         if (pmd_none(*pmd))
                 goto no_page_table;
-        if (pmd_huge(*pmd)) {
+        if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) {
                 BUG_ON(flags & FOLL_GET);
                 page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
                 goto out;
         }
+        if (pmd_trans_huge(*pmd)) {
+                spin_lock(&mm->page_table_lock);
+                if (likely(pmd_trans_huge(*pmd))) {
+                        if (unlikely(pmd_trans_splitting(*pmd))) {
+                                spin_unlock(&mm->page_table_lock);
+                                wait_split_huge_page(vma->anon_vma, pmd);
+                        } else {
+                                page = follow_trans_huge_pmd(mm, address,
+                                                             pmd, flags);
+                                spin_unlock(&mm->page_table_lock);
+                                goto out;
+                        }
+                } else
+                        spin_unlock(&mm->page_table_lock);
+                /* fall through */
+        }
         if (unlikely(pmd_bad(*pmd)))
                 goto no_page_table;
 
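
follow_page() now uses a check-lock-recheck sequence: pmd_trans_huge() is tested once without the lock as a cheap hint, then re-tested under mm->page_table_lock, because a concurrent split can turn the huge pmd back into a regular page table between the two tests. If a split is already in flight, the lock is dropped and wait_split_huge_page() sleeps until it completes, after which the code falls through to the normal pte lookup. The same pattern, reduced to a self-contained userspace sketch (state, state_lock and the helpers below are illustrative, not kernel APIs):

    /*
     * "Check without the lock, re-check under the lock" pattern from
     * follow_page() above, modelled with a pthread mutex.
     */
    #include <pthread.h>
    #include <stdio.h>

    enum entry_state { REGULAR, HUGE, SPLITTING };

    static enum entry_state state = HUGE;
    static pthread_mutex_t state_lock = PTHREAD_MUTEX_INITIALIZER;

    static void handle_huge(void)    { printf("serviced as a huge entry\n"); }
    static void wait_for_split(void) { printf("waiting for the split to finish\n"); }

    static void lookup(void)
    {
        if (state != REGULAR) {                 /* lockless hint, may be stale */
            pthread_mutex_lock(&state_lock);
            if (state != REGULAR) {             /* re-check under the lock */
                if (state == SPLITTING) {
                    pthread_mutex_unlock(&state_lock);
                    wait_for_split();           /* sleep until the split completes */
                } else {
                    handle_huge();              /* still huge: service it under the lock */
                    pthread_mutex_unlock(&state_lock);
                    return;
                }
            } else {
                pthread_mutex_unlock(&state_lock);
            }
            /* fall through to the regular page-table path */
        }
        printf("regular path\n");
    }

    int main(void)
    {
        lookup();
        return 0;
    }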
@@ -3179,9 +3214,9 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
  * but allow concurrent faults), and pte mapped but not yet locked.
  * We return with mmap_sem still held, but pte unmapped and unlocked.
  */
-static inline int handle_pte_fault(struct mm_struct *mm,
+int handle_pte_fault(struct mm_struct *mm,
                 struct vm_area_struct *vma, unsigned long address,
                 pte_t *pte, pmd_t *pmd, unsigned int flags)
 {
         pte_t entry;
         spinlock_t *ptl;
@@ -3260,9 +3295,40 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
         pmd = pmd_alloc(mm, pud, address);
         if (!pmd)
                 return VM_FAULT_OOM;
-        pte = pte_alloc_map(mm, vma, pmd, address);
-        if (!pte)
+        if (pmd_none(*pmd) && transparent_hugepage_enabled(vma)) {
+                if (!vma->vm_ops)
+                        return do_huge_pmd_anonymous_page(mm, vma, address,
+                                                          pmd, flags);
+        } else {
+                pmd_t orig_pmd = *pmd;
+                barrier();
+                if (pmd_trans_huge(orig_pmd)) {
+                        if (flags & FAULT_FLAG_WRITE &&
+                            !pmd_write(orig_pmd) &&
+                            !pmd_trans_splitting(orig_pmd))
+                                return do_huge_pmd_wp_page(mm, vma, address,
+                                                           pmd, orig_pmd);
+                        return 0;
+                }
+        }
+
+        /*
+         * Use __pte_alloc instead of pte_alloc_map, because we can't
+         * run pte_offset_map on the pmd, if an huge pmd could
+         * materialize from under us from a different thread.
+         */
+        if (unlikely(__pte_alloc(mm, vma, pmd, address)))
                 return VM_FAULT_OOM;
+        /* if an huge pmd materialized from under us just retry later */
+        if (unlikely(pmd_trans_huge(*pmd)))
+                return 0;
+        /*
+         * A regular pmd is established and it can't morph into a huge pmd
+         * from under us anymore at this point because we hold the mmap_sem
+         * read mode and khugepaged takes it in write mode. So now it's
+         * safe to run pte_offset_map().
+         */
+        pte = pte_offset_map(pmd, address);
 
         return handle_pte_fault(mm, vma, address, pte, pmd, flags);
 }
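
Two details in this last hunk are worth spelling out. First, *pmd is read once into orig_pmd with a compiler barrier() after it, so the later pmd_trans_huge() and pmd_write() tests operate on a single snapshot rather than re-reading a value another thread may be rewriting. Second, __pte_alloc() plus a later pte_offset_map() replace pte_alloc_map(): mapping the pte table is only safe once the code has confirmed the pmd is a regular page table, and at that point it can no longer become huge because mmap_sem is held for read while khugepaged needs it for write. A tiny userspace sketch of the snapshot-then-test idiom (the entry type and flag bits below are made up for illustration):

    /*
     * "Snapshot once, then test the snapshot" idiom behind
     *     pmd_t orig_pmd = *pmd;
     *     barrier();
     * barrier() here is the usual GCC compiler barrier.
     */
    #include <stdio.h>

    #define barrier() __asm__ __volatile__("" ::: "memory")

    struct entry { unsigned long val; };

    #define ENTRY_HUGE  0x1UL
    #define ENTRY_WRITE 0x2UL

    static void fault(struct entry *shared, int want_write)
    {
        struct entry snap = *shared;    /* one read of the shared entry */
        barrier();                      /* tests below use snap, never re-read *shared */

        if (snap.val & ENTRY_HUGE) {
            if (want_write && !(snap.val & ENTRY_WRITE))
                printf("write fault on a read-only huge entry: copy-on-write path\n");
            else
                printf("huge entry already serves this fault: nothing to do\n");
        }
    }

    int main(void)
    {
        struct entry e = { .val = ENTRY_HUGE }; /* huge, not writable */

        fault(&e, 1);
        return 0;
    }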