Diffstat (limited to 'mm/memory.c')
 mm/memory.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 75 insertions(+), 9 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 60e1c68d8218..c50a195041ec 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -726,9 +726,9 @@ out_set_pte:
         return 0;
 }
 
-static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                    pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma,
                    unsigned long addr, unsigned long end)
 {
         pte_t *orig_src_pte, *orig_dst_pte;
         pte_t *src_pte, *dst_pte;
@@ -802,6 +802,16 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src
         src_pmd = pmd_offset(src_pud, addr);
         do {
                 next = pmd_addr_end(addr, end);
+                if (pmd_trans_huge(*src_pmd)) {
+                        int err;
+                        err = copy_huge_pmd(dst_mm, src_mm,
+                                            dst_pmd, src_pmd, addr, vma);
+                        if (err == -ENOMEM)
+                                return -ENOMEM;
+                        if (!err)
+                                continue;
+                        /* fall through */
+                }
                 if (pmd_none_or_clear_bad(src_pmd))
                         continue;
                 if (copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd,
@@ -1004,6 +1014,15 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb,
         pmd = pmd_offset(pud, addr);
         do {
                 next = pmd_addr_end(addr, end);
+                if (pmd_trans_huge(*pmd)) {
+                        if (next-addr != HPAGE_PMD_SIZE)
+                                split_huge_page_pmd(vma->vm_mm, pmd);
+                        else if (zap_huge_pmd(tlb, vma, pmd)) {
+                                (*zap_work)--;
+                                continue;
+                        }
+                        /* fall through */
+                }
                 if (pmd_none_or_clear_bad(pmd)) {
                         (*zap_work)--;
                         continue;
@@ -1280,11 +1299,27 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
         pmd = pmd_offset(pud, address);
         if (pmd_none(*pmd))
                 goto no_page_table;
-        if (pmd_huge(*pmd)) {
+        if (pmd_huge(*pmd) && vma->vm_flags & VM_HUGETLB) {
                 BUG_ON(flags & FOLL_GET);
                 page = follow_huge_pmd(mm, address, pmd, flags & FOLL_WRITE);
                 goto out;
         }
+        if (pmd_trans_huge(*pmd)) {
+                spin_lock(&mm->page_table_lock);
+                if (likely(pmd_trans_huge(*pmd))) {
+                        if (unlikely(pmd_trans_splitting(*pmd))) {
+                                spin_unlock(&mm->page_table_lock);
+                                wait_split_huge_page(vma->anon_vma, pmd);
+                        } else {
+                                page = follow_trans_huge_pmd(mm, address,
+                                                             pmd, flags);
+                                spin_unlock(&mm->page_table_lock);
+                                goto out;
+                        }
+                } else
+                        spin_unlock(&mm->page_table_lock);
+                /* fall through */
+        }
         if (unlikely(pmd_bad(*pmd)))
                 goto no_page_table;
 
@@ -3179,9 +3214,9 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
  * but allow concurrent faults), and pte mapped but not yet locked.
  * We return with mmap_sem still held, but pte unmapped and unlocked.
  */
-static inline int handle_pte_fault(struct mm_struct *mm,
+int handle_pte_fault(struct mm_struct *mm,
                 struct vm_area_struct *vma, unsigned long address,
                 pte_t *pte, pmd_t *pmd, unsigned int flags)
 {
         pte_t entry;
         spinlock_t *ptl;
@@ -3260,9 +3295,40 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
         pmd = pmd_alloc(mm, pud, address);
         if (!pmd)
                 return VM_FAULT_OOM;
-        pte = pte_alloc_map(mm, vma, pmd, address);
-        if (!pte)
+        if (pmd_none(*pmd) && transparent_hugepage_enabled(vma)) {
+                if (!vma->vm_ops)
+                        return do_huge_pmd_anonymous_page(mm, vma, address,
+                                                          pmd, flags);
+        } else {
+                pmd_t orig_pmd = *pmd;
+                barrier();
+                if (pmd_trans_huge(orig_pmd)) {
+                        if (flags & FAULT_FLAG_WRITE &&
+                            !pmd_write(orig_pmd) &&
+                            !pmd_trans_splitting(orig_pmd))
+                                return do_huge_pmd_wp_page(mm, vma, address,
+                                                           pmd, orig_pmd);
+                        return 0;
+                }
+        }
+
+        /*
+         * Use __pte_alloc instead of pte_alloc_map, because we can't
+         * run pte_offset_map on the pmd, if an huge pmd could
+         * materialize from under us from a different thread.
+         */
+        if (unlikely(__pte_alloc(mm, vma, pmd, address)))
                 return VM_FAULT_OOM;
+        /* if an huge pmd materialized from under us just retry later */
+        if (unlikely(pmd_trans_huge(*pmd)))
+                return 0;
+        /*
+         * A regular pmd is established and it can't morph into a huge pmd
+         * from under us anymore at this point because we hold the mmap_sem
+         * read mode and khugepaged takes it in write mode. So now it's
+         * safe to run pte_offset_map().
+         */
+        pte = pte_offset_map(pmd, address);
 
         return handle_pte_fault(mm, vma, address, pte, pmd, flags);
 }
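
The handle_mm_fault() hunk above is the entry point for the new path: when the pmd is empty, the vma is anonymous (no vm_ops) and transparent hugepages are enabled for it, the fault is handed to do_huge_pmd_anonymous_page() instead of falling through to the pte path. Below is a minimal userspace sketch (not part of this patch) that exercises that path. It assumes a kernel built with this THP support, a 2MB huge pmd size as on x86-64, and MADV_HUGEPAGE support; the file and symbol names (thp_touch.c, HPAGE_SIZE) are only illustrative.

/* thp_touch.c - hypothetical demo, not part of this patch. */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

#define HPAGE_SIZE (2UL * 1024 * 1024)  /* assumed huge pmd size (x86-64) */

int main(void)
{
        /* Over-allocate so a 2MB-aligned, 4MB-long range fits inside. */
        size_t len = 4 * HPAGE_SIZE;
        char *raw = mmap(NULL, len, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (raw == MAP_FAILED) {
                perror("mmap");
                return 1;
        }

        char *aligned = (char *)(((unsigned long)raw + HPAGE_SIZE - 1) &
                                 ~(HPAGE_SIZE - 1));

        /* Hint that this range should be backed by huge pages if possible. */
        if (madvise(aligned, 2 * HPAGE_SIZE, MADV_HUGEPAGE))
                perror("madvise(MADV_HUGEPAGE)");

        /*
         * The first write fault on each aligned 2MB chunk enters
         * handle_mm_fault() with an empty pmd on an anonymous vma
         * (!vma->vm_ops), i.e. the do_huge_pmd_anonymous_page() path
         * added by this patch.
         */
        memset(aligned, 0x5a, 2 * HPAGE_SIZE);

        printf("touched 2 x 2MB at %p; see AnonHugePages in /proc/self/smaps\n",
               (void *)aligned);
        getchar();      /* keep the mapping alive while inspecting smaps */

        munmap(raw, len);
        return 0;
}

Whether the range was actually mapped with huge pmds can be checked in the AnonHugePages field of /proc/self/smaps (or /proc/meminfo) while the program waits on getchar().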