path: root/mm/memory.c
Diffstat (limited to 'mm/memory.c')
-rw-r--r--	mm/memory.c	115
1 files changed, 65 insertions, 50 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 57361708d1a5..fb135ba4aba9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -712,7 +712,7 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
 	add_taint(TAINT_BAD_PAGE);
 }
 
-static inline int is_cow_mapping(vm_flags_t flags)
+static inline bool is_cow_mapping(vm_flags_t flags)
 {
 	return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
 }
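
For orientation, the predicate treats a mapping as copy-on-write when it may become writable (VM_MAYWRITE) but is not shared (no VM_SHARED). A minimal illustration, not taken from the patch, using hypothetical flag values:

	vm_flags_t private_rw = VM_READ | VM_WRITE | VM_MAYREAD | VM_MAYWRITE;
	vm_flags_t shared_rw  = private_rw | VM_SHARED | VM_MAYSHARE;

	is_cow_mapping(private_rw);	/* true: MAP_PRIVATE, may become writable */
	is_cow_mapping(shared_rw);	/* false: MAP_SHARED is never COW */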
@@ -1039,6 +1039,9 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	unsigned long next;
 	unsigned long addr = vma->vm_start;
 	unsigned long end = vma->vm_end;
+	unsigned long mmun_start;	/* For mmu_notifiers */
+	unsigned long mmun_end;		/* For mmu_notifiers */
+	bool is_cow;
 	int ret;
 
 	/*
@@ -1047,7 +1050,8 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	 * readonly mappings. The tradeoff is that copy_page_range is more
 	 * efficient than faulting.
 	 */
-	if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_PFNMAP|VM_INSERTPAGE))) {
+	if (!(vma->vm_flags & (VM_HUGETLB | VM_NONLINEAR |
+			       VM_PFNMAP | VM_MIXEDMAP))) {
 		if (!vma->anon_vma)
 			return 0;
 	}
@@ -1055,12 +1059,12 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	if (is_vm_hugetlb_page(vma))
 		return copy_hugetlb_page_range(dst_mm, src_mm, vma);
 
-	if (unlikely(is_pfn_mapping(vma))) {
+	if (unlikely(vma->vm_flags & VM_PFNMAP)) {
 		/*
 		 * We do not free on error cases below as remove_vma
 		 * gets called on error from higher level routine
 		 */
-		ret = track_pfn_vma_copy(vma);
+		ret = track_pfn_copy(vma);
 		if (ret)
 			return ret;
 	}
@@ -1071,8 +1075,12 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 	 * parent mm. And a permission downgrade will only happen if
 	 * is_cow_mapping() returns true.
 	 */
-	if (is_cow_mapping(vma->vm_flags))
-		mmu_notifier_invalidate_range_start(src_mm, addr, end);
+	is_cow = is_cow_mapping(vma->vm_flags);
+	mmun_start = addr;
+	mmun_end   = end;
+	if (is_cow)
+		mmu_notifier_invalidate_range_start(src_mm, mmun_start,
+						    mmun_end);
 
 	ret = 0;
 	dst_pgd = pgd_offset(dst_mm, addr);
@@ -1088,9 +1096,8 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
 		}
 	} while (dst_pgd++, src_pgd++, addr = next, addr != end);
 
-	if (is_cow_mapping(vma->vm_flags))
-		mmu_notifier_invalidate_range_end(src_mm,
-						  vma->vm_start, end);
+	if (is_cow)
+		mmu_notifier_invalidate_range_end(src_mm, mmun_start, mmun_end);
 	return ret;
 }
 
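The copy_page_range() hunks above establish the mmun_start/mmun_end convention used by the rest of this series: compute the invalidated range once, keep it in dedicated locals, and pass the identical values to both notifier calls. A hedged sketch of the pattern, assuming mm, addr and end are already in scope:

	unsigned long mmun_start = addr;	/* For mmu_notifiers */
	unsigned long mmun_end   = end;		/* For mmu_notifiers */

	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
	/* ... modify page tables covering [mmun_start, mmun_end) ... */
	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
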
@@ -1327,8 +1334,8 @@ static void unmap_single_vma(struct mmu_gather *tlb,
 	if (vma->vm_file)
 		uprobe_munmap(vma, start, end);
 
-	if (unlikely(is_pfn_mapping(vma)))
-		untrack_pfn_vma(vma, 0, 0);
+	if (unlikely(vma->vm_flags & VM_PFNMAP))
+		untrack_pfn(vma, 0, 0);
 
 	if (start != end) {
 		if (unlikely(is_vm_hugetlb_page(vma))) {
@@ -1521,7 +1528,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
 				spin_unlock(&mm->page_table_lock);
 				wait_split_huge_page(vma->anon_vma, pmd);
 			} else {
-				page = follow_trans_huge_pmd(mm, address,
+				page = follow_trans_huge_pmd(vma, address,
 							     pmd, flags);
 				spin_unlock(&mm->page_table_lock);
 				goto out;
@@ -1576,12 +1583,12 @@ split_fallthrough:
 		if (page->mapping && trylock_page(page)) {
 			lru_add_drain();	 /* push cached pages to LRU */
 			/*
-			 * Because we lock page here and migration is
-			 * blocked by the pte's page reference, we need
-			 * only check for file-cache page truncation.
+			 * Because we lock page here, and migration is
+			 * blocked by the pte's page reference, and we
+			 * know the page is still mapped, we don't even
+			 * need to check for file-cache page truncation.
 			 */
-			if (page->mapping)
-				mlock_vma_page(page);
+			mlock_vma_page(page);
 			unlock_page(page);
 		}
 	}
@@ -2085,6 +2092,11 @@ out:
  * ask for a shared writable mapping!
  *
  * The page does not need to be reserved.
+ *
+ * Usually this function is called from f_op->mmap() handler
+ * under mm->mmap_sem write-lock, so it can change vma->vm_flags.
+ * Caller must set VM_MIXEDMAP on vma if it wants to call this
+ * function from other places, for example from page-fault handler.
  */
 int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
 			struct page *page)
@@ -2093,7 +2105,11 @@ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
 		return -EFAULT;
 	if (!page_count(page))
 		return -EINVAL;
-	vma->vm_flags |= VM_INSERTPAGE;
+	if (!(vma->vm_flags & VM_MIXEDMAP)) {
+		BUG_ON(down_read_trylock(&vma->vm_mm->mmap_sem));
+		BUG_ON(vma->vm_flags & VM_PFNMAP);
+		vma->vm_flags |= VM_MIXEDMAP;
+	}
 	return insert_page(vma, addr, page, vma->vm_page_prot);
 }
 EXPORT_SYMBOL(vm_insert_page);
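
With VM_INSERTPAGE gone, vm_insert_page() now marks the vma VM_MIXEDMAP itself when it is called with mmap_sem held for write. A hedged sketch of the usual call site, an f_op->mmap() handler; the driver name and mydrv_kern_page are hypothetical:

static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
{
	/* Called under mm->mmap_sem write-lock, so vm_insert_page() may
	 * set VM_MIXEDMAP on the vma on our behalf. */
	return vm_insert_page(vma, vma->vm_start, mydrv_kern_page);
}

A caller that instead inserts pages from a fault handler must set VM_MIXEDMAP on the vma at mmap time, as the new comment above vm_insert_page() spells out.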
@@ -2132,7 +2148,7 @@ out:
  * @addr: target user address of this page
  * @pfn: source kernel pfn
  *
- * Similar to vm_inert_page, this allows drivers to insert individual pages
+ * Similar to vm_insert_page, this allows drivers to insert individual pages
  * they've allocated into a user vma. Same comments apply.
  *
  * This function should only be called from a vm_ops->fault handler, and
@@ -2162,14 +2178,11 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 
 	if (addr < vma->vm_start || addr >= vma->vm_end)
 		return -EFAULT;
-	if (track_pfn_vma_new(vma, &pgprot, pfn, PAGE_SIZE))
+	if (track_pfn_insert(vma, &pgprot, pfn))
 		return -EINVAL;
 
 	ret = insert_pfn(vma, addr, pfn, pgprot);
 
-	if (ret)
-		untrack_pfn_vma(vma, pfn, PAGE_SIZE);
-
 	return ret;
 }
 EXPORT_SYMBOL(vm_insert_pfn);
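
For reference, a hedged sketch of the call site the comment above describes, a vm_ops->fault handler; mydrv_pfn_of() is an assumed helper that maps the faulting page offset to a device PFN:

static int mydrv_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	unsigned long pfn = mydrv_pfn_of(vmf->pgoff);

	/* PAT tracking is now done via track_pfn_insert() inside
	 * vm_insert_pfn(); the old untrack-on-failure step is gone. */
	if (vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn))
		return VM_FAULT_SIGBUS;
	return VM_FAULT_NOPAGE;
}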
@@ -2290,37 +2303,30 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	 * rest of the world about it:
 	 *   VM_IO tells people not to look at these pages
 	 *	(accesses can have side effects).
-	 *   VM_RESERVED is specified all over the place, because
-	 *	in 2.4 it kept swapout's vma scan off this vma; but
-	 *	in 2.6 the LRU scan won't even find its pages, so this
-	 *	flag means no more than count its pages in reserved_vm,
-	 *	and omit it from core dump, even when VM_IO turned off.
 	 *   VM_PFNMAP tells the core MM that the base pages are just
 	 *	raw PFN mappings, and do not have a "struct page" associated
 	 *	with them.
+	 *   VM_DONTEXPAND
+	 *      Disable vma merging and expanding with mremap().
+	 *   VM_DONTDUMP
+	 *      Omit vma from core dump, even when VM_IO turned off.
 	 *
 	 * There's a horrible special case to handle copy-on-write
 	 * behaviour that some programs depend on. We mark the "original"
 	 * un-COW'ed pages by matching them up with "vma->vm_pgoff".
+	 * See vm_normal_page() for details.
 	 */
-	if (addr == vma->vm_start && end == vma->vm_end) {
+	if (is_cow_mapping(vma->vm_flags)) {
+		if (addr != vma->vm_start || end != vma->vm_end)
+			return -EINVAL;
 		vma->vm_pgoff = pfn;
-		vma->vm_flags |= VM_PFN_AT_MMAP;
-	} else if (is_cow_mapping(vma->vm_flags))
-		return -EINVAL;
-
-	vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
+	}
 
-	err = track_pfn_vma_new(vma, &prot, pfn, PAGE_ALIGN(size));
-	if (err) {
-		/*
-		 * To indicate that track_pfn related cleanup is not
-		 * needed from higher level routine calling unmap_vmas
-		 */
-		vma->vm_flags &= ~(VM_IO | VM_RESERVED | VM_PFNMAP);
-		vma->vm_flags &= ~VM_PFN_AT_MMAP;
+	err = track_pfn_remap(vma, &prot, pfn, addr, PAGE_ALIGN(size));
+	if (err)
 		return -EINVAL;
-	}
+
+	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
 
 	BUG_ON(addr >= end);
 	pfn -= addr >> PAGE_SHIFT;
@@ -2335,7 +2341,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
 	} while (pgd++, addr = next, addr != end);
 
 	if (err)
-		untrack_pfn_vma(vma, pfn, PAGE_ALIGN(size));
+		untrack_pfn(vma, pfn, PAGE_ALIGN(size));
 
 	return err;
 }
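
After this change remap_pfn_range() does the PAT tracking through track_pfn_remap(), undoes it itself on failure, and sets VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP on the vma (VM_RESERVED and VM_PFN_AT_MMAP are gone). A hedged sketch of a typical caller; mydrv_buf_phys is an assumed physical address of a driver-owned buffer:

static int mydrv_mmap(struct file *file, struct vm_area_struct *vma)
{
	unsigned long size = vma->vm_end - vma->vm_start;

	/* remap_pfn_range() sets the vm_flags and handles tracking
	 * cleanup on failure, so no manual flag handling is needed. */
	return remap_pfn_range(vma, vma->vm_start,
			       mydrv_buf_phys >> PAGE_SHIFT,
			       size, vma->vm_page_prot);
}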
@@ -2516,11 +2522,14 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 		spinlock_t *ptl, pte_t orig_pte)
 	__releases(ptl)
 {
-	struct page *old_page, *new_page;
+	struct page *old_page, *new_page = NULL;
 	pte_t entry;
 	int ret = 0;
 	int page_mkwrite = 0;
 	struct page *dirty_page = NULL;
+	unsigned long mmun_start;	/* For mmu_notifiers */
+	unsigned long mmun_end;		/* For mmu_notifiers */
+	bool mmun_called = false;	/* For mmu_notifiers */
 
 	old_page = vm_normal_page(vma, address, orig_pte);
 	if (!old_page) {
@@ -2698,6 +2707,11 @@ gotten:
 	if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))
 		goto oom_free_new;
 
+	mmun_start  = address & PAGE_MASK;
+	mmun_end    = (address & PAGE_MASK) + PAGE_SIZE;
+	mmun_called = true;
+	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
 	/*
 	 * Re-check the pte - we dropped the lock
 	 */
@@ -2764,6 +2778,8 @@ gotten:
 		page_cache_release(new_page);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
+	if (mmun_called)
+		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	if (old_page) {
 		/*
 		 * Don't let another task, with possibly unlocked vma,
@@ -2801,14 +2817,13 @@ static void unmap_mapping_range_vma(struct vm_area_struct *vma,
 	zap_page_range_single(vma, start_addr, end_addr - start_addr, details);
 }
 
-static inline void unmap_mapping_range_tree(struct prio_tree_root *root,
+static inline void unmap_mapping_range_tree(struct rb_root *root,
 					    struct zap_details *details)
 {
 	struct vm_area_struct *vma;
-	struct prio_tree_iter iter;
 	pgoff_t vba, vea, zba, zea;
 
-	vma_prio_tree_foreach(vma, &iter, root,
+	vma_interval_tree_foreach(vma, root,
 			details->first_index, details->last_index) {
 
 		vba = vma->vm_pgoff;
@@ -2839,7 +2854,7 @@ static inline void unmap_mapping_range_list(struct list_head *head,
 	 * across *all* the pages in each nonlinear VMA, not just the pages
 	 * whose virtual address lies outside the file truncation point.
 	 */
-	list_for_each_entry(vma, head, shared.vm_set.list) {
+	list_for_each_entry(vma, head, shared.nonlinear) {
 		details->nonlinear_vma = vma;
 		unmap_mapping_range_vma(vma, vma->vm_start, vma->vm_end, details);
 	}
@@ -2883,7 +2898,7 @@ void unmap_mapping_range(struct address_space *mapping,
 
 
 	mutex_lock(&mapping->i_mmap_mutex);
-	if (unlikely(!prio_tree_empty(&mapping->i_mmap)))
+	if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap)))
 		unmap_mapping_range_tree(&mapping->i_mmap, &details);
 	if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
 		unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);