Diffstat (limited to 'mm/memory.c')
-rw-r--r--   mm/memory.c   115
1 file changed, 65 insertions, 50 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 57361708d1a5..fb135ba4aba9 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -712,7 +712,7 @@ static void print_bad_pte(struct vm_area_struct *vma, unsigned long addr,
         add_taint(TAINT_BAD_PAGE);
 }
 
-static inline int is_cow_mapping(vm_flags_t flags)
+static inline bool is_cow_mapping(vm_flags_t flags)
 {
         return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
 }
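For reference, is_cow_mapping() is true only for private mappings that were created writable: VM_SHARED clear, VM_MAYWRITE set. A minimal userspace sketch of the flag test (the flag values mirror include/linux/mm.h; the test program itself is illustrative, not kernel code):

#include <stdbool.h>
#include <stdio.h>

#define VM_SHARED   0x00000008UL
#define VM_MAYWRITE 0x00000020UL

typedef unsigned long vm_flags_t;

static inline bool is_cow_mapping(vm_flags_t flags)
{
        /* private (no VM_SHARED) and originally writable (VM_MAYWRITE) */
        return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
}

int main(void)
{
        printf("%d\n", is_cow_mapping(VM_MAYWRITE));             /* 1: private, writable */
        printf("%d\n", is_cow_mapping(VM_SHARED | VM_MAYWRITE)); /* 0: shared mapping */
        printf("%d\n", is_cow_mapping(0));                       /* 0: private read-only */
        return 0;
}
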
@@ -1039,6 +1039,9 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
         unsigned long next;
         unsigned long addr = vma->vm_start;
         unsigned long end = vma->vm_end;
+        unsigned long mmun_start;       /* For mmu_notifiers */
+        unsigned long mmun_end;         /* For mmu_notifiers */
+        bool is_cow;
         int ret;
 
         /*
@@ -1047,7 +1050,8 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
          * readonly mappings. The tradeoff is that copy_page_range is more
          * efficient than faulting.
          */
-        if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_PFNMAP|VM_INSERTPAGE))) {
+        if (!(vma->vm_flags & (VM_HUGETLB | VM_NONLINEAR |
+                               VM_PFNMAP | VM_MIXEDMAP))) {
                 if (!vma->anon_vma)
                         return 0;
         }
@@ -1055,12 +1059,12 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
         if (is_vm_hugetlb_page(vma))
                 return copy_hugetlb_page_range(dst_mm, src_mm, vma);
 
-        if (unlikely(is_pfn_mapping(vma))) {
+        if (unlikely(vma->vm_flags & VM_PFNMAP)) {
                 /*
                  * We do not free on error cases below as remove_vma
                  * gets called on error from higher level routine
                  */
-                ret = track_pfn_vma_copy(vma);
+                ret = track_pfn_copy(vma);
                 if (ret)
                         return ret;
         }
@@ -1071,8 +1075,12 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
          * parent mm. And a permission downgrade will only happen if
          * is_cow_mapping() returns true.
          */
-        if (is_cow_mapping(vma->vm_flags))
-                mmu_notifier_invalidate_range_start(src_mm, addr, end);
+        is_cow = is_cow_mapping(vma->vm_flags);
+        mmun_start = addr;
+        mmun_end = end;
+        if (is_cow)
+                mmu_notifier_invalidate_range_start(src_mm, mmun_start,
+                                                    mmun_end);
 
         ret = 0;
         dst_pgd = pgd_offset(dst_mm, addr);
@@ -1088,9 +1096,8 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
                 }
         } while (dst_pgd++, src_pgd++, addr = next, addr != end);
 
-        if (is_cow_mapping(vma->vm_flags))
-                mmu_notifier_invalidate_range_end(src_mm,
-                                                  vma->vm_start, end);
+        if (is_cow)
+                mmu_notifier_invalidate_range_end(src_mm, mmun_start, mmun_end);
         return ret;
 }
 
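The copy_page_range() change above caches the notifier range in mmun_start/mmun_end and brackets the page-table walk with invalidate_range_start()/invalidate_range_end() only for COW mappings. A standalone sketch of that bracketing idiom, with hypothetical stand-in types and helpers (not the kernel API):

#include <stdbool.h>
#include <stdio.h>

struct mm { int id; };                  /* hypothetical stand-in */

static void notifier_range_start(struct mm *mm, unsigned long s, unsigned long e)
{
        printf("invalidate_range_start mm=%d [%#lx, %#lx)\n", mm->id, s, e);
}

static void notifier_range_end(struct mm *mm, unsigned long s, unsigned long e)
{
        printf("invalidate_range_end   mm=%d [%#lx, %#lx)\n", mm->id, s, e);
}

static int copy_range(struct mm *src, unsigned long addr, unsigned long end,
                      bool is_cow)
{
        /* Cache the range up front: the walk below advances 'addr'. */
        unsigned long mmun_start = addr;
        unsigned long mmun_end = end;
        int ret = 0;

        if (is_cow)
                notifier_range_start(src, mmun_start, mmun_end);

        while (addr != end)             /* stand-in for the pgd/pud/pmd walk */
                addr += 0x1000;

        if (is_cow)                     /* end call reuses the cached range */
                notifier_range_end(src, mmun_start, mmun_end);
        return ret;
}

int main(void)
{
        struct mm src = { .id = 1 };
        return copy_range(&src, 0x400000, 0x403000, true);
}
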
@@ -1327,8 +1334,8 @@ static void unmap_single_vma(struct mmu_gather *tlb,
         if (vma->vm_file)
                 uprobe_munmap(vma, start, end);
 
-        if (unlikely(is_pfn_mapping(vma)))
-                untrack_pfn_vma(vma, 0, 0);
+        if (unlikely(vma->vm_flags & VM_PFNMAP))
+                untrack_pfn(vma, 0, 0);
 
         if (start != end) {
                 if (unlikely(is_vm_hugetlb_page(vma))) {
@@ -1521,7 +1528,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
                                 spin_unlock(&mm->page_table_lock);
                                 wait_split_huge_page(vma->anon_vma, pmd);
                         } else {
-                                page = follow_trans_huge_pmd(mm, address,
+                                page = follow_trans_huge_pmd(vma, address,
                                                              pmd, flags);
                                 spin_unlock(&mm->page_table_lock);
                                 goto out;
@@ -1576,12 +1583,12 @@ split_fallthrough:
                 if (page->mapping && trylock_page(page)) {
                         lru_add_drain();         /* push cached pages to LRU */
                         /*
-                         * Because we lock page here and migration is
-                         * blocked by the pte's page reference, we need
-                         * only check for file-cache page truncation.
+                         * Because we lock page here, and migration is
+                         * blocked by the pte's page reference, and we
+                         * know the page is still mapped, we don't even
+                         * need to check for file-cache page truncation.
                          */
-                        if (page->mapping)
-                                mlock_vma_page(page);
+                        mlock_vma_page(page);
                         unlock_page(page);
                 }
         }
@@ -2085,6 +2092,11 @@ out:
  * ask for a shared writable mapping!
  *
  * The page does not need to be reserved.
+ *
+ * Usually this function is called from f_op->mmap() handler
+ * under mm->mmap_sem write-lock, so it can change vma->vm_flags.
+ * Caller must set VM_MIXEDMAP on vma if it wants to call this
+ * function from other places, for example from page-fault handler.
  */
 int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
                         struct page *page)
@@ -2093,7 +2105,11 @@ int vm_insert_page(struct vm_area_struct *vma, unsigned long addr,
                 return -EFAULT;
         if (!page_count(page))
                 return -EINVAL;
-        vma->vm_flags |= VM_INSERTPAGE;
+        if (!(vma->vm_flags & VM_MIXEDMAP)) {
+                BUG_ON(down_read_trylock(&vma->vm_mm->mmap_sem));
+                BUG_ON(vma->vm_flags & VM_PFNMAP);
+                vma->vm_flags |= VM_MIXEDMAP;
+        }
         return insert_page(vma, addr, page, vma->vm_page_prot);
 }
 EXPORT_SYMBOL(vm_insert_page);
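With the VM_MIXEDMAP change above, vm_insert_page() sets the flag itself when called under the mmap_sem write-lock; callers from other contexts must pre-set VM_MIXEDMAP in their ->mmap. A hedged sketch of a driver ->mmap handler inserting driver-owned pages this way; struct my_pages_dev and its fields are hypothetical, while vm_insert_page() and vma_pages() are real kernel APIs:

#include <linux/fs.h>
#include <linux/mm.h>

struct my_pages_dev {                   /* hypothetical driver state */
        struct page **pages;
        unsigned long npages;
};

static int my_pages_mmap(struct file *file, struct vm_area_struct *vma)
{
        struct my_pages_dev *dev = file->private_data;
        unsigned long npages = vma_pages(vma);
        unsigned long i;
        int err;

        if (vma->vm_pgoff + npages > dev->npages)
                return -EINVAL;

        /* Called from ->mmap under mmap_sem write-lock, so vm_insert_page()
         * is allowed to set VM_MIXEDMAP on the vma itself. */
        for (i = 0; i < npages; i++) {
                err = vm_insert_page(vma, vma->vm_start + i * PAGE_SIZE,
                                     dev->pages[vma->vm_pgoff + i]);
                if (err)
                        return err;
        }
        return 0;
}
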
@@ -2132,7 +2148,7 @@ out:
  * @addr: target user address of this page
  * @pfn: source kernel pfn
  *
- * Similar to vm_inert_page, this allows drivers to insert individual pages
+ * Similar to vm_insert_page, this allows drivers to insert individual pages
  * they've allocated into a user vma. Same comments apply.
  *
  * This function should only be called from a vm_ops->fault handler, and
@@ -2162,14 +2178,11 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr,
 
         if (addr < vma->vm_start || addr >= vma->vm_end)
                 return -EFAULT;
-        if (track_pfn_vma_new(vma, &pgprot, pfn, PAGE_SIZE))
+        if (track_pfn_insert(vma, &pgprot, pfn))
                 return -EINVAL;
 
         ret = insert_pfn(vma, addr, pfn, pgprot);
 
-        if (ret)
-                untrack_pfn_vma(vma, pfn, PAGE_SIZE);
-
         return ret;
 }
 EXPORT_SYMBOL(vm_insert_pfn);
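As the kerneldoc above says, vm_insert_pfn() is meant for vm_ops->fault handlers on VM_PFNMAP vmas. A hedged sketch of such a handler as it would have looked in this era (vmf->virtual_address was still a field then); my_pfn_dev and my_pfn_dev_base() are hypothetical, declared only to keep the sketch self-contained:

#include <linux/mm.h>

struct my_pfn_dev;                                      /* hypothetical */
unsigned long my_pfn_dev_base(struct my_pfn_dev *dev);  /* hypothetical */

static int my_pfn_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        struct my_pfn_dev *dev = vma->vm_private_data;
        unsigned long addr = (unsigned long)vmf->virtual_address;
        int err;

        /* PAT tracking is handled inside vm_insert_pfn() via
         * track_pfn_insert(); no untrack is needed on failure any more. */
        err = vm_insert_pfn(vma, addr, my_pfn_dev_base(dev) + vmf->pgoff);
        if (err == -ENOMEM)
                return VM_FAULT_OOM;
        if (err < 0 && err != -EBUSY)
                return VM_FAULT_SIGBUS;
        return VM_FAULT_NOPAGE;
}
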
@@ -2290,37 +2303,30 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
          * rest of the world about it:
          *   VM_IO tells people not to look at these pages
          *      (accesses can have side effects).
-         *   VM_RESERVED is specified all over the place, because
-         *      in 2.4 it kept swapout's vma scan off this vma; but
-         *      in 2.6 the LRU scan won't even find its pages, so this
-         *      flag means no more than count its pages in reserved_vm,
-         *      and omit it from core dump, even when VM_IO turned off.
          *   VM_PFNMAP tells the core MM that the base pages are just
          *      raw PFN mappings, and do not have a "struct page" associated
          *      with them.
+         *   VM_DONTEXPAND
+         *      Disable vma merging and expanding with mremap().
+         *   VM_DONTDUMP
+         *      Omit vma from core dump, even when VM_IO turned off.
          *
          * There's a horrible special case to handle copy-on-write
          * behaviour that some programs depend on. We mark the "original"
          * un-COW'ed pages by matching them up with "vma->vm_pgoff".
+         * See vm_normal_page() for details.
          */
-        if (addr == vma->vm_start && end == vma->vm_end) {
+        if (is_cow_mapping(vma->vm_flags)) {
+                if (addr != vma->vm_start || end != vma->vm_end)
+                        return -EINVAL;
                 vma->vm_pgoff = pfn;
-                vma->vm_flags |= VM_PFN_AT_MMAP;
-        } else if (is_cow_mapping(vma->vm_flags))
-                return -EINVAL;
-
-        vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP;
+        }
 
-        err = track_pfn_vma_new(vma, &prot, pfn, PAGE_ALIGN(size));
-        if (err) {
-                /*
-                 * To indicate that track_pfn related cleanup is not
-                 * needed from higher level routine calling unmap_vmas
-                 */
-                vma->vm_flags &= ~(VM_IO | VM_RESERVED | VM_PFNMAP);
-                vma->vm_flags &= ~VM_PFN_AT_MMAP;
+        err = track_pfn_remap(vma, &prot, pfn, addr, PAGE_ALIGN(size));
+        if (err)
                 return -EINVAL;
-        }
+
+        vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
 
         BUG_ON(addr >= end);
         pfn -= addr >> PAGE_SHIFT;
@@ -2335,7 +2341,7 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
         } while (pgd++, addr = next, addr != end);
 
         if (err)
-                untrack_pfn_vma(vma, pfn, PAGE_ALIGN(size));
+                untrack_pfn(vma, pfn, PAGE_ALIGN(size));
 
         return err;
 }
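After this change remap_pfn_range() applies VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP itself and drives the PAT reservation through track_pfn_remap()/untrack_pfn() internally, so a typical caller reduces to a plain ->mmap handler. A hedged sketch; my_iomem_dev and its phys_base/region_size fields are hypothetical, the rest is real API:

#include <linux/fs.h>
#include <linux/mm.h>

struct my_iomem_dev {                   /* hypothetical driver state */
        phys_addr_t phys_base;
        resource_size_t region_size;
};

static int my_iomem_mmap(struct file *file, struct vm_area_struct *vma)
{
        struct my_iomem_dev *dev = file->private_data;
        unsigned long size = vma->vm_end - vma->vm_start;
        unsigned long off = vma->vm_pgoff << PAGE_SHIFT;

        if (off + size > dev->region_size)
                return -EINVAL;

        vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);

        /* remap_pfn_range() now sets VM_IO | VM_PFNMAP | VM_DONTEXPAND |
         * VM_DONTDUMP and cleans up pfn tracking on failure by itself. */
        return remap_pfn_range(vma, vma->vm_start,
                               (dev->phys_base + off) >> PAGE_SHIFT,
                               size, vma->vm_page_prot);
}
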
@@ -2516,11 +2522,14 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 spinlock_t *ptl, pte_t orig_pte)
         __releases(ptl)
 {
-        struct page *old_page, *new_page;
+        struct page *old_page, *new_page = NULL;
         pte_t entry;
         int ret = 0;
         int page_mkwrite = 0;
         struct page *dirty_page = NULL;
+        unsigned long mmun_start;       /* For mmu_notifiers */
+        unsigned long mmun_end;         /* For mmu_notifiers */
+        bool mmun_called = false;       /* For mmu_notifiers */
 
         old_page = vm_normal_page(vma, address, orig_pte);
         if (!old_page) {
@@ -2698,6 +2707,11 @@ gotten:
         if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))
                 goto oom_free_new;
 
+        mmun_start = address & PAGE_MASK;
+        mmun_end = (address & PAGE_MASK) + PAGE_SIZE;
+        mmun_called = true;
+        mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
+
         /*
          * Re-check the pte - we dropped the lock
          */
@@ -2764,6 +2778,8 @@ gotten:
         page_cache_release(new_page);
 unlock:
         pte_unmap_unlock(page_table, ptl);
+        if (mmun_called)
+                mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
         if (old_page) {
                 /*
                  * Don't let another task, with possibly unlocked vma,
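In the do_wp_page() hunks above the notifier range is the single page containing the faulting address, and mmun_called records whether range_start was issued so the shared unlock path can balance it. A small standalone example of the page-mask arithmetic (PAGE_SIZE of 4096 assumed; illustrative, not kernel code):

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

int main(void)
{
        unsigned long address = 0x7f1234567abcUL;      /* hypothetical fault address */
        unsigned long mmun_start = address & PAGE_MASK;
        unsigned long mmun_end = (address & PAGE_MASK) + PAGE_SIZE;

        /* prints [0x7f1234567000, 0x7f1234568000): exactly the replaced page */
        printf("[%#lx, %#lx)\n", mmun_start, mmun_end);
        return 0;
}
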
@@ -2801,14 +2817,13 @@ static void unmap_mapping_range_vma(struct vm_area_struct *vma,
         zap_page_range_single(vma, start_addr, end_addr - start_addr, details);
 }
 
-static inline void unmap_mapping_range_tree(struct prio_tree_root *root,
+static inline void unmap_mapping_range_tree(struct rb_root *root,
                                             struct zap_details *details)
 {
         struct vm_area_struct *vma;
-        struct prio_tree_iter iter;
         pgoff_t vba, vea, zba, zea;
 
-        vma_prio_tree_foreach(vma, &iter, root,
+        vma_interval_tree_foreach(vma, root,
                         details->first_index, details->last_index) {
 
                 vba = vma->vm_pgoff;
@@ -2839,7 +2854,7 @@ static inline void unmap_mapping_range_list(struct list_head *head,
          * across *all* the pages in each nonlinear VMA, not just the pages
          * whose virtual address lies outside the file truncation point.
          */
-        list_for_each_entry(vma, head, shared.vm_set.list) {
+        list_for_each_entry(vma, head, shared.nonlinear) {
                 details->nonlinear_vma = vma;
                 unmap_mapping_range_vma(vma, vma->vm_start, vma->vm_end, details);
         }
@@ -2883,7 +2898,7 @@ void unmap_mapping_range(struct address_space *mapping,
 
 
         mutex_lock(&mapping->i_mmap_mutex);
-        if (unlikely(!prio_tree_empty(&mapping->i_mmap)))
+        if (unlikely(!RB_EMPTY_ROOT(&mapping->i_mmap)))
                 unmap_mapping_range_tree(&mapping->i_mmap, &details);
         if (unlikely(!list_empty(&mapping->i_mmap_nonlinear)))
                 unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details);
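The last hunks switch i_mmap from a prio tree to an rbtree-based interval tree: vma_interval_tree_foreach() visits every vma whose file-page range overlaps [first_index, last_index], and the loop body then clamps the zap range to each vma before unmapping. A standalone sketch of that clamping step (simplified types, not the kernel structures):

#include <stdio.h>

struct range { unsigned long start, last; };    /* inclusive page offsets */

/* Clamp the zap range [zap_first, zap_last] to one vma's range [vba, vea]. */
static struct range clamp_to_vma(unsigned long vba, unsigned long vea,
                                 unsigned long zap_first, unsigned long zap_last)
{
        struct range r = {
                .start = zap_first < vba ? vba : zap_first,
                .last  = zap_last  > vea ? vea : zap_last,
        };
        return r;
}

int main(void)
{
        /* vma covers file pages 10..29; truncation zaps pages 0..19 */
        struct range r = clamp_to_vma(10, 29, 0, 19);
        printf("unmap pages %lu..%lu of this vma\n", r.start, r.last);  /* 10..19 */
        return 0;
}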