-rw-r--r--  fs/exec.c          |  2
-rw-r--r--  include/linux/mm.h |  6
-rw-r--r--  mm/mmap.c          |  7
-rw-r--r--  mm/mremap.c        | 57
4 files changed, 49 insertions, 23 deletions
diff --git a/fs/exec.c b/fs/exec.c
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -603,7 +603,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 	 * process cleanup to remove whatever mess we made.
 	 */
 	if (length != move_page_tables(vma, old_start,
-				       vma, new_start, length))
+				       vma, new_start, length, false))
 		return -ENOMEM;
 
 	lru_add_drain();
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0e6f9c9f2123..0d5f823ce3fc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1060,7 +1060,8 @@ vm_is_stack(struct task_struct *task, struct vm_area_struct *vma, int in_group);
 
 extern unsigned long move_page_tables(struct vm_area_struct *vma,
 		unsigned long old_addr, struct vm_area_struct *new_vma,
-		unsigned long new_addr, unsigned long len);
+		unsigned long new_addr, unsigned long len,
+		bool need_rmap_locks);
 extern unsigned long do_mremap(unsigned long addr,
 		unsigned long old_len, unsigned long new_len,
 		unsigned long flags, unsigned long new_addr);
@@ -1410,7 +1411,8 @@ extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
 		struct rb_node **, struct rb_node *);
 extern void unlink_file_vma(struct vm_area_struct *);
 extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
-	unsigned long addr, unsigned long len, pgoff_t pgoff);
+	unsigned long addr, unsigned long len, pgoff_t pgoff,
+	bool *need_rmap_locks);
 extern void exit_mmap(struct mm_struct *);
 
 extern int mm_take_all_locks(struct mm_struct *mm);
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2371,7 +2371,8 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
  * prior to moving page table entries, to effect an mremap move.
  */
 struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
-	unsigned long addr, unsigned long len, pgoff_t pgoff)
+	unsigned long addr, unsigned long len, pgoff_t pgoff,
+	bool *need_rmap_locks)
 {
 	struct vm_area_struct *vma = *vmap;
 	unsigned long vma_start = vma->vm_start;
@@ -2413,8 +2414,9 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 			 * linear if there are no pages mapped yet.
 			 */
 			VM_BUG_ON(faulted_in_anon_vma);
-			*vmap = new_vma;
+			*vmap = vma = new_vma;
 		}
+		*need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
 	} else {
 		new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 		if (new_vma) {
@@ -2434,6 +2436,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 			if (new_vma->vm_ops && new_vma->vm_ops->open)
 				new_vma->vm_ops->open(new_vma);
 			vma_link(mm, new_vma, prev, rb_link, rb_parent);
+			*need_rmap_locks = false;
 		}
 	}
 	return new_vma;
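
For illustration (not part of the patch; the helper name below is made up): rmap traversal order is tied to vm_pgoff, so the rmap locks can only be skipped when the destination vma is guaranteed to be visited after the source vma during a walk. When vma_merge() reuses an existing vma, copy_vma() reports that rule back to its caller as the new_vma->vm_pgoff <= vma->vm_pgoff test above; a freshly allocated and linked vma is reported as not needing the locks. A minimal standalone restatement of the predicate:

#include <stdbool.h>
#include <stdio.h>

/* mremap_needs_rmap_locks() is a hypothetical name, not a kernel symbol */
static bool mremap_needs_rmap_locks(unsigned long new_vma_pgoff,
                                    unsigned long src_vma_pgoff)
{
        /* destination not strictly after the source => keep the rmap locks */
        return new_vma_pgoff <= src_vma_pgoff;
}

int main(void)
{
        /* merged destination that sits at or before the source: prints 1 */
        printf("%d\n", mremap_needs_rmap_locks(16, 64));
        /* destination strictly after the source: prints 0, locks skipped */
        printf("%d\n", mremap_needs_rmap_locks(128, 64));
        return 0;
}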
diff --git a/mm/mremap.c b/mm/mremap.c
index 5588bb6e9295..3b639a4b26bd 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -71,26 +71,42 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
 static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		unsigned long old_addr, unsigned long old_end,
 		struct vm_area_struct *new_vma, pmd_t *new_pmd,
-		unsigned long new_addr)
+		unsigned long new_addr, bool need_rmap_locks)
 {
 	struct address_space *mapping = NULL;
-	struct anon_vma *anon_vma = vma->anon_vma;
+	struct anon_vma *anon_vma = NULL;
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *old_pte, *new_pte, pte;
 	spinlock_t *old_ptl, *new_ptl;
 
-	if (vma->vm_file) {
-		/*
-		 * Subtle point from Rajesh Venkatasubramanian: before
-		 * moving file-based ptes, we must lock truncate_pagecache
-		 * out, since it might clean the dst vma before the src vma,
-		 * and we propagate stale pages into the dst afterward.
-		 */
-		mapping = vma->vm_file->f_mapping;
-		mutex_lock(&mapping->i_mmap_mutex);
+	/*
+	 * When need_rmap_locks is true, we take the i_mmap_mutex and anon_vma
+	 * locks to ensure that rmap will always observe either the old or the
+	 * new ptes. This is the easiest way to avoid races with
+	 * truncate_pagecache(), page migration, etc...
+	 *
+	 * When need_rmap_locks is false, we use other ways to avoid
+	 * such races:
+	 *
+	 * - During exec() shift_arg_pages(), we use a specially tagged vma
+	 *   which rmap call sites look for using is_vma_temporary_stack().
+	 *
+	 * - During mremap(), new_vma is often known to be placed after vma
+	 *   in rmap traversal order. This ensures rmap will always observe
+	 *   either the old pte, or the new pte, or both (the page table locks
+	 *   serialize access to individual ptes, but only rmap traversal
+	 *   order guarantees that we won't miss both the old and new ptes).
+	 */
+	if (need_rmap_locks) {
+		if (vma->vm_file) {
+			mapping = vma->vm_file->f_mapping;
+			mutex_lock(&mapping->i_mmap_mutex);
+		}
+		if (vma->anon_vma) {
+			anon_vma = vma->anon_vma;
+			anon_vma_lock(anon_vma);
+		}
 	}
-	if (anon_vma)
-		anon_vma_lock(anon_vma);
 
 	/*
 	 * We don't have to worry about the ordering of src and dst
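
The ordering argument in the comment above can be checked with a tiny standalone model (toy C, not kernel code; the function names are made up, and it assumes the move of a given pte is a single atomic event, which the page table locks provide): a walker that always samples the source before the destination can never miss the pte, while the reverse visiting order can miss both copies.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

/* hypothetical helper: the pte sits at the source before step "move" and
 * at the destination from step "move" on; a walker samples the source at
 * step t_src and the destination at step t_dst */
static bool walker_sees_pte(int move, int t_src, int t_dst)
{
        bool saw_old = t_src < move;    /* pte still at the source */
        bool saw_new = t_dst >= move;   /* pte already at the destination */
        return saw_old || saw_new;
}

int main(void)
{
        const int steps = 10;
        int move, t_src, t_dst, misses = 0;

        /* source visited before destination: the pte is never missed */
        for (move = 0; move <= steps; move++)
                for (t_src = 0; t_src < steps; t_src++)
                        for (t_dst = t_src + 1; t_dst < steps; t_dst++)
                                assert(walker_sees_pte(move, t_src, t_dst));

        /* destination visited first: some interleavings miss both copies */
        for (move = 0; move <= steps; move++)
                for (t_dst = 0; t_dst < steps; t_dst++)
                        for (t_src = t_dst + 1; t_src < steps; t_src++)
                                if (!walker_sees_pte(move, t_src, t_dst))
                                        misses++;
        printf("missed-pte interleavings with reversed order: %d\n", misses);
        return 0;
}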
@@ -127,7 +143,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 
 unsigned long move_page_tables(struct vm_area_struct *vma,
 		unsigned long old_addr, struct vm_area_struct *new_vma,
-		unsigned long new_addr, unsigned long len)
+		unsigned long new_addr, unsigned long len,
+		bool need_rmap_locks)
 {
 	unsigned long extent, next, old_end;
 	pmd_t *old_pmd, *new_pmd;
@@ -174,7 +191,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 		if (extent > LATENCY_LIMIT)
 			extent = LATENCY_LIMIT;
 		move_ptes(vma, old_pmd, old_addr, old_addr + extent,
-			  new_vma, new_pmd, new_addr);
+			  new_vma, new_pmd, new_addr, need_rmap_locks);
 		need_flush = true;
 	}
 	if (likely(need_flush))
@@ -198,6 +215,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 	unsigned long hiwater_vm;
 	int split = 0;
 	int err;
+	bool need_rmap_locks;
 
 	/*
 	 * We'd prefer to avoid failure later on in do_munmap:
@@ -219,18 +237,21 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 		return err;
 
 	new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
-	new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff);
+	new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff,
+			   &need_rmap_locks);
 	if (!new_vma)
 		return -ENOMEM;
 
-	moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len);
+	moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len,
+				     need_rmap_locks);
 	if (moved_len < old_len) {
 		/*
 		 * On error, move entries back from new area to old,
 		 * which will succeed since page tables still there,
 		 * and then proceed to unmap new area instead of old.
 		 */
-		move_page_tables(new_vma, new_addr, vma, old_addr, moved_len);
+		move_page_tables(new_vma, new_addr, vma, old_addr, moved_len,
+				 true);
 		vma = new_vma;
 		old_len = new_len;
 		old_addr = new_addr;
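
For completeness, a small userspace example (ordinary C, not part of the patch) that exercises the path changed here: mremap(2) with MREMAP_MAYMOVE | MREMAP_FIXED sends the kernel through move_vma() -> copy_vma() -> move_page_tables(). In this case the destination is typically a freshly allocated vma, i.e. the *need_rmap_locks = false branch reported by copy_vma() above.

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        size_t len = 4 * (size_t)sysconf(_SC_PAGESIZE);

        /* anonymous mapping whose pages we will relocate */
        char *old = mmap(NULL, len, PROT_READ | PROT_WRITE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (old == MAP_FAILED) {
                perror("mmap");
                return 1;
        }
        memset(old, 0x5a, len);

        /* reserve a destination, then ask the kernel to move the vma there */
        char *dst = mmap(NULL, len, PROT_NONE,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (dst == MAP_FAILED) {
                perror("mmap dst");
                return 1;
        }

        char *new = mremap(old, len, len, MREMAP_MAYMOVE | MREMAP_FIXED, dst);
        if (new == MAP_FAILED) {
                perror("mremap");
                return 1;
        }

        /* only the page tables moved; the contents came along for free */
        printf("moved %p -> %p, first byte 0x%x\n",
               (void *)old, (void *)new, new[0]);
        munmap(new, len);
        return 0;
}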