-rw-r--r--  fs/exec.c            2
-rw-r--r--  include/linux/mm.h   6
-rw-r--r--  mm/mmap.c            7
-rw-r--r--  mm/mremap.c         57
4 files changed, 49 insertions, 23 deletions
diff --git a/fs/exec.c b/fs/exec.c
index 19f4fb80cd17..4f2bebc276c5 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -603,7 +603,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 	 * process cleanup to remove whatever mess we made.
 	 */
 	if (length != move_page_tables(vma, old_start,
-				       vma, new_start, length))
+				       vma, new_start, length, false))
 		return -ENOMEM;
 
 	lru_add_drain();
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0e6f9c9f2123..0d5f823ce3fc 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1060,7 +1060,8 @@ vm_is_stack(struct task_struct *task, struct vm_area_struct *vma, int in_group);
 
 extern unsigned long move_page_tables(struct vm_area_struct *vma,
 		unsigned long old_addr, struct vm_area_struct *new_vma,
-		unsigned long new_addr, unsigned long len);
+		unsigned long new_addr, unsigned long len,
+		bool need_rmap_locks);
 extern unsigned long do_mremap(unsigned long addr,
 			       unsigned long old_len, unsigned long new_len,
 			       unsigned long flags, unsigned long new_addr);
@@ -1410,7 +1411,8 @@ extern void __vma_link_rb(struct mm_struct *, struct vm_area_struct *,
 	struct rb_node **, struct rb_node *);
 extern void unlink_file_vma(struct vm_area_struct *);
 extern struct vm_area_struct *copy_vma(struct vm_area_struct **,
-	unsigned long addr, unsigned long len, pgoff_t pgoff);
+	unsigned long addr, unsigned long len, pgoff_t pgoff,
+	bool *need_rmap_locks);
 extern void exit_mmap(struct mm_struct *);
 
 extern int mm_take_all_locks(struct mm_struct *mm);
diff --git a/mm/mmap.c b/mm/mmap.c
index 81248992120d..2d942353d681 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2371,7 +2371,8 @@ int insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
  * prior to moving page table entries, to effect an mremap move.
  */
 struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
-	unsigned long addr, unsigned long len, pgoff_t pgoff)
+	unsigned long addr, unsigned long len, pgoff_t pgoff,
+	bool *need_rmap_locks)
 {
 	struct vm_area_struct *vma = *vmap;
 	unsigned long vma_start = vma->vm_start;
@@ -2413,8 +2414,9 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 			 * linear if there are no pages mapped yet.
 			 */
 			VM_BUG_ON(faulted_in_anon_vma);
-			*vmap = new_vma;
+			*vmap = vma = new_vma;
 		}
+		*need_rmap_locks = (new_vma->vm_pgoff <= vma->vm_pgoff);
 	} else {
 		new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 		if (new_vma) {
@@ -2434,6 +2436,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 			if (new_vma->vm_ops && new_vma->vm_ops->open)
 				new_vma->vm_ops->open(new_vma);
 			vma_link(mm, new_vma, prev, rb_link, rb_parent);
+			*need_rmap_locks = false;
 		}
 	}
 	return new_vma;
diff --git a/mm/mremap.c b/mm/mremap.c
index 5588bb6e9295..3b639a4b26bd 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -71,26 +71,42 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, struct vm_area_struct *vma,
 static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		unsigned long old_addr, unsigned long old_end,
 		struct vm_area_struct *new_vma, pmd_t *new_pmd,
-		unsigned long new_addr)
+		unsigned long new_addr, bool need_rmap_locks)
 {
 	struct address_space *mapping = NULL;
-	struct anon_vma *anon_vma = vma->anon_vma;
+	struct anon_vma *anon_vma = NULL;
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *old_pte, *new_pte, pte;
 	spinlock_t *old_ptl, *new_ptl;
 
-	if (vma->vm_file) {
-		/*
-		 * Subtle point from Rajesh Venkatasubramanian: before
-		 * moving file-based ptes, we must lock truncate_pagecache
-		 * out, since it might clean the dst vma before the src vma,
-		 * and we propagate stale pages into the dst afterward.
-		 */
-		mapping = vma->vm_file->f_mapping;
-		mutex_lock(&mapping->i_mmap_mutex);
+	/*
+	 * When need_rmap_locks is true, we take the i_mmap_mutex and anon_vma
+	 * locks to ensure that rmap will always observe either the old or the
+	 * new ptes. This is the easiest way to avoid races with
+	 * truncate_pagecache(), page migration, etc...
+	 *
+	 * When need_rmap_locks is false, we use other ways to avoid
+	 * such races:
+	 *
+	 * - During exec() shift_arg_pages(), we use a specially tagged vma
+	 *   which rmap call sites look for using is_vma_temporary_stack().
+	 *
+	 * - During mremap(), new_vma is often known to be placed after vma
+	 *   in rmap traversal order. This ensures rmap will always observe
+	 *   either the old pte, or the new pte, or both (the page table locks
+	 *   serialize access to individual ptes, but only rmap traversal
+	 *   order guarantees that we won't miss both the old and new ptes).
+	 */
+	if (need_rmap_locks) {
+		if (vma->vm_file) {
+			mapping = vma->vm_file->f_mapping;
+			mutex_lock(&mapping->i_mmap_mutex);
+		}
+		if (vma->anon_vma) {
+			anon_vma = vma->anon_vma;
+			anon_vma_lock(anon_vma);
+		}
 	}
-	if (anon_vma)
-		anon_vma_lock(anon_vma);
 
 	/*
 	 * We don't have to worry about the ordering of src and dst
@@ -127,7 +143,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 
 unsigned long move_page_tables(struct vm_area_struct *vma,
 		unsigned long old_addr, struct vm_area_struct *new_vma,
-		unsigned long new_addr, unsigned long len)
+		unsigned long new_addr, unsigned long len,
+		bool need_rmap_locks)
 {
 	unsigned long extent, next, old_end;
 	pmd_t *old_pmd, *new_pmd;
@@ -174,7 +191,7 @@ unsigned long move_page_tables(struct vm_area_struct *vma,
 		if (extent > LATENCY_LIMIT)
 			extent = LATENCY_LIMIT;
 		move_ptes(vma, old_pmd, old_addr, old_addr + extent,
-			  new_vma, new_pmd, new_addr);
+			  new_vma, new_pmd, new_addr, need_rmap_locks);
 		need_flush = true;
 	}
 	if (likely(need_flush))
@@ -198,6 +215,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 	unsigned long hiwater_vm;
 	int split = 0;
 	int err;
+	bool need_rmap_locks;
 
 	/*
 	 * We'd prefer to avoid failure later on in do_munmap:
@@ -219,18 +237,21 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 		return err;
 
 	new_pgoff = vma->vm_pgoff + ((old_addr - vma->vm_start) >> PAGE_SHIFT);
-	new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff);
+	new_vma = copy_vma(&vma, new_addr, new_len, new_pgoff,
+			   &need_rmap_locks);
 	if (!new_vma)
 		return -ENOMEM;
 
-	moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len);
+	moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len,
+				     need_rmap_locks);
 	if (moved_len < old_len) {
 		/*
 		 * On error, move entries back from new area to old,
 		 * which will succeed since page tables still there,
 		 * and then proceed to unmap new area instead of old.
 		 */
-		move_page_tables(new_vma, new_addr, vma, old_addr, moved_len);
+		move_page_tables(new_vma, new_addr, vma, old_addr, moved_len,
+				 true);
 		vma = new_vma;
 		old_len = new_len;
 		old_addr = new_addr;
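
Note (not part of the patch): the hunks above change how move_vma() decides whether move_ptes() must hold the i_mmap_mutex and anon_vma locks while relocating page table entries. The small user-space program below is only an illustrative sketch of the syscall path this affects; if the kernel chooses to relocate the mapping, the mremap(MREMAP_MAYMOVE) call is serviced by move_vma(), which now threads need_rmap_locks from copy_vma() into move_page_tables(). Buffer sizes and output format are arbitrary choices for the example.

/* Exercise the mremap() path serviced by move_vma(). */
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	size_t page = (size_t)sysconf(_SC_PAGESIZE);
	size_t len = 16 * page;

	/* Map and touch an anonymous region so it has real ptes to move. */
	char *old = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (old == MAP_FAILED) {
		perror("mmap");
		return EXIT_FAILURE;
	}
	memset(old, 0xaa, len);

	/*
	 * MREMAP_MAYMOVE lets the kernel relocate the mapping. If it does,
	 * move_vma() calls copy_vma() and then move_page_tables() with the
	 * need_rmap_locks value computed by this patch.
	 */
	char *new = mremap(old, len, 2 * len, MREMAP_MAYMOVE);
	if (new == MAP_FAILED) {
		perror("mremap");
		return EXIT_FAILURE;
	}

	printf("mapping %p -> %p, first byte 0x%02x\n",
	       (void *)old, (void *)new, (unsigned char)new[0]);

	munmap(new, 2 * len);
	return EXIT_SUCCESS;
}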