 include/linux/rmap.h |  1 +
 mm/mmap.c            | 24 ++++++++++++++++++++++---
 mm/mremap.c          |  9 +++++++++
 mm/rmap.c            | 45 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 76 insertions(+), 3 deletions(-)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 2148b122779b..1afb9954bbf1 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -120,6 +120,7 @@ void anon_vma_init(void); /* create anon_vma_cachep */
 int anon_vma_prepare(struct vm_area_struct *);
 void unlink_anon_vmas(struct vm_area_struct *);
 int anon_vma_clone(struct vm_area_struct *, struct vm_area_struct *);
+void anon_vma_moveto_tail(struct vm_area_struct *);
 int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *);
 void __anon_vma_link(struct vm_area_struct *);
 
diff --git a/mm/mmap.c b/mm/mmap.c
index eae90af60ea6..adea3b8880e3 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2322,13 +2322,16 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 	struct vm_area_struct *new_vma, *prev;
 	struct rb_node **rb_link, *rb_parent;
 	struct mempolicy *pol;
+	bool faulted_in_anon_vma = true;
 
 	/*
 	 * If anonymous vma has not yet been faulted, update new pgoff
 	 * to match new location, to increase its chance of merging.
 	 */
-	if (!vma->vm_file && !vma->anon_vma)
+	if (unlikely(!vma->vm_file && !vma->anon_vma)) {
 		pgoff = addr >> PAGE_SHIFT;
+		faulted_in_anon_vma = false;
+	}
 
 	find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
 	new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
@@ -2337,9 +2340,24 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 		/*
 		 * Source vma may have been merged into new_vma
 		 */
-		if (vma_start >= new_vma->vm_start &&
-		    vma_start < new_vma->vm_end)
+		if (unlikely(vma_start >= new_vma->vm_start &&
+			     vma_start < new_vma->vm_end)) {
+			/*
+			 * The only way we can get a vma_merge() with
+			 * ourselves during an mremap is if the vma
+			 * hasn't been faulted in yet and we were
+			 * allowed to reset the dst vma->vm_pgoff to
+			 * the destination address of the mremap so
+			 * the merge could happen. To be safe, mremap
+			 * must otherwise change the vm_pgoff linearity
+			 * between src and dst vmas (which in turn
+			 * prevents a vma_merge()). Keeping vm_pgoff
+			 * linear is only safe if no pages are mapped yet.
+			 */
+			VM_BUG_ON(faulted_in_anon_vma);
 			*vmap = new_vma;
+		} else
+			anon_vma_moveto_tail(new_vma);
 	} else {
 		new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 		if (new_vma) {
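
The copy_vma() logic above hinges on vm_pgoff: a never-faulted anonymous vma (no vm_file, no anon_vma) may have its vm_pgoff reset to match the destination address so that vma_merge() can succeed, possibly merging the destination with the source vma itself. The following is a minimal userspace sketch of the scenario being guarded, not part of the patch; it assumes Linux with glibc, and the mapping size is arbitrary:

/*
 * Sketch only: move a never-faulted anonymous mapping. Because no
 * page was ever touched, vma->anon_vma stays NULL, so copy_vma()
 * takes the pgoff-reset path and the moved range is free to merge
 * with an adjacent vma at the destination.
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>

int main(void)
{
	size_t len = 2 * 1024 * 1024;

	/* Anonymous mapping, deliberately never written. */
	void *src = mmap(NULL, len, PROT_READ | PROT_WRITE,
			 MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (src == MAP_FAILED) {
		perror("mmap");
		return 1;
	}

	/* copy_vma() runs with !vma->vm_file && !vma->anon_vma here. */
	void *dst = mremap(src, len, len, MREMAP_MAYMOVE);
	if (dst == MAP_FAILED) {
		perror("mremap");
		return 1;
	}

	/* Only now is the memory faulted in. */
	memset(dst, 0, len);
	printf("moved %p -> %p\n", src, dst);
	return 0;
}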
diff --git a/mm/mremap.c b/mm/mremap.c
index d6959cb4df58..87bb8393e7d2 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -221,6 +221,15 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 	moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len);
 	if (moved_len < old_len) {
 		/*
+		 * Before moving the page tables from the new vma back
+		 * to the old vma, we need to be sure the old vma is
+		 * queued after the new vma in the same_anon_vma list;
+		 * otherwise an SMP race with rmap_walk() could lead
+		 * rmap_walk() to miss some page table.
+		 */
+		anon_vma_moveto_tail(vma);
+
+		/*
 		 * On error, move entries back from new area to old,
 		 * which will succeed since page tables still there,
 		 * and then proceed to unmap new area instead of old.
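
The ordering requirement the new comment describes can be made concrete with a toy model (not kernel code): a pte moves from the src page table to the dst page table exactly once, and an rmap walker checks each table once. Walking src before dst can never produce a false negative, while the opposite order can. In the sketch below, the enum, the histories table, and walk() are invented for illustration:

/*
 * Toy model: enumerate where the single src->dst move can fall
 * relative to the walker's two checks, and show that only the
 * dst-first walk order can miss the pte.
 */
#include <stdbool.h>
#include <stdio.h>

enum where { IN_SRC, IN_DST };

/*
 * The walker checks one table, then the other. pte_at_first/second
 * give the pte's location at the time of each check.
 */
static bool walk(enum where pte_at_first, enum where pte_at_second,
		 enum where first, enum where second)
{
	return pte_at_first == first || pte_at_second == second;
}

int main(void)
{
	/*
	 * The move is src -> dst, so across the two checks the pte's
	 * location can only evolve forward: SRC->SRC, SRC->DST, or
	 * DST->DST (never back).
	 */
	enum where histories[3][2] = {
		{ IN_SRC, IN_SRC }, { IN_SRC, IN_DST }, { IN_DST, IN_DST },
	};

	for (int i = 0; i < 3; i++) {
		enum where a = histories[i][0], b = histories[i][1];
		printf("history %d: src-first %s, dst-first %s\n", i,
		       walk(a, b, IN_SRC, IN_DST) ? "finds pte" : "MISSES",
		       walk(a, b, IN_DST, IN_SRC) ? "finds pte" : "MISSES");
	}
	return 0;
}

Only history 1 (the move lands between the two checks) combined with the dst-first order reports a miss; the src-first order finds the pte in every interleaving, which is exactly why the old (src) vma must sit after the new (dst) vma in same_anon_vma before the tables are moved back.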
diff --git a/mm/rmap.c b/mm/rmap.c
index a4fd3680038b..a2e5ce1fa081 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -272,6 +272,51 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 }
 
 /*
+ * Some rmap walks need to find all ptes/hugepmds without false
+ * negatives (like migrate and split_huge_page) while running
+ * concurrently with operations that copy or move page tables (like
+ * mremap() and fork()). They depend on the anon_vma "same_anon_vma"
+ * list being in a certain order: the dst_vma must be placed after
+ * the src_vma in the list. This is always guaranteed by fork(), but
+ * mremap() needs to call this function to enforce it in case the
+ * dst_vma isn't newly allocated and chained with anon_vma_clone()
+ * but is just an extension of a pre-existing vma through vma_merge().
+ *
+ * NOTE: the same_anon_vma list can still be changed by other
+ * processes while mremap runs, because mremap doesn't hold the
+ * anon_vma mutex to prevent modifications to the list while it
+ * runs. All we need to enforce is that the relative order of this
+ * process's vmas isn't changing (we don't care about the order of
+ * other processes' vmas). Each vma corresponds to an anon_vma_chain
+ * structure, so there's no risk that other processes calling
+ * anon_vma_moveto_tail() and changing the same_anon_vma list under
+ * mremap() will screw with the relative order of this process's
+ * vmas in the list, because they can't alter the order of any vma
+ * that belongs to this process. And there can't be another
+ * anon_vma_moveto_tail() running concurrently with mremap() coming
+ * from this process, because we hold the mmap_sem for the whole
+ * mremap(). The fork() ordering dependency also shouldn't be
+ * affected, because fork() only cares that the parent vmas are
+ * placed in the list before the child vmas, and
+ * anon_vma_moveto_tail() won't reorder vmas from either the fork()
+ * parent or child.
+ */
+void anon_vma_moveto_tail(struct vm_area_struct *dst)
+{
+	struct anon_vma_chain *pavc;
+	struct anon_vma *root = NULL;
+
+	list_for_each_entry_reverse(pavc, &dst->anon_vma_chain, same_vma) {
+		struct anon_vma *anon_vma = pavc->anon_vma;
+		VM_BUG_ON(pavc->vma != dst);
+		root = lock_anon_vma_root(root, anon_vma);
+		list_del(&pavc->same_anon_vma);
+		list_add_tail(&pavc->same_anon_vma, &anon_vma->head);
+	}
+	unlock_anon_vma_root(root);
+}
+
+/*
  * Attach vma to its own anon_vma, as well as to the anon_vmas that
  * the corresponding VMA in the parent process is attached to.
  * Returns 0 on success, non-zero on failure.
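
The body of anon_vma_moveto_tail() is just list_del() plus list_add_tail() on each of dst's anon_vma chains (the combination the kernel also provides as list_move_tail()). The following standalone userspace sketch, with minimal re-implementations of the list helpers, shows the resulting walk order; the helper names mirror include/linux/list.h, and the three-entry list is purely illustrative:

/*
 * Sketch: moving the dst vma's chain entry to the tail of a
 * same_anon_vma list places it after the src vma's entry, so an
 * rmap walk visits src before dst.
 */
#include <stdio.h>

struct list_head { struct list_head *prev, *next; };

static void INIT_LIST_HEAD(struct list_head *h) { h->prev = h->next = h; }

static void list_add_tail(struct list_head *new, struct list_head *head)
{
	new->prev = head->prev;
	new->next = head;
	head->prev->next = new;
	head->prev = new;
}

static void list_del(struct list_head *e)
{
	e->prev->next = e->next;
	e->next->prev = e->prev;
}

/* same_anon_vma is the first member, so a cast stands in for container_of(). */
struct chain { struct list_head same_anon_vma; const char *vma; };

int main(void)
{
	struct list_head head;
	struct chain dst = { .vma = "dst" }, src = { .vma = "src" },
		     other = { .vma = "other" };
	struct list_head *pos;

	INIT_LIST_HEAD(&head);
	/* vma_merge() left dst already on the list, ahead of src. */
	list_add_tail(&dst.same_anon_vma, &head);
	list_add_tail(&src.same_anon_vma, &head);
	list_add_tail(&other.same_anon_vma, &head);

	/* The core of anon_vma_moveto_tail(): move dst to the tail. */
	list_del(&dst.same_anon_vma);
	list_add_tail(&dst.same_anon_vma, &head);

	/* Prints "src other dst": dst is now walked after src. */
	for (pos = head.next; pos != &head; pos = pos->next)
		printf("%s ", ((struct chain *)pos)->vma);
	printf("\n");
	return 0;
}

Note that the move only touches dst's own entries, which is why, as the comment above explains, concurrent anon_vma_moveto_tail() calls from other processes cannot disturb the relative order of this process's vmas.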