-rw-r--r--   include/linux/rmap.h    1
-rw-r--r--   mm/mmap.c              24
-rw-r--r--   mm/mremap.c             9
-rw-r--r--   mm/rmap.c              45
4 files changed, 76 insertions(+), 3 deletions(-)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 2148b122779b..1afb9954bbf1 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -120,6 +120,7 @@ void anon_vma_init(void);	/* create anon_vma_cachep */
 int anon_vma_prepare(struct vm_area_struct *);
 void unlink_anon_vmas(struct vm_area_struct *);
 int anon_vma_clone(struct vm_area_struct *, struct vm_area_struct *);
+void anon_vma_moveto_tail(struct vm_area_struct *);
 int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *);
 void __anon_vma_link(struct vm_area_struct *);
 
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2322,13 +2322,16 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 	struct vm_area_struct *new_vma, *prev;
 	struct rb_node **rb_link, *rb_parent;
 	struct mempolicy *pol;
+	bool faulted_in_anon_vma = true;
 
 	/*
 	 * If anonymous vma has not yet been faulted, update new pgoff
 	 * to match new location, to increase its chance of merging.
 	 */
-	if (!vma->vm_file && !vma->anon_vma)
+	if (unlikely(!vma->vm_file && !vma->anon_vma)) {
 		pgoff = addr >> PAGE_SHIFT;
+		faulted_in_anon_vma = false;
+	}
 
 	find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
 	new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
@@ -2337,9 +2340,24 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 		/*
 		 * Source vma may have been merged into new_vma
 		 */
-		if (vma_start >= new_vma->vm_start &&
-		    vma_start < new_vma->vm_end)
+		if (unlikely(vma_start >= new_vma->vm_start &&
+			     vma_start < new_vma->vm_end)) {
+			/*
+			 * The only way we can get a vma_merge with
+			 * self during an mremap is if the vma hasn't
+			 * been faulted in yet and we were allowed to
+			 * reset the dst vma->vm_pgoff to the
+			 * destination address of the mremap to allow
+			 * the merge to happen. mremap must change the
+			 * vm_pgoff linearity between src and dst vmas
+			 * (in turn preventing a vma_merge) to be
+			 * safe. It is only safe to keep the vm_pgoff
+			 * linear if there are no pages mapped yet.
+			 */
+			VM_BUG_ON(faulted_in_anon_vma);
 			*vmap = new_vma;
+		} else
+			anon_vma_moveto_tail(new_vma);
 	} else {
 		new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 		if (new_vma) {
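
The VM_BUG_ON(faulted_in_anon_vma) above encodes the invariant spelled out in the new comment: a vma can only merge with itself across an mremap() if it has never been faulted, because only then was copy_vma() allowed to reset vm_pgoff. The following is a minimal userspace sketch of a sequence that can plausibly reach that path; the exact vma layout the kernel ends up with is an assumption, and error handling is trimmed.

/* Hypothetical reproducer sketch, not part of the patch: mremap() an
 * anonymous page that was mapped but never touched (so it has no
 * anon_vma) to the address right after itself, which can let
 * vma_merge() extend the source vma over the destination
 * ("merge with self").
 */
#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
        long page = sysconf(_SC_PAGESIZE);

        /* Reserve two adjacent pages so we control the layout. */
        char *base = mmap(NULL, 2 * page, PROT_READ | PROT_WRITE,
                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
        if (base == MAP_FAILED)
                return 1;

        /* Free the second page; the first stays mapped but is never
         * written, so its vma->anon_vma is still NULL. */
        munmap(base + page, page);

        /* Move the unfaulted page into the hole right after it.
         * copy_vma() may reset vm_pgoff (faulted_in_anon_vma == false)
         * and vma_merge() can then merge the destination range into
         * the source vma itself: the *vmap = new_vma case. */
        void *dst = mremap(base, page, page,
                           MREMAP_MAYMOVE | MREMAP_FIXED, base + page);

        printf("mremap returned %p\n", dst);
        return dst == MAP_FAILED;
}

Had the page been faulted first, vm_pgoff could not have been reset, the self-merge could not happen, and the VM_BUG_ON() would be unreachable.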
diff --git a/mm/mremap.c b/mm/mremap.c
index d6959cb4df58..87bb8393e7d2 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -221,6 +221,15 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 	moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len);
 	if (moved_len < old_len) {
 		/*
+		 * Before moving the page tables back from the new vma
+		 * to the old vma, we need to be sure the old vma is
+		 * queued after the new vma in the same_anon_vma list,
+		 * to prevent SMP races with rmap_walk (which could
+		 * cause rmap_walk to miss some page tables).
+		 */
+		anon_vma_moveto_tail(vma);
+
+		/*
 		 * On error, move entries back from new area to old,
 		 * which will succeed since page tables still there,
 		 * and then proceed to unmap new area instead of old.
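
The ordering requirement in that comment can be made concrete. rmap_walk() scans the same_anon_vma list once while mremap() moves ptes from a source vma to a destination vma; if the walker reaches the destination before the move and the source after it, the pte is found in neither. The toy, single-threaded model below (not kernel code; all names are made up) enumerates every point at which the move can land relative to a two-entry walk.

/* Toy model: a "pte" lives in src until the mover relocates it to
 * dst; the walker scans the two-entry list once.  move_at says how
 * many entries the walker has visited before the move happens
 * (0 = before the walk starts, 2 = after it ends). */
#include <stdbool.h>
#include <stdio.h>

enum { SRC, DST };

static bool walk_finds_pte(const int order[2], int move_at)
{
        for (int i = 0; i < 2; i++) {
                bool pte_in_src = (i < move_at);

                if (order[i] == SRC && pte_in_src)
                        return true;    /* seen before the move */
                if (order[i] == DST && !pte_in_src)
                        return true;    /* seen after the move */
        }
        return false;                   /* false negative: the race */
}

int main(void)
{
        const int src_first[2] = { SRC, DST }; /* the enforced order */
        const int dst_first[2] = { DST, SRC }; /* what a merge could leave */

        for (int t = 0; t <= 2; t++) {
                printf("src first, move at %d: %s\n", t,
                       walk_finds_pte(src_first, t) ? "found" : "MISSED");
                printf("dst first, move at %d: %s\n", t,
                       walk_finds_pte(dst_first, t) ? "found" : "MISSED");
        }
        return 0;
}

With src before dst, every interleaving finds the pte; with dst first, the move at t == 1 is missed, which is exactly the window the anon_vma_moveto_tail(vma) call above closes before the error path moves the page tables back.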
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -272,6 +272,51 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 }
 
 /*
+ * Some rmap walks that need to find all ptes/hugepmds without false
+ * negatives (like migrate and split_huge_page) must be safe to run
+ * concurrently with operations that copy or move pagetables (like
+ * mremap() and fork()). They depend on the anon_vma "same_anon_vma"
+ * list being in a certain order: the dst_vma must be placed after
+ * the src_vma in the list. This is always guaranteed by fork(), but
+ * mremap() needs to call this function to enforce it in case the
+ * dst_vma isn't newly allocated and chained with the
+ * anon_vma_clone() function, but is just an extension of a
+ * pre-existing vma through vma_merge().
+ *
+ * NOTE: the same_anon_vma list can still be changed by other
+ * processes while mremap runs, because mremap doesn't hold the
+ * anon_vma mutex to prevent modifications to the list while it
+ * runs. All we need to enforce is that the relative order of this
+ * process's vmas isn't changing (we don't care about other vmas'
+ * order). Each vma corresponds to an anon_vma_chain structure, so
+ * there's no risk that other processes calling anon_vma_moveto_tail()
+ * and changing the same_anon_vma list under mremap() will screw with
+ * the relative order of this process's vmas in the list, because
+ * they can't alter the order of any vma that belongs to this
+ * process. And there can't be another anon_vma_moveto_tail() running
+ * concurrently with mremap() coming from this process, because we
+ * hold the mmap_sem for the whole mremap(). The fork() ordering
+ * dependency also shouldn't be affected, because fork() only cares
+ * that the parent vmas are placed in the list before the child vmas,
+ * and anon_vma_moveto_tail() won't reorder vmas from either the
+ * fork() parent or child.
+ */
+void anon_vma_moveto_tail(struct vm_area_struct *dst)
+{
+	struct anon_vma_chain *pavc;
+	struct anon_vma *root = NULL;
+
+	list_for_each_entry_reverse(pavc, &dst->anon_vma_chain, same_vma) {
+		struct anon_vma *anon_vma = pavc->anon_vma;
+		VM_BUG_ON(pavc->vma != dst);
+		root = lock_anon_vma_root(root, anon_vma);
+		list_del(&pavc->same_anon_vma);
+		list_add_tail(&pavc->same_anon_vma, &anon_vma->head);
+	}
+	unlock_anon_vma_root(root);
+}
+
+/*
  * Attach vma to its own anon_vma, as well as to the anon_vmas that
  * the corresponding VMA in the parent process is attached to.
  * Returns 0 on success, non-zero on failure.
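
To make the list surgery concrete, here is a small userspace model of the step anon_vma_moveto_tail() performs on one same_anon_vma list, using a minimal circular doubly-linked list in the style of the kernel's struct list_head. The three-entry layout and all names are illustrative assumptions, not taken from the patch.

#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *prev, *next; };

static void list_init(struct list_head *head)
{
        head->prev = head->next = head;
}

static void list_add_tail(struct list_head *entry, struct list_head *head)
{
        entry->prev = head->prev;
        entry->next = head;
        head->prev->next = entry;
        head->prev = entry;
}

static void list_del(struct list_head *entry)
{
        entry->prev->next = entry->next;
        entry->next->prev = entry->prev;
        entry->prev = entry->next = entry;
}

/* Stand-in for struct anon_vma_chain: one link per (vma, anon_vma). */
struct avc {
        const char *vma_name;
        struct list_head same_anon_vma;
};

#define avc_of(ptr) \
        ((struct avc *)((char *)(ptr) - offsetof(struct avc, same_anon_vma)))

int main(void)
{
        struct list_head head;          /* the anon_vma's list head */
        struct avc dst = { "dst" }, src = { "src" }, other = { "other" };
        struct list_head *pos;

        list_init(&head);

        /* Suppose vma_merge() extended a pre-existing dst vma, leaving
         * it queued before src: a walker would visit dst, then src,
         * and could miss a pte moved from src to dst in between. */
        list_add_tail(&dst.same_anon_vma, &head);
        list_add_tail(&src.same_anon_vma, &head);
        list_add_tail(&other.same_anon_vma, &head); /* another process */

        /* The moveto_tail step: unlink dst's chain and re-append it
         * at the tail, so dst now follows src. */
        list_del(&dst.same_anon_vma);
        list_add_tail(&dst.same_anon_vma, &head);

        for (pos = head.next; pos != &head; pos = pos->next)
                printf("%s\n", avc_of(pos)->vma_name); /* src other dst */
        return 0;
}

In the kernel, the loop in anon_vma_moveto_tail() runs once per anon_vma that dst is chained to, serialized by lock_anon_vma_root(); the single-list move shown here is the per-anon_vma step, and entries belonging to other processes are never reordered relative to each other.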