-rw-r--r--  include/linux/rmap.h |  1
-rw-r--r--  mm/mmap.c            |  3
-rw-r--r--  mm/mremap.c          | 14
-rw-r--r--  mm/rmap.c            | 45
4 files changed, 6 insertions, 57 deletions
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 3fce545df394..7f32cec57e67 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -120,7 +120,6 @@ void anon_vma_init(void);	/* create anon_vma_cachep */
 int anon_vma_prepare(struct vm_area_struct *);
 void unlink_anon_vmas(struct vm_area_struct *);
 int anon_vma_clone(struct vm_area_struct *, struct vm_area_struct *);
-void anon_vma_moveto_tail(struct vm_area_struct *);
 int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *);
 
 static inline void anon_vma_merge(struct vm_area_struct *vma,
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2378,8 +2378,7 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 			 */
 			VM_BUG_ON(faulted_in_anon_vma);
 			*vmap = new_vma;
-		} else
-			anon_vma_moveto_tail(new_vma);
+		}
 	} else {
 		new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 		if (new_vma) {
diff --git a/mm/mremap.c b/mm/mremap.c
index cc06d0e48d05..5588bb6e9295 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -74,6 +74,7 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		unsigned long new_addr)
 {
 	struct address_space *mapping = NULL;
+	struct anon_vma *anon_vma = vma->anon_vma;
 	struct mm_struct *mm = vma->vm_mm;
 	pte_t *old_pte, *new_pte, pte;
 	spinlock_t *old_ptl, *new_ptl;
@@ -88,6 +89,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 		mapping = vma->vm_file->f_mapping;
 		mutex_lock(&mapping->i_mmap_mutex);
 	}
+	if (anon_vma)
+		anon_vma_lock(anon_vma);
 
 	/*
 	 * We don't have to worry about the ordering of src and dst
@@ -114,6 +117,8 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
 	spin_unlock(new_ptl);
 	pte_unmap(new_pte - 1);
 	pte_unmap_unlock(old_pte - 1, old_ptl);
+	if (anon_vma)
+		anon_vma_unlock(anon_vma);
 	if (mapping)
 		mutex_unlock(&mapping->i_mmap_mutex);
 }
@@ -221,15 +226,6 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 	moved_len = move_page_tables(vma, old_addr, new_vma, new_addr, old_len);
 	if (moved_len < old_len) {
 		/*
-		 * Before moving the page tables from the new vma to
-		 * the old vma, we need to be sure the old vma is
-		 * queued after new vma in the same_anon_vma list to
-		 * prevent SMP races with rmap_walk (that could lead
-		 * rmap_walk to miss some page table).
-		 */
-		anon_vma_moveto_tail(vma);
-
-		/*
 		 * On error, move entries back from new area to old,
 		 * which will succeed since page tables still there,
 		 * and then proceed to unmap new area instead of old.
diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -269,51 +269,6 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 }
 
 /*
- * Some rmap walk that needs to find all ptes/hugepmds without false
- * negatives (like migrate and split_huge_page) running concurrent
- * with operations that copy or move pagetables (like mremap() and
- * fork()) to be safe. They depend on the anon_vma "same_anon_vma"
- * list to be in a certain order: the dst_vma must be placed after the
- * src_vma in the list. This is always guaranteed by fork() but
- * mremap() needs to call this function to enforce it in case the
- * dst_vma isn't newly allocated and chained with the anon_vma_clone()
- * function but just an extension of a pre-existing vma through
- * vma_merge.
- *
- * NOTE: the same_anon_vma list can still be changed by other
- * processes while mremap runs because mremap doesn't hold the
- * anon_vma mutex to prevent modifications to the list while it
- * runs. All we need to enforce is that the relative order of this
- * process vmas isn't changing (we don't care about other vmas
- * order). Each vma corresponds to an anon_vma_chain structure so
- * there's no risk that other processes calling anon_vma_moveto_tail()
- * and changing the same_anon_vma list under mremap() will screw with
- * the relative order of this process vmas in the list, because we
- * they can't alter the order of any vma that belongs to this
- * process. And there can't be another anon_vma_moveto_tail() running
- * concurrently with mremap() coming from this process because we hold
- * the mmap_sem for the whole mremap(). fork() ordering dependency
- * also shouldn't be affected because fork() only cares that the
- * parent vmas are placed in the list before the child vmas and
- * anon_vma_moveto_tail() won't reorder vmas from either the fork()
- * parent or child.
- */
-void anon_vma_moveto_tail(struct vm_area_struct *dst)
-{
-	struct anon_vma_chain *pavc;
-	struct anon_vma *root = NULL;
-
-	list_for_each_entry_reverse(pavc, &dst->anon_vma_chain, same_vma) {
-		struct anon_vma *anon_vma = pavc->anon_vma;
-		VM_BUG_ON(pavc->vma != dst);
-		root = lock_anon_vma_root(root, anon_vma);
-		list_del(&pavc->same_anon_vma);
-		list_add_tail(&pavc->same_anon_vma, &anon_vma->head);
-	}
-	unlock_anon_vma_root(root);
-}
-
-/*
  * Attach vma to its own anon_vma, as well as to the anon_vmas that
  * the corresponding VMA in the parent process is attached to.
  * Returns 0 on success, non-zero on failure.
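
The net effect of the patch is to drop the same_anon_vma ordering trick (anon_vma_moveto_tail) and rely on plain lock-based exclusion instead: move_ptes() now holds the anon_vma lock while it transfers PTEs, so a concurrent rmap walk (for example migration) runs either entirely before or entirely after the move and cannot miss the page. The userspace sketch below only models that locking idea; it is not kernel code, and the names (mover, walker, mapped_at_old, mapped_at_new) are invented for illustration.

/*
 * Userspace analogue of the synchronization this diff adopts: the
 * "mover" (standing in for move_ptes) and the "walker" (standing in
 * for an rmap walk) serialize on the same lock, so the walker always
 * finds the mapping at either the old or the new location, never
 * at neither. Build with: cc -pthread demo.c
 */
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t anon_vma_lock = PTHREAD_MUTEX_INITIALIZER;
static bool mapped_at_old = true;
static bool mapped_at_new = false;

static void *mover(void *arg)	/* models move_ptes() */
{
	pthread_mutex_lock(&anon_vma_lock);
	mapped_at_old = false;	/* clear the old pte ... */
	mapped_at_new = true;	/* ... and set the new one, atomically w.r.t. walkers */
	pthread_mutex_unlock(&anon_vma_lock);
	return NULL;
}

static void *walker(void *arg)	/* models an rmap walk such as migration */
{
	pthread_mutex_lock(&anon_vma_lock);
	/* With the lock held, the mapping is always visible somewhere. */
	printf("walker sees mapping at: %s\n",
	       mapped_at_old ? "old" : (mapped_at_new ? "new" : "MISSED"));
	pthread_mutex_unlock(&anon_vma_lock);
	return NULL;
}

int main(void)
{
	pthread_t m, w;

	pthread_create(&m, NULL, mover, NULL);
	pthread_create(&w, NULL, walker, NULL);
	pthread_join(m, NULL);
	pthread_join(w, NULL);
	return 0;
}

Without the shared lock (the situation the removed anon_vma_moveto_tail comment worried about), the walker could observe the transient state in which neither location is mapped; with it, that window is closed, which is why the list-ordering guarantee is no longer needed.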