 arch/ia64/kernel/perfmon.c |   1
 arch/ia64/mm/init.c        |   2
 fs/exec.c                  |   6
 include/linux/mm.h         |   6
 include/linux/mm_types.h   |   3
 include/linux/rmap.h       |  35
 kernel/fork.c              |   6
 mm/ksm.c                   |  12
 mm/memory-failure.c        |   5
 mm/memory.c                |   4
 mm/mmap.c                  | 138
 mm/mremap.c                |   7
 mm/nommu.c                 |   2
 mm/rmap.c                  | 156
 14 files changed, 298 insertions(+), 85 deletions(-)
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index b81e46b1629b..703062c44fb9 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2315,6 +2315,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
 		DPRINT(("Cannot allocate vma\n"));
 		goto error_kmem;
 	}
+	INIT_LIST_HEAD(&vma->anon_vma_chain);

 	/*
 	 * partially initialize the vma for the sampling buffer
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index ca3335ea56cc..ed41759efcac 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -117,6 +117,7 @@ ia64_init_addr_space (void)
 	 */
 	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 	if (vma) {
+		INIT_LIST_HEAD(&vma->anon_vma_chain);
 		vma->vm_mm = current->mm;
 		vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
 		vma->vm_end = vma->vm_start + PAGE_SIZE;
@@ -135,6 +136,7 @@ ia64_init_addr_space (void)
 	if (!(current->personality & MMAP_PAGE_ZERO)) {
 		vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 		if (vma) {
+			INIT_LIST_HEAD(&vma->anon_vma_chain);
 			vma->vm_mm = current->mm;
 			vma->vm_end = PAGE_SIZE;
 			vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT);
diff --git a/fs/exec.c b/fs/exec.c
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -246,6 +246,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
 	vma->vm_start = vma->vm_end - PAGE_SIZE;
 	vma->vm_flags = VM_STACK_FLAGS;
 	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+	INIT_LIST_HEAD(&vma->anon_vma_chain);
 	err = insert_vm_struct(mm, vma);
 	if (err)
 		goto err;
@@ -516,7 +517,8 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 	/*
 	 * cover the whole range: [new_start, old_end)
 	 */
-	vma_adjust(vma, new_start, old_end, vma->vm_pgoff, NULL);
+	if (vma_adjust(vma, new_start, old_end, vma->vm_pgoff, NULL))
+		return -ENOMEM;

 	/*
 	 * move the page tables downwards, on failure we rely on
@@ -547,7 +549,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 	tlb_finish_mmu(tlb, new_end, old_end);

 	/*
-	 * shrink the vma to just the new range.
+	 * Shrink the vma to just the new range. Always succeeds.
 	 */
 	vma_adjust(vma, new_start, new_end, vma->vm_pgoff, NULL);

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8e580c07d171..8e2841a2f441 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -97,7 +97,11 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_NORESERVE	0x00200000	/* should the VM suppress accounting */
 #define VM_HUGETLB	0x00400000	/* Huge TLB Page VM */
 #define VM_NONLINEAR	0x00800000	/* Is non-linear (remap_file_pages) */
+#ifdef CONFIG_MMU
+#define VM_LOCK_RMAP	0x01000000	/* Do not follow this rmap (mmu mmap) */
+#else
 #define VM_MAPPED_COPY	0x01000000	/* T if mapped copy of data (nommu mmap) */
+#endif
 #define VM_INSERTPAGE	0x02000000	/* The vma has had "vm_insert_page()" done on it */
 #define VM_ALWAYSDUMP	0x04000000	/* Always include in core dumps */

@@ -1216,7 +1220,7 @@ static inline void vma_nonlinear_insert(struct vm_area_struct *vma,

 /* mmap.c */
 extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin);
-extern void vma_adjust(struct vm_area_struct *vma, unsigned long start,
+extern int vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert);
 extern struct vm_area_struct *vma_merge(struct mm_struct *,
 	struct vm_area_struct *prev, unsigned long addr, unsigned long end,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 19549d7275ab..048b46270aa5 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -163,7 +163,8 @@ struct vm_area_struct {
 	 * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack
 	 * or brk vma (with NULL file) can only be in an anon_vma list.
 	 */
-	struct list_head anon_vma_node;	/* Serialized by anon_vma->lock */
+	struct list_head anon_vma_chain; /* Serialized by mmap_sem &
+					  * page_table_lock */
 	struct anon_vma *anon_vma;	/* Serialized by page_table_lock */

 	/* Function pointers to deal with this struct. */
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index b019ae64e2ab..62da2001d55c 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -37,7 +37,27 @@ struct anon_vma {
 	 * is serialized by a system wide lock only visible to
 	 * mm_take_all_locks() (mm_all_locks_mutex).
 	 */
-	struct list_head head;	/* List of private "related" vmas */
+	struct list_head head;	/* Chain of private "related" vmas */
+};
+
+/*
+ * The copy-on-write semantics of fork mean that an anon_vma
+ * can become associated with multiple processes. Furthermore,
+ * each child process will have its own anon_vma, where new
+ * pages for that process are instantiated.
+ *
+ * This structure allows us to find the anon_vmas associated
+ * with a VMA, or the VMAs associated with an anon_vma.
+ * The "same_vma" list contains the anon_vma_chains linking
+ * all the anon_vmas associated with this VMA.
+ * The "same_anon_vma" list contains the anon_vma_chains
+ * which link all the VMAs associated with this anon_vma.
+ */
+struct anon_vma_chain {
+	struct vm_area_struct *vma;
+	struct anon_vma *anon_vma;
+	struct list_head same_vma;	/* locked by mmap_sem & page_table_lock */
+	struct list_head same_anon_vma;	/* locked by anon_vma->lock */
 };

 #ifdef CONFIG_MMU
@@ -89,12 +109,19 @@ static inline void anon_vma_unlock(struct vm_area_struct *vma)
  */
 void anon_vma_init(void);	/* create anon_vma_cachep */
 int anon_vma_prepare(struct vm_area_struct *);
-void __anon_vma_merge(struct vm_area_struct *, struct vm_area_struct *);
-void anon_vma_unlink(struct vm_area_struct *);
-void anon_vma_link(struct vm_area_struct *);
+void unlink_anon_vmas(struct vm_area_struct *);
+int anon_vma_clone(struct vm_area_struct *, struct vm_area_struct *);
+int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *);
 void __anon_vma_link(struct vm_area_struct *);
 void anon_vma_free(struct anon_vma *);

+static inline void anon_vma_merge(struct vm_area_struct *vma,
+				  struct vm_area_struct *next)
+{
+	VM_BUG_ON(vma->anon_vma != next->anon_vma);
+	unlink_anon_vmas(next);
+}
+
 /*
  * rmap interfaces called when adding or removing pte of page
  */
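For illustration only, not part of the patch: each anon_vma_chain above sits on two lists at once, so the same small object can be found both from its VMA (via same_vma) and from its anon_vma (via same_anon_vma). A minimal sketch of how one link gets wired up, modelled on the anon_vma_chain_link() helper this patch adds in mm/rmap.c further down; the wrapper name here is invented for the example:

/* Illustrative sketch; mirrors anon_vma_chain_link() from mm/rmap.c below. */
static void link_vma_to_anon_vma(struct vm_area_struct *vma,
				 struct anon_vma_chain *avc,
				 struct anon_vma *anon_vma)
{
	avc->vma = vma;			/* back-pointer used by rmap walks */
	avc->anon_vma = anon_vma;	/* back-pointer used when unlinking */

	/* same_vma: all anon_vmas this VMA belongs to (mmap_sem held) */
	list_add(&avc->same_vma, &vma->anon_vma_chain);

	/* same_anon_vma: all VMAs attached to this anon_vma */
	spin_lock(&anon_vma->lock);
	list_add_tail(&avc->same_anon_vma, &anon_vma->head);
	spin_unlock(&anon_vma->lock);
}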
diff --git a/kernel/fork.c b/kernel/fork.c
index 7616bcf107b9..bab7b254ad39 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -329,15 +329,17 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 		if (!tmp)
 			goto fail_nomem;
 		*tmp = *mpnt;
+		INIT_LIST_HEAD(&tmp->anon_vma_chain);
 		pol = mpol_dup(vma_policy(mpnt));
 		retval = PTR_ERR(pol);
 		if (IS_ERR(pol))
 			goto fail_nomem_policy;
 		vma_set_policy(tmp, pol);
+		if (anon_vma_fork(tmp, mpnt))
+			goto fail_nomem_anon_vma_fork;
 		tmp->vm_flags &= ~VM_LOCKED;
 		tmp->vm_mm = mm;
 		tmp->vm_next = NULL;
-		anon_vma_link(tmp);
 		file = tmp->vm_file;
 		if (file) {
 			struct inode *inode = file->f_path.dentry->d_inode;
@@ -392,6 +394,8 @@ out:
 	flush_tlb_mm(oldmm);
 	up_write(&oldmm->mmap_sem);
 	return retval;
+fail_nomem_anon_vma_fork:
+	mpol_put(pol);
 fail_nomem_policy:
 	kmem_cache_free(vm_area_cachep, tmp);
 fail_nomem:
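For illustration only, not part of the patch: after the dup_mmap() change above, every copied VMA is chained to all of the parent's anon_vmas (via anon_vma_clone()) plus one freshly allocated anon_vma of its own, and vma->anon_vma points at the new one so COWed pages land there. A hypothetical debugging walk over those links, using only fields this patch defines (helper name invented):

/* Illustrative sketch: list every anon_vma a vma is chained to. */
static void dump_anon_vmas(struct vm_area_struct *vma)
{
	struct anon_vma_chain *avc;

	/* same_vma is protected by mmap_sem and page_table_lock. */
	list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
		printk(KERN_DEBUG "vma %p -> anon_vma %p%s\n",
		       vma, avc->anon_vma,
		       avc->anon_vma == vma->anon_vma ? " (own)" : " (inherited)");
}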
diff --git a/mm/ksm.c b/mm/ksm.c
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1563,10 +1563,12 @@ int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg,
 again:
 	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
 		struct anon_vma *anon_vma = rmap_item->anon_vma;
+		struct anon_vma_chain *vmac;
 		struct vm_area_struct *vma;

 		spin_lock(&anon_vma->lock);
-		list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+		list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
+			vma = vmac->vma;
 			if (rmap_item->address < vma->vm_start ||
 			    rmap_item->address >= vma->vm_end)
 				continue;
@@ -1614,10 +1616,12 @@ int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
 again:
 	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
 		struct anon_vma *anon_vma = rmap_item->anon_vma;
+		struct anon_vma_chain *vmac;
 		struct vm_area_struct *vma;

 		spin_lock(&anon_vma->lock);
-		list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+		list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
+			vma = vmac->vma;
 			if (rmap_item->address < vma->vm_start ||
 			    rmap_item->address >= vma->vm_end)
 				continue;
@@ -1664,10 +1668,12 @@ int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *,
 again:
 	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
 		struct anon_vma *anon_vma = rmap_item->anon_vma;
+		struct anon_vma_chain *vmac;
 		struct vm_area_struct *vma;

 		spin_lock(&anon_vma->lock);
-		list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+		list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
+			vma = vmac->vma;
 			if (rmap_item->address < vma->vm_start ||
 			    rmap_item->address >= vma->vm_end)
 				continue;
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 17299fd4577c..d1f335162976 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -383,9 +383,12 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
 	if (av == NULL)	/* Not actually mapped anymore */
 		goto out;
 	for_each_process (tsk) {
+		struct anon_vma_chain *vmac;
+
 		if (!task_early_kill(tsk))
 			continue;
-		list_for_each_entry (vma, &av->head, anon_vma_node) {
+		list_for_each_entry(vmac, &av->head, same_anon_vma) {
+			vma = vmac->vma;
 			if (!page_mapped_in_vma(page, vma))
 				continue;
 			if (vma->vm_mm == tsk->mm)
diff --git a/mm/memory.c b/mm/memory.c
index 77d9f840936b..dc785b438d70 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -374,7 +374,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		 * Hide vma from rmap and truncate_pagecache before freeing
 		 * pgtables
 		 */
-		anon_vma_unlink(vma);
+		unlink_anon_vmas(vma);
 		unlink_file_vma(vma);

 		if (is_vm_hugetlb_page(vma)) {
@@ -388,7 +388,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 			       && !is_vm_hugetlb_page(next)) {
 				vma = next;
 				next = vma->vm_next;
-				anon_vma_unlink(vma);
+				unlink_anon_vmas(vma);
 				unlink_file_vma(vma);
 			}
 			free_pgd_range(tlb, addr, vma->vm_end,
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -437,7 +437,6 @@ __vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	__vma_link_list(mm, vma, prev, rb_parent);
 	__vma_link_rb(mm, vma, rb_link, rb_parent);
-	__anon_vma_link(vma);
 }

 static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -499,7 +498,7 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
  * are necessary. The "insert" vma (if any) is to be inserted
  * before we drop the necessary locks.
  */
-void vma_adjust(struct vm_area_struct *vma, unsigned long start,
+int vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
 {
 	struct mm_struct *mm = vma->vm_mm;
@@ -542,6 +541,28 @@ again: remove_next = 1 + (end > next->vm_end);
 		}
 	}

+	/*
+	 * When changing only vma->vm_end, we don't really need anon_vma lock.
+	 */
+	if (vma->anon_vma && (insert || importer || start != vma->vm_start))
+		anon_vma = vma->anon_vma;
+	if (anon_vma) {
+		/*
+		 * Easily overlooked: when mprotect shifts the boundary,
+		 * make sure the expanding vma has anon_vma set if the
+		 * shrinking vma had, to cover any anon pages imported.
+		 */
+		if (importer && !importer->anon_vma) {
+			/* Block reverse map lookups until things are set up. */
+			importer->vm_flags |= VM_LOCK_RMAP;
+			if (anon_vma_clone(importer, vma)) {
+				importer->vm_flags &= ~VM_LOCK_RMAP;
+				return -ENOMEM;
+			}
+			importer->anon_vma = anon_vma;
+		}
+	}
+
 	if (file) {
 		mapping = file->f_mapping;
 		if (!(vma->vm_flags & VM_NONLINEAR))
@@ -567,25 +588,6 @@ again: remove_next = 1 + (end > next->vm_end);
 		}
 	}

-	/*
-	 * When changing only vma->vm_end, we don't really need
-	 * anon_vma lock.
-	 */
-	if (vma->anon_vma && (insert || importer || start != vma->vm_start))
-		anon_vma = vma->anon_vma;
-	if (anon_vma) {
-		spin_lock(&anon_vma->lock);
-		/*
-		 * Easily overlooked: when mprotect shifts the boundary,
-		 * make sure the expanding vma has anon_vma set if the
-		 * shrinking vma had, to cover any anon pages imported.
-		 */
-		if (importer && !importer->anon_vma) {
-			importer->anon_vma = anon_vma;
-			__anon_vma_link(importer);
-		}
-	}
-
 	if (root) {
 		flush_dcache_mmap_lock(mapping);
 		vma_prio_tree_remove(vma, root);
@@ -616,8 +618,11 @@ again: remove_next = 1 + (end > next->vm_end);
 		__vma_unlink(mm, next, vma);
 		if (file)
 			__remove_shared_vm_struct(next, file, mapping);
-		if (next->anon_vma)
-			__anon_vma_merge(vma, next);
+		/*
+		 * This VMA is now dead, no need for rmap to follow it.
+		 * Call anon_vma_merge below, outside of i_mmap_lock.
+		 */
+		next->vm_flags |= VM_LOCK_RMAP;
 	} else if (insert) {
 		/*
 		 * split_vma has split insert from vma, and needs
@@ -627,17 +632,25 @@ again: remove_next = 1 + (end > next->vm_end);
 		__insert_vm_struct(mm, insert);
 	}

-	if (anon_vma)
-		spin_unlock(&anon_vma->lock);
 	if (mapping)
 		spin_unlock(&mapping->i_mmap_lock);

+	/*
+	 * The current VMA has been set up. It is now safe for the
+	 * rmap code to get from the pages to the ptes.
+	 */
+	if (anon_vma && importer)
+		importer->vm_flags &= ~VM_LOCK_RMAP;
+
 	if (remove_next) {
 		if (file) {
 			fput(file);
 			if (next->vm_flags & VM_EXECUTABLE)
 				removed_exe_file_vma(mm);
 		}
+		/* Protected by mmap_sem and VM_LOCK_RMAP. */
+		if (next->anon_vma)
+			anon_vma_merge(vma, next);
 		mm->map_count--;
 		mpol_put(vma_policy(next));
 		kmem_cache_free(vm_area_cachep, next);
@@ -653,6 +666,8 @@ again: remove_next = 1 + (end > next->vm_end);
 	}

 	validate_mm(mm);
+
+	return 0;
 }

 /*
@@ -759,6 +774,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 {
 	pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
 	struct vm_area_struct *area, *next;
+	int err;

 	/*
 	 * We later require that vma->vm_flags == vm_flags,
@@ -792,11 +808,13 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 			is_mergeable_anon_vma(prev->anon_vma,
 					      next->anon_vma)) {
 			/* cases 1, 6 */
-			vma_adjust(prev, prev->vm_start,
+			err = vma_adjust(prev, prev->vm_start,
 				next->vm_end, prev->vm_pgoff, NULL);
 		} else			/* cases 2, 5, 7 */
-			vma_adjust(prev, prev->vm_start,
+			err = vma_adjust(prev, prev->vm_start,
 				end, prev->vm_pgoff, NULL);
+		if (err)
+			return NULL;
 		return prev;
 	}

@@ -808,11 +826,13 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 			can_vma_merge_before(next, vm_flags,
 					anon_vma, file, pgoff+pglen)) {
 		if (prev && addr < prev->vm_end)	/* case 4 */
-			vma_adjust(prev, prev->vm_start,
+			err = vma_adjust(prev, prev->vm_start,
 				addr, prev->vm_pgoff, NULL);
 		else				/* cases 3, 8 */
-			vma_adjust(area, addr, next->vm_end,
+			err = vma_adjust(area, addr, next->vm_end,
 				next->vm_pgoff - pglen, NULL);
+		if (err)
+			return NULL;
 		return area;
 	}

@@ -1205,6 +1225,7 @@ munmap_back:
 	vma->vm_flags = vm_flags;
 	vma->vm_page_prot = vm_get_page_prot(vm_flags);
 	vma->vm_pgoff = pgoff;
+	INIT_LIST_HEAD(&vma->anon_vma_chain);

 	if (file) {
 		error = -EINVAL;
@@ -1865,6 +1886,7 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 {
 	struct mempolicy *pol;
 	struct vm_area_struct *new;
+	int err = -ENOMEM;

 	if (is_vm_hugetlb_page(vma) && (addr &
 					~(huge_page_mask(hstate_vma(vma)))))
@@ -1872,11 +1894,13 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,

 	new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 	if (!new)
-		return -ENOMEM;
+		goto out_err;

 	/* most fields are the same, copy all, and then fixup */
 	*new = *vma;

+	INIT_LIST_HEAD(&new->anon_vma_chain);
+
 	if (new_below)
 		new->vm_end = addr;
 	else {
@@ -1886,11 +1910,14 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,

 	pol = mpol_dup(vma_policy(vma));
 	if (IS_ERR(pol)) {
-		kmem_cache_free(vm_area_cachep, new);
-		return PTR_ERR(pol);
+		err = PTR_ERR(pol);
+		goto out_free_vma;
 	}
 	vma_set_policy(new, pol);

+	if (anon_vma_clone(new, vma))
+		goto out_free_mpol;
+
 	if (new->vm_file) {
 		get_file(new->vm_file);
 		if (vma->vm_flags & VM_EXECUTABLE)
@@ -1901,12 +1928,28 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 		new->vm_ops->open(new);

 	if (new_below)
-		vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
+		err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
 			((addr - new->vm_start) >> PAGE_SHIFT), new);
 	else
-		vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
+		err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);

-	return 0;
+	/* Success. */
+	if (!err)
+		return 0;
+
+	/* Clean everything up if vma_adjust failed. */
+	new->vm_ops->close(new);
+	if (new->vm_file) {
+		if (vma->vm_flags & VM_EXECUTABLE)
+			removed_exe_file_vma(mm);
+		fput(new->vm_file);
+	}
+out_free_mpol:
+	mpol_put(pol);
+out_free_vma:
+	kmem_cache_free(vm_area_cachep, new);
+out_err:
+	return err;
 }

 /*
@@ -2116,6 +2159,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 		return -ENOMEM;
 	}

+	INIT_LIST_HEAD(&vma->anon_vma_chain);
 	vma->vm_mm = mm;
 	vma->vm_start = addr;
 	vma->vm_end = addr + len;
@@ -2252,10 +2296,11 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 	if (new_vma) {
 		*new_vma = *vma;
 		pol = mpol_dup(vma_policy(vma));
-		if (IS_ERR(pol)) {
-			kmem_cache_free(vm_area_cachep, new_vma);
-			return NULL;
-		}
+		if (IS_ERR(pol))
+			goto out_free_vma;
+		INIT_LIST_HEAD(&new_vma->anon_vma_chain);
+		if (anon_vma_clone(new_vma, vma))
+			goto out_free_mempol;
 		vma_set_policy(new_vma, pol);
 		new_vma->vm_start = addr;
 		new_vma->vm_end = addr + len;
@@ -2271,6 +2316,12 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 		}
 	}
 	return new_vma;
+
+out_free_mempol:
+	mpol_put(pol);
+out_free_vma:
+	kmem_cache_free(vm_area_cachep, new_vma);
+	return NULL;
 }

 /*
@@ -2348,6 +2399,7 @@ int install_special_mapping(struct mm_struct *mm,
 	if (unlikely(vma == NULL))
 		return -ENOMEM;

+	INIT_LIST_HEAD(&vma->anon_vma_chain);
 	vma->vm_mm = mm;
 	vma->vm_start = addr;
 	vma->vm_end = addr + len;
@@ -2448,6 +2500,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
 int mm_take_all_locks(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
+	struct anon_vma_chain *avc;
 	int ret = -EINTR;

 	BUG_ON(down_read_trylock(&mm->mmap_sem));
@@ -2465,7 +2518,8 @@ int mm_take_all_locks(struct mm_struct *mm)
 		if (signal_pending(current))
 			goto out_unlock;
 		if (vma->anon_vma)
-			vm_lock_anon_vma(mm, vma->anon_vma);
+			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+				vm_lock_anon_vma(mm, avc->anon_vma);
 	}

 	ret = 0;
@@ -2520,13 +2574,15 @@ static void vm_unlock_mapping(struct address_space *mapping)
 void mm_drop_all_locks(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
+	struct anon_vma_chain *avc;

 	BUG_ON(down_read_trylock(&mm->mmap_sem));
 	BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));

 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		if (vma->anon_vma)
-			vm_unlock_anon_vma(vma->anon_vma);
+			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+				vm_unlock_anon_vma(avc->anon_vma);
 		if (vma->vm_file && vma->vm_file->f_mapping)
 			vm_unlock_mapping(vma->vm_file->f_mapping);
 	}
diff --git a/mm/mremap.c b/mm/mremap.c
index 4c4c803453f3..e9c75efce609 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -460,8 +460,11 @@ unsigned long do_mremap(unsigned long addr,
 	if (vma_expandable(vma, new_len - old_len)) {
 		int pages = (new_len - old_len) >> PAGE_SHIFT;

-		vma_adjust(vma, vma->vm_start,
-			addr + new_len, vma->vm_pgoff, NULL);
+		if (vma_adjust(vma, vma->vm_start, addr + new_len,
+			       vma->vm_pgoff, NULL)) {
+			ret = -ENOMEM;
+			goto out;
+		}

 		mm->total_vm += pages;
 		vm_stat_account(mm, vma->vm_flags, vma->vm_file, pages);
diff --git a/mm/nommu.c b/mm/nommu.c
index 48a2ecfaf059..55727a74af98 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1209,7 +1209,7 @@ unsigned long do_mmap_pgoff(struct file *file,
 	region->vm_flags = vm_flags;
 	region->vm_pgoff = pgoff;

-	INIT_LIST_HEAD(&vma->anon_vma_node);
+	INIT_LIST_HEAD(&vma->anon_vma_chain);
 	vma->vm_flags = vm_flags;
 	vma->vm_pgoff = pgoff;

diff --git a/mm/rmap.c b/mm/rmap.c
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -62,6 +62,7 @@
 #include "internal.h"

 static struct kmem_cache *anon_vma_cachep;
+static struct kmem_cache *anon_vma_chain_cachep;

 static inline struct anon_vma *anon_vma_alloc(void)
 {
@@ -73,6 +74,16 @@ void anon_vma_free(struct anon_vma *anon_vma)
 	kmem_cache_free(anon_vma_cachep, anon_vma);
 }

+static inline struct anon_vma_chain *anon_vma_chain_alloc(void)
+{
+	return kmem_cache_alloc(anon_vma_chain_cachep, GFP_KERNEL);
+}
+
+void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
+{
+	kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
+}
+
 /**
  * anon_vma_prepare - attach an anon_vma to a memory region
  * @vma: the memory region in question
@@ -103,18 +114,23 @@ void anon_vma_free(struct anon_vma *anon_vma)
 int anon_vma_prepare(struct vm_area_struct *vma)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
+	struct anon_vma_chain *avc;

 	might_sleep();
 	if (unlikely(!anon_vma)) {
 		struct mm_struct *mm = vma->vm_mm;
 		struct anon_vma *allocated;

+		avc = anon_vma_chain_alloc();
+		if (!avc)
+			goto out_enomem;
+
 		anon_vma = find_mergeable_anon_vma(vma);
 		allocated = NULL;
 		if (!anon_vma) {
 			anon_vma = anon_vma_alloc();
 			if (unlikely(!anon_vma))
-				return -ENOMEM;
+				goto out_enomem_free_avc;
 			allocated = anon_vma;
 		}
 		spin_lock(&anon_vma->lock);
@@ -123,53 +139,113 @@ int anon_vma_prepare(struct vm_area_struct *vma)
 		spin_lock(&mm->page_table_lock);
 		if (likely(!vma->anon_vma)) {
 			vma->anon_vma = anon_vma;
-			list_add_tail(&vma->anon_vma_node, &anon_vma->head);
+			avc->anon_vma = anon_vma;
+			avc->vma = vma;
+			list_add(&avc->same_vma, &vma->anon_vma_chain);
+			list_add(&avc->same_anon_vma, &anon_vma->head);
 			allocated = NULL;
 		}
 		spin_unlock(&mm->page_table_lock);

 		spin_unlock(&anon_vma->lock);
-		if (unlikely(allocated))
+		if (unlikely(allocated)) {
 			anon_vma_free(allocated);
+			anon_vma_chain_free(avc);
+		}
 	}
 	return 0;
+
+out_enomem_free_avc:
+	anon_vma_chain_free(avc);
+out_enomem:
+	return -ENOMEM;
 }

-void __anon_vma_merge(struct vm_area_struct *vma, struct vm_area_struct *next)
+static void anon_vma_chain_link(struct vm_area_struct *vma,
+				struct anon_vma_chain *avc,
+				struct anon_vma *anon_vma)
 {
-	BUG_ON(vma->anon_vma != next->anon_vma);
-	list_del(&next->anon_vma_node);
+	avc->vma = vma;
+	avc->anon_vma = anon_vma;
+	list_add(&avc->same_vma, &vma->anon_vma_chain);
+
+	spin_lock(&anon_vma->lock);
+	list_add_tail(&avc->same_anon_vma, &anon_vma->head);
+	spin_unlock(&anon_vma->lock);
 }

-void __anon_vma_link(struct vm_area_struct *vma)
+/*
+ * Attach the anon_vmas from src to dst.
+ * Returns 0 on success, -ENOMEM on failure.
+ */
+int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 {
-	struct anon_vma *anon_vma = vma->anon_vma;
+	struct anon_vma_chain *avc, *pavc;
+
+	list_for_each_entry(pavc, &src->anon_vma_chain, same_vma) {
+		avc = anon_vma_chain_alloc();
+		if (!avc)
+			goto enomem_failure;
+		anon_vma_chain_link(dst, avc, pavc->anon_vma);
+	}
+	return 0;

-	if (anon_vma)
-		list_add_tail(&vma->anon_vma_node, &anon_vma->head);
+enomem_failure:
+	unlink_anon_vmas(dst);
+	return -ENOMEM;
 }

-void anon_vma_link(struct vm_area_struct *vma)
+/*
+ * Attach vma to its own anon_vma, as well as to the anon_vmas that
+ * the corresponding VMA in the parent process is attached to.
+ * Returns 0 on success, non-zero on failure.
+ */
+int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
 {
-	struct anon_vma *anon_vma = vma->anon_vma;
+	struct anon_vma_chain *avc;
+	struct anon_vma *anon_vma;

-	if (anon_vma) {
-		spin_lock(&anon_vma->lock);
-		list_add_tail(&vma->anon_vma_node, &anon_vma->head);
-		spin_unlock(&anon_vma->lock);
-	}
+	/* Don't bother if the parent process has no anon_vma here. */
+	if (!pvma->anon_vma)
+		return 0;
+
+	/*
+	 * First, attach the new VMA to the parent VMA's anon_vmas,
+	 * so rmap can find non-COWed pages in child processes.
+	 */
+	if (anon_vma_clone(vma, pvma))
+		return -ENOMEM;
+
+	/* Then add our own anon_vma. */
+	anon_vma = anon_vma_alloc();
+	if (!anon_vma)
+		goto out_error;
+	avc = anon_vma_chain_alloc();
+	if (!avc)
+		goto out_error_free_anon_vma;
+	anon_vma_chain_link(vma, avc, anon_vma);
+	/* Mark this anon_vma as the one where our new (COWed) pages go. */
+	vma->anon_vma = anon_vma;
+
+	return 0;
+
+out_error_free_anon_vma:
+	anon_vma_free(anon_vma);
+out_error:
+	return -ENOMEM;
 }

-void anon_vma_unlink(struct vm_area_struct *vma)
+static void anon_vma_unlink(struct anon_vma_chain *anon_vma_chain)
 {
-	struct anon_vma *anon_vma = vma->anon_vma;
+	struct anon_vma *anon_vma = anon_vma_chain->anon_vma;
 	int empty;

+	/* If anon_vma_fork fails, we can get an empty anon_vma_chain. */
 	if (!anon_vma)
 		return;

 	spin_lock(&anon_vma->lock);
-	list_del(&vma->anon_vma_node);
+	list_del(&anon_vma_chain->same_anon_vma);

 	/* We must garbage collect the anon_vma if it's empty */
 	empty = list_empty(&anon_vma->head) && !ksm_refcount(anon_vma);
@@ -179,6 +255,18 @@ void anon_vma_unlink(struct vm_area_struct *vma)
 		anon_vma_free(anon_vma);
 }

+void unlink_anon_vmas(struct vm_area_struct *vma)
+{
+	struct anon_vma_chain *avc, *next;
+
+	/* Unlink each anon_vma chained to the VMA. */
+	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
+		anon_vma_unlink(avc);
+		list_del(&avc->same_vma);
+		anon_vma_chain_free(avc);
+	}
+}
+
 static void anon_vma_ctor(void *data)
 {
 	struct anon_vma *anon_vma = data;
@@ -192,6 +280,7 @@ void __init anon_vma_init(void)
 {
 	anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
 			0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
+	anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC);
 }

 /*
@@ -240,6 +329,18 @@ vma_address(struct page *page, struct vm_area_struct *vma)
 		/* page should be within @vma mapping range */
 		return -EFAULT;
 	}
+	if (unlikely(vma->vm_flags & VM_LOCK_RMAP)) {
+		/*
+		 * This VMA is being unlinked or is not yet linked into the
+		 * VMA tree. Do not try to follow this rmap. This race
+		 * condition can result in page_referenced() ignoring a
+		 * reference or in try_to_unmap() failing to unmap a page.
+		 * The VMA cannot be freed under us because we hold the
+		 * anon_vma->lock, which the munmap code takes while
+		 * unlinking the anon_vmas from the VMA.
+		 */
+		return -EFAULT;
+	}
 	return address;
 }

@@ -396,7 +497,7 @@ static int page_referenced_anon(struct page *page,
 {
 	unsigned int mapcount;
 	struct anon_vma *anon_vma;
-	struct vm_area_struct *vma;
+	struct anon_vma_chain *avc;
 	int referenced = 0;

 	anon_vma = page_lock_anon_vma(page);
@@ -404,7 +505,8 @@ static int page_referenced_anon(struct page *page,
 		return referenced;

 	mapcount = page_mapcount(page);
-	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+	list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+		struct vm_area_struct *vma = avc->vma;
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
 			continue;
@@ -1025,14 +1127,15 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
 static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
 {
 	struct anon_vma *anon_vma;
-	struct vm_area_struct *vma;
+	struct anon_vma_chain *avc;
 	int ret = SWAP_AGAIN;

 	anon_vma = page_lock_anon_vma(page);
 	if (!anon_vma)
 		return ret;

-	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+	list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+		struct vm_area_struct *vma = avc->vma;
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
 			continue;
@@ -1223,7 +1326,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
 		struct vm_area_struct *, unsigned long, void *), void *arg)
 {
 	struct anon_vma *anon_vma;
-	struct vm_area_struct *vma;
+	struct anon_vma_chain *avc;
 	int ret = SWAP_AGAIN;

 	/*
@@ -1238,7 +1341,8 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
 	if (!anon_vma)
 		return ret;
 	spin_lock(&anon_vma->lock);
-	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+	list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+		struct vm_area_struct *vma = avc->vma;
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
 			continue;
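For illustration only, not part of the patch: taken together, the rmap hunks above all switch from walking VMAs directly off anon_vma->head to walking anon_vma_chain objects and dereferencing avc->vma. A condensed sketch of the common pattern shared by page_referenced_anon(), try_to_unmap_anon() and rmap_walk_anon(), with the per-caller work elided (helper name invented):

/* Illustrative sketch of the walk pattern after this patch. */
static void for_each_mapping_vma(struct page *page)
{
	struct anon_vma *anon_vma = page_lock_anon_vma(page);
	struct anon_vma_chain *avc;

	if (!anon_vma)
		return;
	list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
		struct vm_area_struct *vma = avc->vma;
		unsigned long address = vma_address(page, vma);

		if (address == -EFAULT)	/* outside vma, or VM_LOCK_RMAP set */
			continue;
		/* ... operate on (vma, address) ... */
	}
	page_unlock_anon_vma(anon_vma);
}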