-rw-r--r--	arch/ia64/kernel/perfmon.c	1
-rw-r--r--	arch/ia64/mm/init.c	2
-rw-r--r--	fs/exec.c	6
-rw-r--r--	include/linux/mm.h	6
-rw-r--r--	include/linux/mm_types.h	3
-rw-r--r--	include/linux/rmap.h	35
-rw-r--r--	kernel/fork.c	6
-rw-r--r--	mm/ksm.c	12
-rw-r--r--	mm/memory-failure.c	5
-rw-r--r--	mm/memory.c	4
-rw-r--r--	mm/mmap.c	138
-rw-r--r--	mm/mremap.c	7
-rw-r--r--	mm/nommu.c	2
-rw-r--r--	mm/rmap.c	156
14 files changed, 298 insertions, 85 deletions
diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index b81e46b1629b..703062c44fb9 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2315,6 +2315,7 @@ pfm_smpl_buffer_alloc(struct task_struct *task, struct file *filp, pfm_context_t
 		DPRINT(("Cannot allocate vma\n"));
 		goto error_kmem;
 	}
+	INIT_LIST_HEAD(&vma->anon_vma_chain);
 
 	/*
 	 * partially initialize the vma for the sampling buffer
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index ca3335ea56cc..ed41759efcac 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -117,6 +117,7 @@ ia64_init_addr_space (void)
 	 */
 	vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 	if (vma) {
+		INIT_LIST_HEAD(&vma->anon_vma_chain);
 		vma->vm_mm = current->mm;
 		vma->vm_start = current->thread.rbs_bot & PAGE_MASK;
 		vma->vm_end = vma->vm_start + PAGE_SIZE;
@@ -135,6 +136,7 @@ ia64_init_addr_space (void)
 	if (!(current->personality & MMAP_PAGE_ZERO)) {
 		vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
 		if (vma) {
+			INIT_LIST_HEAD(&vma->anon_vma_chain);
 			vma->vm_mm = current->mm;
 			vma->vm_end = PAGE_SIZE;
 			vma->vm_page_prot = __pgprot(pgprot_val(PAGE_READONLY) | _PAGE_MA_NAT);
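Every place that builds a vm_area_struct by hand now has to initialise the new anon_vma_chain list head before the vma is linked anywhere, otherwise anon_vma_clone() or unlink_anon_vmas() would walk an uninitialised list. A minimal sketch of the pattern these call sites follow (alloc_blank_vma() is a made-up helper, not part of the patch):

	/* Illustrative only: how a hand-rolled vma is expected to start out. */
	static struct vm_area_struct *alloc_blank_vma(struct mm_struct *mm)
	{
		struct vm_area_struct *vma;

		vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
		if (!vma)
			return NULL;

		INIT_LIST_HEAD(&vma->anon_vma_chain);	/* the new, mandatory step */
		vma->vm_mm = mm;
		return vma;
	}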
diff --git a/fs/exec.c b/fs/exec.c
index ea7861727efd..591030735591 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -246,6 +246,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
 	vma->vm_start = vma->vm_end - PAGE_SIZE;
 	vma->vm_flags = VM_STACK_FLAGS;
 	vma->vm_page_prot = vm_get_page_prot(vma->vm_flags);
+	INIT_LIST_HEAD(&vma->anon_vma_chain);
 	err = insert_vm_struct(mm, vma);
 	if (err)
 		goto err;
@@ -516,7 +517,8 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 	/*
 	 * cover the whole range: [new_start, old_end)
 	 */
-	vma_adjust(vma, new_start, old_end, vma->vm_pgoff, NULL);
+	if (vma_adjust(vma, new_start, old_end, vma->vm_pgoff, NULL))
+		return -ENOMEM;
 
 	/*
 	 * move the page tables downwards, on failure we rely on
@@ -547,7 +549,7 @@ static int shift_arg_pages(struct vm_area_struct *vma, unsigned long shift)
 	tlb_finish_mmu(tlb, new_end, old_end);
 
 	/*
-	 * shrink the vma to just the new range.
+	 * Shrink the vma to just the new range.  Always succeeds.
 	 */
 	vma_adjust(vma, new_start, new_end, vma->vm_pgoff, NULL);
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 8e580c07d171..8e2841a2f441 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -97,7 +97,11 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_NORESERVE	0x00200000	/* should the VM suppress accounting */
 #define VM_HUGETLB	0x00400000	/* Huge TLB Page VM */
 #define VM_NONLINEAR	0x00800000	/* Is non-linear (remap_file_pages) */
+#ifdef CONFIG_MMU
+#define VM_LOCK_RMAP	0x01000000	/* Do not follow this rmap (mmu mmap) */
+#else
 #define VM_MAPPED_COPY	0x01000000	/* T if mapped copy of data (nommu mmap) */
+#endif
 #define VM_INSERTPAGE	0x02000000	/* The vma has had "vm_insert_page()" done on it */
 #define VM_ALWAYSDUMP	0x04000000	/* Always include in core dumps */
 
@@ -1216,7 +1220,7 @@ static inline void vma_nonlinear_insert(struct vm_area_struct *vma,
 
 /* mmap.c */
 extern int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin);
-extern void vma_adjust(struct vm_area_struct *vma, unsigned long start,
+extern int vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert);
 extern struct vm_area_struct *vma_merge(struct mm_struct *,
 	struct vm_area_struct *prev, unsigned long addr, unsigned long end,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 19549d7275ab..048b46270aa5 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -163,7 +163,8 @@ struct vm_area_struct {
 	 * can only be in the i_mmap tree. An anonymous MAP_PRIVATE, stack
 	 * or brk vma (with NULL file) can only be in an anon_vma list.
 	 */
-	struct list_head anon_vma_node;	/* Serialized by anon_vma->lock */
+	struct list_head anon_vma_chain; /* Serialized by mmap_sem &
+					  * page_table_lock */
 	struct anon_vma *anon_vma;	/* Serialized by page_table_lock */
 
 	/* Function pointers to deal with this struct. */
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index b019ae64e2ab..62da2001d55c 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -37,7 +37,27 @@ struct anon_vma {
 	 * is serialized by a system wide lock only visible to
 	 * mm_take_all_locks() (mm_all_locks_mutex).
 	 */
-	struct list_head head;	/* List of private "related" vmas */
+	struct list_head head;	/* Chain of private "related" vmas */
+};
+
+/*
+ * The copy-on-write semantics of fork mean that an anon_vma
+ * can become associated with multiple processes. Furthermore,
+ * each child process will have its own anon_vma, where new
+ * pages for that process are instantiated.
+ *
+ * This structure allows us to find the anon_vmas associated
+ * with a VMA, or the VMAs associated with an anon_vma.
+ * The "same_vma" list contains the anon_vma_chains linking
+ * all the anon_vmas associated with this VMA.
+ * The "same_anon_vma" list contains the anon_vma_chains
+ * which link all the VMAs associated with this anon_vma.
+ */
+struct anon_vma_chain {
+	struct vm_area_struct *vma;
+	struct anon_vma *anon_vma;
+	struct list_head same_vma;	/* locked by mmap_sem & page_table_lock */
+	struct list_head same_anon_vma;	/* locked by anon_vma->lock */
 };
 
 #ifdef CONFIG_MMU
@@ -89,12 +109,19 @@ static inline void anon_vma_unlock(struct vm_area_struct *vma)
  */
 void anon_vma_init(void);	/* create anon_vma_cachep */
 int anon_vma_prepare(struct vm_area_struct *);
-void __anon_vma_merge(struct vm_area_struct *, struct vm_area_struct *);
-void anon_vma_unlink(struct vm_area_struct *);
-void anon_vma_link(struct vm_area_struct *);
+void unlink_anon_vmas(struct vm_area_struct *);
+int anon_vma_clone(struct vm_area_struct *, struct vm_area_struct *);
+int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *);
 void __anon_vma_link(struct vm_area_struct *);
 void anon_vma_free(struct anon_vma *);
 
+static inline void anon_vma_merge(struct vm_area_struct *vma,
+				  struct vm_area_struct *next)
+{
+	VM_BUG_ON(vma->anon_vma != next->anon_vma);
+	unlink_anon_vmas(next);
+}
+
 /*
  * rmap interfaces called when adding or removing pte of page
  */
diff --git a/kernel/fork.c b/kernel/fork.c
index 7616bcf107b9..bab7b254ad39 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -329,15 +329,17 @@ static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 		if (!tmp)
 			goto fail_nomem;
 		*tmp = *mpnt;
+		INIT_LIST_HEAD(&tmp->anon_vma_chain);
 		pol = mpol_dup(vma_policy(mpnt));
 		retval = PTR_ERR(pol);
 		if (IS_ERR(pol))
 			goto fail_nomem_policy;
 		vma_set_policy(tmp, pol);
+		if (anon_vma_fork(tmp, mpnt))
+			goto fail_nomem_anon_vma_fork;
 		tmp->vm_flags &= ~VM_LOCKED;
 		tmp->vm_mm = mm;
 		tmp->vm_next = NULL;
-		anon_vma_link(tmp);
 		file = tmp->vm_file;
 		if (file) {
 			struct inode *inode = file->f_path.dentry->d_inode;
@@ -392,6 +394,8 @@ out:
 	flush_tlb_mm(oldmm);
 	up_write(&oldmm->mmap_sem);
 	return retval;
+fail_nomem_anon_vma_fork:
+	mpol_put(pol);
 fail_nomem_policy:
 	kmem_cache_free(vm_area_cachep, tmp);
 fail_nomem:
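The new fail_nomem_anon_vma_fork label keeps the unwind strictly in reverse order of setup: the mempolicy duplicated just before anon_vma_fork() is the only thing that needs releasing when that call fails. A stripped-down, hypothetical sketch of the same shape (not the actual dup_mmap() code):

	static int copy_one_vma(struct vm_area_struct *tmp,
				struct vm_area_struct *mpnt)
	{
		struct mempolicy *pol;

		pol = mpol_dup(vma_policy(mpnt));	/* step 1 */
		if (IS_ERR(pol))
			goto fail_policy;
		vma_set_policy(tmp, pol);

		if (anon_vma_fork(tmp, mpnt))		/* step 2 */
			goto fail_anon_vma_fork;
		return 0;

	fail_anon_vma_fork:
		mpol_put(pol);				/* undo step 1 */
	fail_policy:
		return -ENOMEM;
	}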
diff --git a/mm/ksm.c b/mm/ksm.c
index 56a0da1f9979..a93f1b7f508c 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1563,10 +1563,12 @@ int page_referenced_ksm(struct page *page, struct mem_cgroup *memcg,
 again:
 	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
 		struct anon_vma *anon_vma = rmap_item->anon_vma;
+		struct anon_vma_chain *vmac;
 		struct vm_area_struct *vma;
 
 		spin_lock(&anon_vma->lock);
-		list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+		list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
+			vma = vmac->vma;
 			if (rmap_item->address < vma->vm_start ||
 			    rmap_item->address >= vma->vm_end)
 				continue;
@@ -1614,10 +1616,12 @@ int try_to_unmap_ksm(struct page *page, enum ttu_flags flags)
 again:
 	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
 		struct anon_vma *anon_vma = rmap_item->anon_vma;
+		struct anon_vma_chain *vmac;
 		struct vm_area_struct *vma;
 
 		spin_lock(&anon_vma->lock);
-		list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+		list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
+			vma = vmac->vma;
 			if (rmap_item->address < vma->vm_start ||
 			    rmap_item->address >= vma->vm_end)
 				continue;
@@ -1664,10 +1668,12 @@ int rmap_walk_ksm(struct page *page, int (*rmap_one)(struct page *,
 again:
 	hlist_for_each_entry(rmap_item, hlist, &stable_node->hlist, hlist) {
 		struct anon_vma *anon_vma = rmap_item->anon_vma;
+		struct anon_vma_chain *vmac;
 		struct vm_area_struct *vma;
 
 		spin_lock(&anon_vma->lock);
-		list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+		list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
+			vma = vmac->vma;
 			if (rmap_item->address < vma->vm_start ||
 			    rmap_item->address >= vma->vm_end)
 				continue;
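All three KSM walkers above now share the same idiom: take anon_vma->lock, iterate the same_anon_vma list, and recover the VMA from each chain element before the usual address-range check. Condensed into a stand-alone sketch (for_each_vma_of_anon_vma() is a hypothetical name):

	/* Visit every VMA currently chained to @anon_vma that can map @address. */
	static void for_each_vma_of_anon_vma(struct anon_vma *anon_vma,
					     unsigned long address)
	{
		struct anon_vma_chain *vmac;

		spin_lock(&anon_vma->lock);
		list_for_each_entry(vmac, &anon_vma->head, same_anon_vma) {
			struct vm_area_struct *vma = vmac->vma;

			if (address < vma->vm_start || address >= vma->vm_end)
				continue;
			/* ... act on vma here ... */
		}
		spin_unlock(&anon_vma->lock);
	}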
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 17299fd4577c..d1f335162976 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -383,9 +383,12 @@ static void collect_procs_anon(struct page *page, struct list_head *to_kill,
 	if (av == NULL)	/* Not actually mapped anymore */
 		goto out;
 	for_each_process (tsk) {
+		struct anon_vma_chain *vmac;
+
 		if (!task_early_kill(tsk))
 			continue;
-		list_for_each_entry (vma, &av->head, anon_vma_node) {
+		list_for_each_entry(vmac, &av->head, same_anon_vma) {
+			vma = vmac->vma;
 			if (!page_mapped_in_vma(page, vma))
 				continue;
 			if (vma->vm_mm == tsk->mm)
diff --git a/mm/memory.c b/mm/memory.c
index 77d9f840936b..dc785b438d70 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -374,7 +374,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 		 * Hide vma from rmap and truncate_pagecache before freeing
 		 * pgtables
 		 */
-		anon_vma_unlink(vma);
+		unlink_anon_vmas(vma);
 		unlink_file_vma(vma);
 
 		if (is_vm_hugetlb_page(vma)) {
@@ -388,7 +388,7 @@ void free_pgtables(struct mmu_gather *tlb, struct vm_area_struct *vma,
 			       && !is_vm_hugetlb_page(next)) {
 				vma = next;
 				next = vma->vm_next;
-				anon_vma_unlink(vma);
+				unlink_anon_vmas(vma);
 				unlink_file_vma(vma);
 			}
 			free_pgd_range(tlb, addr, vma->vm_end,
diff --git a/mm/mmap.c b/mm/mmap.c
index 31656147128e..6a0c15db7f60 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -437,7 +437,6 @@ __vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	__vma_link_list(mm, vma, prev, rb_parent);
 	__vma_link_rb(mm, vma, rb_link, rb_parent);
-	__anon_vma_link(vma);
 }
 
 static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -499,7 +498,7 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
  * are necessary. The "insert" vma (if any) is to be inserted
  * before we drop the necessary locks.
  */
-void vma_adjust(struct vm_area_struct *vma, unsigned long start,
+int vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
 {
 	struct mm_struct *mm = vma->vm_mm;
@@ -542,6 +541,28 @@ again: remove_next = 1 + (end > next->vm_end);
 		}
 	}
 
+	/*
+	 * When changing only vma->vm_end, we don't really need anon_vma lock.
+	 */
+	if (vma->anon_vma && (insert || importer || start != vma->vm_start))
+		anon_vma = vma->anon_vma;
+	if (anon_vma) {
+		/*
+		 * Easily overlooked: when mprotect shifts the boundary,
+		 * make sure the expanding vma has anon_vma set if the
+		 * shrinking vma had, to cover any anon pages imported.
+		 */
+		if (importer && !importer->anon_vma) {
+			/* Block reverse map lookups until things are set up. */
+			importer->vm_flags |= VM_LOCK_RMAP;
+			if (anon_vma_clone(importer, vma)) {
+				importer->vm_flags &= ~VM_LOCK_RMAP;
+				return -ENOMEM;
+			}
+			importer->anon_vma = anon_vma;
+		}
+	}
+
 	if (file) {
 		mapping = file->f_mapping;
 		if (!(vma->vm_flags & VM_NONLINEAR))
@@ -567,25 +588,6 @@ again: remove_next = 1 + (end > next->vm_end);
 		}
 	}
 
-	/*
-	 * When changing only vma->vm_end, we don't really need
-	 * anon_vma lock.
-	 */
-	if (vma->anon_vma && (insert || importer || start != vma->vm_start))
-		anon_vma = vma->anon_vma;
-	if (anon_vma) {
-		spin_lock(&anon_vma->lock);
-		/*
-		 * Easily overlooked: when mprotect shifts the boundary,
-		 * make sure the expanding vma has anon_vma set if the
-		 * shrinking vma had, to cover any anon pages imported.
-		 */
-		if (importer && !importer->anon_vma) {
-			importer->anon_vma = anon_vma;
-			__anon_vma_link(importer);
-		}
-	}
-
 	if (root) {
 		flush_dcache_mmap_lock(mapping);
 		vma_prio_tree_remove(vma, root);
@@ -616,8 +618,11 @@ again: remove_next = 1 + (end > next->vm_end);
 		__vma_unlink(mm, next, vma);
 		if (file)
 			__remove_shared_vm_struct(next, file, mapping);
-		if (next->anon_vma)
-			__anon_vma_merge(vma, next);
+		/*
+		 * This VMA is now dead, no need for rmap to follow it.
+		 * Call anon_vma_merge below, outside of i_mmap_lock.
+		 */
+		next->vm_flags |= VM_LOCK_RMAP;
 	} else if (insert) {
 		/*
 		 * split_vma has split insert from vma, and needs
@@ -627,17 +632,25 @@ again: remove_next = 1 + (end > next->vm_end);
 		__insert_vm_struct(mm, insert);
 	}
 
-	if (anon_vma)
-		spin_unlock(&anon_vma->lock);
 	if (mapping)
 		spin_unlock(&mapping->i_mmap_lock);
 
+	/*
+	 * The current VMA has been set up. It is now safe for the
+	 * rmap code to get from the pages to the ptes.
+	 */
+	if (anon_vma && importer)
+		importer->vm_flags &= ~VM_LOCK_RMAP;
+
 	if (remove_next) {
 		if (file) {
 			fput(file);
 			if (next->vm_flags & VM_EXECUTABLE)
 				removed_exe_file_vma(mm);
 		}
+		/* Protected by mmap_sem and VM_LOCK_RMAP. */
+		if (next->anon_vma)
+			anon_vma_merge(vma, next);
 		mm->map_count--;
 		mpol_put(vma_policy(next));
 		kmem_cache_free(vm_area_cachep, next);
@@ -653,6 +666,8 @@ again: remove_next = 1 + (end > next->vm_end);
 	}
 
 	validate_mm(mm);
+
+	return 0;
 }
 
 /*
@@ -759,6 +774,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 {
 	pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
 	struct vm_area_struct *area, *next;
+	int err;
 
 	/*
 	 * We later require that vma->vm_flags == vm_flags,
@@ -792,11 +808,13 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 				is_mergeable_anon_vma(prev->anon_vma,
 						      next->anon_vma)) {
 							/* cases 1, 6 */
-			vma_adjust(prev, prev->vm_start,
-				next->vm_end, prev->vm_pgoff, NULL);
+			err = vma_adjust(prev, prev->vm_start,
+				next->vm_end, prev->vm_pgoff, NULL);
 		} else					/* cases 2, 5, 7 */
-			vma_adjust(prev, prev->vm_start,
-				end, prev->vm_pgoff, NULL);
+			err = vma_adjust(prev, prev->vm_start,
+				end, prev->vm_pgoff, NULL);
+		if (err)
+			return NULL;
 		return prev;
 	}
 
@@ -808,11 +826,13 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 			can_vma_merge_before(next, vm_flags,
 					anon_vma, file, pgoff+pglen)) {
 		if (prev && addr < prev->vm_end)	/* case 4 */
-			vma_adjust(prev, prev->vm_start,
-				addr, prev->vm_pgoff, NULL);
+			err = vma_adjust(prev, prev->vm_start,
+				addr, prev->vm_pgoff, NULL);
 		else					/* cases 3, 8 */
-			vma_adjust(area, addr, next->vm_end,
-				next->vm_pgoff - pglen, NULL);
+			err = vma_adjust(area, addr, next->vm_end,
+				next->vm_pgoff - pglen, NULL);
+		if (err)
+			return NULL;
 		return area;
 	}
 
@@ -1205,6 +1225,7 @@ munmap_back:
 	vma->vm_flags = vm_flags;
 	vma->vm_page_prot = vm_get_page_prot(vm_flags);
 	vma->vm_pgoff = pgoff;
+	INIT_LIST_HEAD(&vma->anon_vma_chain);
 
 	if (file) {
 		error = -EINVAL;
@@ -1865,6 +1886,7 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 {
 	struct mempolicy *pol;
 	struct vm_area_struct *new;
+	int err = -ENOMEM;
 
 	if (is_vm_hugetlb_page(vma) && (addr &
 					~(huge_page_mask(hstate_vma(vma)))))
@@ -1872,11 +1894,13 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 
 	new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 	if (!new)
-		return -ENOMEM;
+		goto out_err;
 
 	/* most fields are the same, copy all, and then fixup */
 	*new = *vma;
 
+	INIT_LIST_HEAD(&new->anon_vma_chain);
+
 	if (new_below)
 		new->vm_end = addr;
 	else {
@@ -1886,11 +1910,14 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 
 	pol = mpol_dup(vma_policy(vma));
 	if (IS_ERR(pol)) {
-		kmem_cache_free(vm_area_cachep, new);
-		return PTR_ERR(pol);
+		err = PTR_ERR(pol);
+		goto out_free_vma;
 	}
 	vma_set_policy(new, pol);
 
+	if (anon_vma_clone(new, vma))
+		goto out_free_mpol;
+
 	if (new->vm_file) {
 		get_file(new->vm_file);
 		if (vma->vm_flags & VM_EXECUTABLE)
@@ -1901,12 +1928,28 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 		new->vm_ops->open(new);
 
 	if (new_below)
-		vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
-			((addr - new->vm_start) >> PAGE_SHIFT), new);
+		err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
+			((addr - new->vm_start) >> PAGE_SHIFT), new);
 	else
-		vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
+		err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
 
-	return 0;
+	/* Success. */
+	if (!err)
+		return 0;
+
+	/* Clean everything up if vma_adjust failed. */
+	new->vm_ops->close(new);
+	if (new->vm_file) {
+		if (vma->vm_flags & VM_EXECUTABLE)
+			removed_exe_file_vma(mm);
+		fput(new->vm_file);
+	}
+ out_free_mpol:
+	mpol_put(pol);
+ out_free_vma:
+	kmem_cache_free(vm_area_cachep, new);
+ out_err:
+	return err;
 }
 
 /*
@@ -2116,6 +2159,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 		return -ENOMEM;
 	}
 
+	INIT_LIST_HEAD(&vma->anon_vma_chain);
 	vma->vm_mm = mm;
 	vma->vm_start = addr;
 	vma->vm_end = addr + len;
@@ -2252,10 +2296,11 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 	if (new_vma) {
 		*new_vma = *vma;
 		pol = mpol_dup(vma_policy(vma));
-		if (IS_ERR(pol)) {
-			kmem_cache_free(vm_area_cachep, new_vma);
-			return NULL;
-		}
+		if (IS_ERR(pol))
+			goto out_free_vma;
+		INIT_LIST_HEAD(&new_vma->anon_vma_chain);
+		if (anon_vma_clone(new_vma, vma))
+			goto out_free_mempol;
 		vma_set_policy(new_vma, pol);
 		new_vma->vm_start = addr;
 		new_vma->vm_end = addr + len;
@@ -2271,6 +2316,12 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 		}
 	}
 	return new_vma;
+
+ out_free_mempol:
+	mpol_put(pol);
+ out_free_vma:
+	kmem_cache_free(vm_area_cachep, new_vma);
+	return NULL;
 }
 
 /*
@@ -2348,6 +2399,7 @@ int install_special_mapping(struct mm_struct *mm,
 	if (unlikely(vma == NULL))
 		return -ENOMEM;
 
+	INIT_LIST_HEAD(&vma->anon_vma_chain);
 	vma->vm_mm = mm;
 	vma->vm_start = addr;
 	vma->vm_end = addr + len;
@@ -2448,6 +2500,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
 int mm_take_all_locks(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
+	struct anon_vma_chain *avc;
 	int ret = -EINTR;
 
 	BUG_ON(down_read_trylock(&mm->mmap_sem));
@@ -2465,7 +2518,8 @@ int mm_take_all_locks(struct mm_struct *mm)
 		if (signal_pending(current))
 			goto out_unlock;
 		if (vma->anon_vma)
-			vm_lock_anon_vma(mm, vma->anon_vma);
+			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+				vm_lock_anon_vma(mm, avc->anon_vma);
 	}
 
 	ret = 0;
@@ -2520,13 +2574,15 @@ static void vm_unlock_mapping(struct address_space *mapping)
 void mm_drop_all_locks(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
+	struct anon_vma_chain *avc;
 
 	BUG_ON(down_read_trylock(&mm->mmap_sem));
 	BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
 
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		if (vma->anon_vma)
-			vm_unlock_anon_vma(vma->anon_vma);
+			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+				vm_unlock_anon_vma(avc->anon_vma);
 		if (vma->vm_file && vma->vm_file->f_mapping)
 			vm_unlock_mapping(vma->vm_file->f_mapping);
 	}
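Because vma_adjust() can now fail while cloning anon_vma chains, every caller has to check its result; the exec.c, mremap.c and mmap.c changes in this commit all follow the shape sketched here (illustrative only, not code from the patch):

	static int expand_to(struct vm_area_struct *vma, unsigned long new_end)
	{
		if (vma_adjust(vma, vma->vm_start, new_end, vma->vm_pgoff, NULL))
			return -ENOMEM;	/* anon_vma_chain allocation failed */
		return 0;
	}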
diff --git a/mm/mremap.c b/mm/mremap.c
index 4c4c803453f3..e9c75efce609 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -460,8 +460,11 @@ unsigned long do_mremap(unsigned long addr,
 		if (vma_expandable(vma, new_len - old_len)) {
 			int pages = (new_len - old_len) >> PAGE_SHIFT;
 
-			vma_adjust(vma, vma->vm_start,
-				addr + new_len, vma->vm_pgoff, NULL);
+			if (vma_adjust(vma, vma->vm_start, addr + new_len,
+				       vma->vm_pgoff, NULL)) {
+				ret = -ENOMEM;
+				goto out;
+			}
 
 			mm->total_vm += pages;
 			vm_stat_account(mm, vma->vm_flags, vma->vm_file, pages);
diff --git a/mm/nommu.c b/mm/nommu.c
index 48a2ecfaf059..55727a74af98 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1209,7 +1209,7 @@ unsigned long do_mmap_pgoff(struct file *file,
 	region->vm_flags = vm_flags;
 	region->vm_pgoff = pgoff;
 
-	INIT_LIST_HEAD(&vma->anon_vma_node);
+	INIT_LIST_HEAD(&vma->anon_vma_chain);
 	vma->vm_flags = vm_flags;
 	vma->vm_pgoff = pgoff;
 
diff --git a/mm/rmap.c b/mm/rmap.c
index 5cb47111f79e..be34094e4595 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -62,6 +62,7 @@
 #include "internal.h"
 
 static struct kmem_cache *anon_vma_cachep;
+static struct kmem_cache *anon_vma_chain_cachep;
 
 static inline struct anon_vma *anon_vma_alloc(void)
 {
@@ -73,6 +74,16 @@ void anon_vma_free(struct anon_vma *anon_vma)
 	kmem_cache_free(anon_vma_cachep, anon_vma);
 }
 
+static inline struct anon_vma_chain *anon_vma_chain_alloc(void)
+{
+	return kmem_cache_alloc(anon_vma_chain_cachep, GFP_KERNEL);
+}
+
+void anon_vma_chain_free(struct anon_vma_chain *anon_vma_chain)
+{
+	kmem_cache_free(anon_vma_chain_cachep, anon_vma_chain);
+}
+
 /**
  * anon_vma_prepare - attach an anon_vma to a memory region
  * @vma: the memory region in question
@@ -103,18 +114,23 @@ void anon_vma_free(struct anon_vma *anon_vma)
 int anon_vma_prepare(struct vm_area_struct *vma)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
+	struct anon_vma_chain *avc;
 
 	might_sleep();
 	if (unlikely(!anon_vma)) {
 		struct mm_struct *mm = vma->vm_mm;
 		struct anon_vma *allocated;
 
+		avc = anon_vma_chain_alloc();
+		if (!avc)
+			goto out_enomem;
+
 		anon_vma = find_mergeable_anon_vma(vma);
 		allocated = NULL;
 		if (!anon_vma) {
 			anon_vma = anon_vma_alloc();
 			if (unlikely(!anon_vma))
-				return -ENOMEM;
+				goto out_enomem_free_avc;
 			allocated = anon_vma;
 		}
 		spin_lock(&anon_vma->lock);
@@ -123,53 +139,113 @@ int anon_vma_prepare(struct vm_area_struct *vma)
 		spin_lock(&mm->page_table_lock);
 		if (likely(!vma->anon_vma)) {
 			vma->anon_vma = anon_vma;
-			list_add_tail(&vma->anon_vma_node, &anon_vma->head);
+			avc->anon_vma = anon_vma;
+			avc->vma = vma;
+			list_add(&avc->same_vma, &vma->anon_vma_chain);
+			list_add(&avc->same_anon_vma, &anon_vma->head);
 			allocated = NULL;
 		}
 		spin_unlock(&mm->page_table_lock);
 
 		spin_unlock(&anon_vma->lock);
-		if (unlikely(allocated))
+		if (unlikely(allocated)) {
 			anon_vma_free(allocated);
+			anon_vma_chain_free(avc);
+		}
 	}
 	return 0;
+
+ out_enomem_free_avc:
+	anon_vma_chain_free(avc);
+ out_enomem:
+	return -ENOMEM;
 }
 
-void __anon_vma_merge(struct vm_area_struct *vma, struct vm_area_struct *next)
+static void anon_vma_chain_link(struct vm_area_struct *vma,
+				struct anon_vma_chain *avc,
+				struct anon_vma *anon_vma)
 {
-	BUG_ON(vma->anon_vma != next->anon_vma);
-	list_del(&next->anon_vma_node);
+	avc->vma = vma;
+	avc->anon_vma = anon_vma;
+	list_add(&avc->same_vma, &vma->anon_vma_chain);
+
+	spin_lock(&anon_vma->lock);
+	list_add_tail(&avc->same_anon_vma, &anon_vma->head);
+	spin_unlock(&anon_vma->lock);
 }
 
-void __anon_vma_link(struct vm_area_struct *vma)
+/*
+ * Attach the anon_vmas from src to dst.
+ * Returns 0 on success, -ENOMEM on failure.
+ */
+int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 {
-	struct anon_vma *anon_vma = vma->anon_vma;
+	struct anon_vma_chain *avc, *pavc;
+
+	list_for_each_entry(pavc, &src->anon_vma_chain, same_vma) {
+		avc = anon_vma_chain_alloc();
+		if (!avc)
+			goto enomem_failure;
+		anon_vma_chain_link(dst, avc, pavc->anon_vma);
+	}
+	return 0;
 
-	if (anon_vma)
-		list_add_tail(&vma->anon_vma_node, &anon_vma->head);
+ enomem_failure:
+	unlink_anon_vmas(dst);
+	return -ENOMEM;
 }
 
-void anon_vma_link(struct vm_area_struct *vma)
+/*
+ * Attach vma to its own anon_vma, as well as to the anon_vmas that
+ * the corresponding VMA in the parent process is attached to.
+ * Returns 0 on success, non-zero on failure.
+ */
+int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
 {
-	struct anon_vma *anon_vma = vma->anon_vma;
+	struct anon_vma_chain *avc;
+	struct anon_vma *anon_vma;
 
-	if (anon_vma) {
-		spin_lock(&anon_vma->lock);
-		list_add_tail(&vma->anon_vma_node, &anon_vma->head);
-		spin_unlock(&anon_vma->lock);
-	}
+	/* Don't bother if the parent process has no anon_vma here. */
+	if (!pvma->anon_vma)
+		return 0;
+
+	/*
+	 * First, attach the new VMA to the parent VMA's anon_vmas,
+	 * so rmap can find non-COWed pages in child processes.
+	 */
+	if (anon_vma_clone(vma, pvma))
+		return -ENOMEM;
+
+	/* Then add our own anon_vma. */
+	anon_vma = anon_vma_alloc();
+	if (!anon_vma)
+		goto out_error;
+	avc = anon_vma_chain_alloc();
+	if (!avc)
+		goto out_error_free_anon_vma;
+	anon_vma_chain_link(vma, avc, anon_vma);
+	/* Mark this anon_vma as the one where our new (COWed) pages go. */
+	vma->anon_vma = anon_vma;
+
+	return 0;
+
+ out_error_free_anon_vma:
+	anon_vma_free(anon_vma);
+ out_error:
+	return -ENOMEM;
 }
 
-void anon_vma_unlink(struct vm_area_struct *vma)
+static void anon_vma_unlink(struct anon_vma_chain *anon_vma_chain)
 {
-	struct anon_vma *anon_vma = vma->anon_vma;
+	struct anon_vma *anon_vma = anon_vma_chain->anon_vma;
 	int empty;
 
+	/* If anon_vma_fork fails, we can get an empty anon_vma_chain. */
 	if (!anon_vma)
 		return;
 
 	spin_lock(&anon_vma->lock);
-	list_del(&vma->anon_vma_node);
+	list_del(&anon_vma_chain->same_anon_vma);
 
 	/* We must garbage collect the anon_vma if it's empty */
 	empty = list_empty(&anon_vma->head) && !ksm_refcount(anon_vma);
@@ -179,6 +255,18 @@ void anon_vma_unlink(struct vm_area_struct *vma)
 		anon_vma_free(anon_vma);
 }
 
+void unlink_anon_vmas(struct vm_area_struct *vma)
+{
+	struct anon_vma_chain *avc, *next;
+
+	/* Unlink each anon_vma chained to the VMA. */
+	list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) {
+		anon_vma_unlink(avc);
+		list_del(&avc->same_vma);
+		anon_vma_chain_free(avc);
+	}
+}
+
 static void anon_vma_ctor(void *data)
 {
 	struct anon_vma *anon_vma = data;
@@ -192,6 +280,7 @@ void __init anon_vma_init(void)
 {
 	anon_vma_cachep = kmem_cache_create("anon_vma", sizeof(struct anon_vma),
 			0, SLAB_DESTROY_BY_RCU|SLAB_PANIC, anon_vma_ctor);
+	anon_vma_chain_cachep = KMEM_CACHE(anon_vma_chain, SLAB_PANIC);
 }
 
 /*
@@ -240,6 +329,18 @@ vma_address(struct page *page, struct vm_area_struct *vma)
 		/* page should be within @vma mapping range */
 		return -EFAULT;
 	}
+	if (unlikely(vma->vm_flags & VM_LOCK_RMAP)) {
+		/*
+		 * This VMA is being unlinked or is not yet linked into the
+		 * VMA tree. Do not try to follow this rmap. This race
+		 * condition can result in page_referenced() ignoring a
+		 * reference or in try_to_unmap() failing to unmap a page.
+		 * The VMA cannot be freed under us because we hold the
+		 * anon_vma->lock, which the munmap code takes while
+		 * unlinking the anon_vmas from the VMA.
+		 */
+		return -EFAULT;
+	}
 	return address;
 }
 
@@ -396,7 +497,7 @@ static int page_referenced_anon(struct page *page,
 {
 	unsigned int mapcount;
 	struct anon_vma *anon_vma;
-	struct vm_area_struct *vma;
+	struct anon_vma_chain *avc;
 	int referenced = 0;
 
 	anon_vma = page_lock_anon_vma(page);
@@ -404,7 +505,8 @@ static int page_referenced_anon(struct page *page,
 		return referenced;
 
 	mapcount = page_mapcount(page);
-	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+	list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+		struct vm_area_struct *vma = avc->vma;
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
 			continue;
@@ -1025,14 +1127,15 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount,
 static int try_to_unmap_anon(struct page *page, enum ttu_flags flags)
 {
 	struct anon_vma *anon_vma;
-	struct vm_area_struct *vma;
+	struct anon_vma_chain *avc;
 	int ret = SWAP_AGAIN;
 
 	anon_vma = page_lock_anon_vma(page);
 	if (!anon_vma)
 		return ret;
 
-	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+	list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+		struct vm_area_struct *vma = avc->vma;
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
 			continue;
@@ -1223,7 +1326,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
 		struct vm_area_struct *, unsigned long, void *), void *arg)
 {
 	struct anon_vma *anon_vma;
-	struct vm_area_struct *vma;
+	struct anon_vma_chain *avc;
 	int ret = SWAP_AGAIN;
 
 	/*
@@ -1238,7 +1341,8 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *,
 	if (!anon_vma)
 		return ret;
 	spin_lock(&anon_vma->lock);
-	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+	list_for_each_entry(avc, &anon_vma->head, same_anon_vma) {
+		struct vm_area_struct *vma = avc->vma;
 		unsigned long address = vma_address(page, vma);
 		if (address == -EFAULT)
 			continue;
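Taken together, the new interfaces cover a VMA's whole anonymous-memory lifetime. A hypothetical walk-through, for orientation only (real callers are the anonymous page fault path, dup_mmap() and free_pgtables()):

	static int anon_vma_lifetime_demo(struct vm_area_struct *vma,
					  struct vm_area_struct *child)
	{
		int err;

		/* First anonymous fault in vma: allocate an anon_vma and
		 * one anon_vma_chain linking the two. */
		err = anon_vma_prepare(vma);
		if (err)
			return err;

		/* fork(): child is chained to the parent's anon_vmas and
		 * also gets a fresh anon_vma for its own new pages. */
		err = anon_vma_fork(child, vma);
		if (err)
			return err;

		/* munmap()/exit(): drop every chain entry and free any
		 * anon_vma that ends up with no VMAs left on it. */
		unlink_anon_vmas(child);
		unlink_anon_vmas(vma);
		return 0;
	}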