about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2010-04-10 13:36:19 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2010-04-12 20:53:59 -0400
commit d0e9fe1758f222f13ec893f856552d81a10d266d (patch)
tree 93e55a2e26c2b2f40b6d9142515fd14de8eb6647
parent 0eddb519b9127c73d53db4bf3ec1d45b13f844d1 (diff)
Simplify and comment on anon_vma re-use for anon_vma_prepare()
This changes the anon_vma reuse case to require that we only reuse simple anon_vma's - ie the case when the vma only has a single anon_vma associated with it.

This means that a reuse of an anon_vma from an adjacent vma will always guarantee that both vma's are associated not only with the same anon_vma, they will also have the same anon_vma chain (of just a single entry in this case).

And since anon_vma re-use was the only case where the same anon_vma might be associated with different chains of anon_vma's, we now have the case that every vma that shares the same anon_vma will always also have the same chain. That makes it much easier to think about merging vma's that share the same anon_vma's: you can always just drop the other anon_vma chain in anon_vma_merge() since you know that they are always identical.

This also splits up the function to validate the anon_vma re-use, and adds a lot of commentary about the possible races.

Reviewed-by: Rik van Riel <riel@redhat.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Tested-by: Borislav Petkov <bp@alien8.de> [ "That didn't fix it" ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- mm/mmap.c 86
1 files changed, 62 insertions, 24 deletions
diff --git a/mm/mmap.c b/mm/mmap.c
index 75557c639ad4..acb023e2d35a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -825,6 +825,61 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
825} 825}
826 826
827/* 827/*
828 * Rough compatibility check to quickly see if it's even worth looking
829 * at sharing an anon_vma.
830 *
831 * They need to have the same vm_file, and the flags can only differ
832 * in things that mprotect may change.
833 *
834 * NOTE! The fact that we share an anon_vma doesn't _have_ to mean that
835 * we can merge the two vma's. For example, we refuse to merge a vma if
836 * there is a vm_ops->close() function, because that indicates that the
837 * driver is doing some kind of reference counting. But that doesn't
838 * really matter for the anon_vma sharing case.
839 */
840static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
841{
842 return a->vm_end == b->vm_start &&
843 mpol_equal(vma_policy(a), vma_policy(b)) &&
844 a->vm_file == b->vm_file &&
845 !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC)) &&
846 b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
847}
848
849/*
850 * Do some basic sanity checking to see if we can re-use the anon_vma
851 * from 'old'. The 'a'/'b' vma's are in VM order - one of them will be
852 * the same as 'old', the other will be the new one that is trying
853 * to share the anon_vma.
854 *
855 * NOTE! This runs with mm_sem held for reading, so it is possible that
856 * the anon_vma of 'old' is concurrently in the process of being set up
857 * by another page fault trying to merge _that_. But that's ok: if it
858 * is being set up, that automatically means that it will be a singleton
859 * acceptable for merging, so we can do all of this optimistically. But
860 * we do that ACCESS_ONCE() to make sure that we never re-load the pointer.
861 *
862 * IOW: that the "list_is_singular()" test on the anon_vma_chain only
863 * matters for the 'stable anon_vma' case (ie the thing we want to avoid
864 * is to return an anon_vma that is "complex" due to having gone through
865 * a fork).
866 *
867 * We also make sure that the two vma's are compatible (adjacent,
868 * and with the same memory policies). That's all stable, even with just
869 * a read lock on the mm_sem.
870 */
871static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
872{
873 if (anon_vma_compatible(a, b)) {
874 struct anon_vma *anon_vma = ACCESS_ONCE(old->anon_vma);
875
876 if (anon_vma && list_is_singular(&old->anon_vma_chain))
877 return anon_vma;
878 }
879 return NULL;
880}
881
882/*
828 * find_mergeable_anon_vma is used by anon_vma_prepare, to check 883 * find_mergeable_anon_vma is used by anon_vma_prepare, to check
829 * neighbouring vmas for a suitable anon_vma, before it goes off 884 * neighbouring vmas for a suitable anon_vma, before it goes off
830 * to allocate a new anon_vma. It checks because a repetitive 885 * to allocate a new anon_vma. It checks because a repetitive
@@ -834,28 +889,16 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
834 */ 889 */
835struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma) 890struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
836{ 891{
892 struct anon_vma *anon_vma;
837 struct vm_area_struct *near; 893 struct vm_area_struct *near;
838 unsigned long vm_flags;
839 894
840 near = vma->vm_next; 895 near = vma->vm_next;
841 if (!near) 896 if (!near)
842 goto try_prev; 897 goto try_prev;
843 898
844 /* 899 anon_vma = reusable_anon_vma(near, vma, near);
845 * Since only mprotect tries to remerge vmas, match flags 900 if (anon_vma)
846 * which might be mprotected into each other later on. 901 return anon_vma;
847 * Neither mlock nor madvise tries to remerge at present,
848 * so leave their flags as obstructing a merge.
849 */
850 vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
851 vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
852
853 if (near->anon_vma && vma->vm_end == near->vm_start &&
854 mpol_equal(vma_policy(vma), vma_policy(near)) &&
855 can_vma_merge_before(near, vm_flags,
856 NULL, vma->vm_file, vma->vm_pgoff +
857 ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)))
858 return near->anon_vma;
859try_prev: 902try_prev:
860 /* 903 /*
861 * It is potentially slow to have to call find_vma_prev here. 904 * It is potentially slow to have to call find_vma_prev here.
@@ -868,14 +911,9 @@ try_prev:
868 if (!near) 911 if (!near)
869 goto none; 912 goto none;
870 913
871 vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC); 914 anon_vma = reusable_anon_vma(near, near, vma);
872 vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC); 915 if (anon_vma)
873 916 return anon_vma;
874 if (near->anon_vma && near->vm_end == vma->vm_start &&
875 mpol_equal(vma_policy(near), vma_policy(vma)) &&
876 can_vma_merge_after(near, vm_flags,
877 NULL, vma->vm_file, vma->vm_pgoff))
878 return near->anon_vma;
879none: 917none:
880 /* 918 /*
881 * There's no absolute need to look only at touching neighbours: 919 * There's no absolute need to look only at touching neighbours: