author    Linus Torvalds <torvalds@linux-foundation.org>    2010-04-10 13:36:19 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2010-04-12 20:53:59 -0400
commit    d0e9fe1758f222f13ec893f856552d81a10d266d
tree      93e55a2e26c2b2f40b6d9142515fd14de8eb6647 /mm/mmap.c
parent    0eddb519b9127c73d53db4bf3ec1d45b13f844d1
Simplify and comment on anon_vma re-use for anon_vma_prepare()
This changes the anon_vma reuse case to require that we only reuse
simple anon_vma's - i.e. the case where the vma has a single anon_vma
associated with it.
This means that reusing an anon_vma from an adjacent vma guarantees not
only that both vma's are associated with the same anon_vma, but also
that they have the same anon_vma chain (of just a single entry in this
case).
And since anon_vma re-use was the only case where the same anon_vma
might be associated with different chains of anon_vma's, it now follows
that every vma sharing the same anon_vma also has the same chain. That
makes it much easier to reason about merging vma's that share an
anon_vma: you can always just drop the other vma's anon_vma chain in
anon_vma_merge(), since you know the two chains are identical.
This also splits the anon_vma re-use validation out into helper
functions, and adds a lot of commentary about the possible races.
Reviewed-by: Rik van Riel <riel@redhat.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Tested-by: Borislav Petkov <bp@alien8.de> [ "That didn't fix it" ]
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
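
To make the "singleton only" rule above concrete, here is a minimal userspace
sketch (not kernel code - the toy list type, toy_vma and toy_reusable_anon_vma
below are illustrative stand-ins) that models a vma's anon_vma_chain as a
circular doubly-linked list and permits reuse only when that chain has exactly
one entry, the same shape of test that list_is_singular() performs on
old->anon_vma_chain in the patch below:

#include <stdbool.h>
#include <stdio.h>

/* Toy circular doubly-linked list, shaped like the kernel's struct list_head. */
struct list_head {
	struct list_head *next, *prev;
};

static void list_init(struct list_head *head)
{
	head->next = head->prev = head;
}

static void list_add_tail(struct list_head *entry, struct list_head *head)
{
	entry->prev = head->prev;
	entry->next = head;
	head->prev->next = entry;
	head->prev = entry;
}

/* True when the list has exactly one entry - the test list_is_singular() makes. */
static bool list_is_singular(const struct list_head *head)
{
	return head->next != head && head->next == head->prev;
}

struct toy_anon_vma { int id; };

struct toy_vma {
	struct toy_anon_vma *anon_vma;
	struct list_head anon_vma_chain;	/* one node per associated anon_vma */
};

/* Reuse old's anon_vma only if it exists and its chain is a singleton. */
static struct toy_anon_vma *toy_reusable_anon_vma(const struct toy_vma *old)
{
	if (old->anon_vma && list_is_singular(&old->anon_vma_chain))
		return old->anon_vma;
	return NULL;
}

int main(void)
{
	struct toy_anon_vma av = { .id = 1 };
	struct list_head a, b1, b2;
	struct toy_vma simple, forked;

	/* "Simple" vma: one anon_vma, one chain entry -> reusable. */
	simple.anon_vma = &av;
	list_init(&simple.anon_vma_chain);
	list_add_tail(&a, &simple.anon_vma_chain);

	/* "Complex" vma (e.g. after a fork): two chain entries -> not reusable. */
	forked.anon_vma = &av;
	list_init(&forked.anon_vma_chain);
	list_add_tail(&b1, &forked.anon_vma_chain);
	list_add_tail(&b2, &forked.anon_vma_chain);

	printf("simple vma reusable: %s\n", toy_reusable_anon_vma(&simple) ? "yes" : "no");
	printf("forked vma reusable: %s\n", toy_reusable_anon_vma(&forked) ? "yes" : "no");
	return 0;
}

Built with any C compiler, this prints "yes" for the single-entry chain and
"no" for the two-entry one, mirroring why an anon_vma that has become
"complex" through fork is never reused.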
Diffstat (limited to 'mm/mmap.c')
-rw-r--r--  mm/mmap.c  86
1 files changed, 62 insertions, 24 deletions
@@ -825,6 +825,61 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 }
 
 /*
+ * Rough compatbility check to quickly see if it's even worth looking
+ * at sharing an anon_vma.
+ *
+ * They need to have the same vm_file, and the flags can only differ
+ * in things that mprotect may change.
+ *
+ * NOTE! The fact that we share an anon_vma doesn't _have_ to mean that
+ * we can merge the two vma's. For example, we refuse to merge a vma if
+ * there is a vm_ops->close() function, because that indicates that the
+ * driver is doing some kind of reference counting. But that doesn't
+ * really matter for the anon_vma sharing case.
+ */
+static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
+{
+	return a->vm_end == b->vm_start &&
+		mpol_equal(vma_policy(a), vma_policy(b)) &&
+		a->vm_file == b->vm_file &&
+		!((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC)) &&
+		b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
+}
+
+/*
+ * Do some basic sanity checking to see if we can re-use the anon_vma
+ * from 'old'. The 'a'/'b' vma's are in VM order - one of them will be
+ * the same as 'old', the other will be the new one that is trying
+ * to share the anon_vma.
+ *
+ * NOTE! This runs with mm_sem held for reading, so it is possible that
+ * the anon_vma of 'old' is concurrently in the process of being set up
+ * by another page fault trying to merge _that_. But that's ok: if it
+ * is being set up, that automatically means that it will be a singleton
+ * acceptable for merging, so we can do all of this optimistically. But
+ * we do that ACCESS_ONCE() to make sure that we never re-load the pointer.
+ *
+ * IOW: that the "list_is_singular()" test on the anon_vma_chain only
+ * matters for the 'stable anon_vma' case (ie the thing we want to avoid
+ * is to return an anon_vma that is "complex" due to having gone through
+ * a fork).
+ *
+ * We also make sure that the two vma's are compatible (adjacent,
+ * and with the same memory policies). That's all stable, even with just
+ * a read lock on the mm_sem.
+ */
+static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
+{
+	if (anon_vma_compatible(a, b)) {
+		struct anon_vma *anon_vma = ACCESS_ONCE(old->anon_vma);
+
+		if (anon_vma && list_is_singular(&old->anon_vma_chain))
+			return anon_vma;
+	}
+	return NULL;
+}
+
+/*
  * find_mergeable_anon_vma is used by anon_vma_prepare, to check
  * neighbouring vmas for a suitable anon_vma, before it goes off
  * to allocate a new anon_vma. It checks because a repetitive
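The adjacency test that anon_vma_compatible() above ends with is plain
page-offset arithmetic: b must begin exactly where a ends, and b's vm_pgoff
must continue a's mapping with no gap and no overlap. A small standalone
sketch with made-up addresses, assuming the usual 4 KiB pages (PAGE_SHIFT = 12)
and a simplified toy structure rather than the real vm_area_struct:

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SHIFT 12	/* assuming 4 KiB pages for this example */

/* Toy stand-in for the three vm_area_struct fields the check actually uses. */
struct toy_vma {
	unsigned long vm_start, vm_end;	/* byte addresses of the mapping */
	unsigned long vm_pgoff;		/* offset of the mapping, in pages */
};

/*
 * The adjacency/offset part of anon_vma_compatible(): b must begin exactly
 * where a ends, and b's page offset must continue a's mapping without a
 * gap or an overlap.
 */
static bool offsets_adjacent(const struct toy_vma *a, const struct toy_vma *b)
{
	return a->vm_end == b->vm_start &&
	       b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
}

int main(void)
{
	/* a maps pages 0..2 at addresses 0x100000..0x103000 (made-up numbers). */
	struct toy_vma a = { 0x100000, 0x103000, 0 };
	/* b starts at a's end and continues at page offset 3: compatible. */
	struct toy_vma b = { 0x103000, 0x105000, 3 };
	/* c also starts at a's end but jumps to page offset 5: not compatible. */
	struct toy_vma c = { 0x103000, 0x105000, 5 };

	printf("a/b adjacent: %s\n", offsets_adjacent(&a, &b) ? "yes" : "no");
	printf("a/c adjacent: %s\n", offsets_adjacent(&a, &c) ? "yes" : "no");
	return 0;
}

With these numbers b continues the mapping at page 3 and passes, while c jumps
to page 5 and fails, so its anon_vma would never even be considered for
sharing.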
@@ -834,28 +889,16 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
  */
 struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
 {
+	struct anon_vma *anon_vma;
 	struct vm_area_struct *near;
-	unsigned long vm_flags;
 
 	near = vma->vm_next;
 	if (!near)
 		goto try_prev;
 
-	/*
-	 * Since only mprotect tries to remerge vmas, match flags
-	 * which might be mprotected into each other later on.
-	 * Neither mlock nor madvise tries to remerge at present,
-	 * so leave their flags as obstructing a merge.
-	 */
-	vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
-	vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
-
-	if (near->anon_vma && vma->vm_end == near->vm_start &&
-			mpol_equal(vma_policy(vma), vma_policy(near)) &&
-			can_vma_merge_before(near, vm_flags,
-				NULL, vma->vm_file, vma->vm_pgoff +
-				((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)))
-		return near->anon_vma;
+	anon_vma = reusable_anon_vma(near, vma, near);
+	if (anon_vma)
+		return anon_vma;
 try_prev:
 	/*
 	 * It is potentially slow to have to call find_vma_prev here.
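The reusable_anon_vma() call added above runs under the read side of mm_sem,
so old->anon_vma may be set concurrently by another page fault; the comment in
the new helper relies on reading that pointer exactly once. A minimal
userspace sketch of the read-once idiom (ACCESS_ONCE() is written essentially
the way the kernel of this era defined it in <linux/compiler.h>, with
__typeof__ instead of typeof so it also builds in strict ISO C modes;
vma_like and snapshot_anon_vma() are hypothetical illustrations, not kernel
functions):

#include <stdio.h>

/*
 * The volatile cast forces the compiler to emit exactly one load, so the
 * value we test and the value we return cannot come from two different
 * reads of a pointer that another CPU is concurrently changing.
 */
#define ACCESS_ONCE(x) (*(volatile __typeof__(x) *)&(x))

struct anon_vma;			/* opaque here; only the pointer matters */

/* Hypothetical stand-in with just the field this sketch needs. */
struct vma_like {
	struct anon_vma *anon_vma;	/* may be set concurrently by a page fault */
};

/*
 * Snapshot the pointer once and make every later decision against that one
 * snapshot - the property the new comment in the patch relies on.
 */
static struct anon_vma *snapshot_anon_vma(struct vma_like *old)
{
	struct anon_vma *anon_vma = ACCESS_ONCE(old->anon_vma);

	/* From here on only the local is used, never a re-load of old->anon_vma. */
	return anon_vma;
}

int main(void)
{
	struct vma_like v = { .anon_vma = NULL };

	printf("snapshot: %p\n", (void *)snapshot_anon_vma(&v));
	return 0;
}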
@@ -868,14 +911,9 @@ try_prev:
 	if (!near)
 		goto none;
 
-	vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
-	vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
-
-	if (near->anon_vma && near->vm_end == vma->vm_start &&
-			mpol_equal(vma_policy(near), vma_policy(vma)) &&
-			can_vma_merge_after(near, vm_flags,
-				NULL, vma->vm_file, vma->vm_pgoff))
-		return near->anon_vma;
+	anon_vma = reusable_anon_vma(near, near, vma);
+	if (anon_vma)
+		return anon_vma;
 none:
 	/*
 	 * There's no absolute need to look only at touching neighbours: