diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-04-12 21:39:58 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-04-12 21:39:58 -0400 |
| commit | 64a8920fab962fb581bbe67856cb4740fc37d743 (patch) | |
| tree | a569ffc6d02994a899e4855eda71001d665a80a8 | |
| parent | 50b88c46f01939d19d4cdd30ad563d29265b6c59 (diff) | |
| parent | ea90002b0fa7bdee86ec22eba1d951f30bf043a6 (diff) | |
Merge branch 'anonvma'
* anonvma:
anonvma: when setting up page->mapping, we need to pick the _oldest_ anonvma
anon_vma: clone the anon_vma chain in the right order
vma_adjust: fix the copying of anon_vma chains
Simplify and comment on anon_vma re-use for anon_vma_prepare()
| -rw-r--r-- | mm/mmap.c | 110 | ||||
| -rw-r--r-- | mm/rmap.c | 17 |
2 files changed, 84 insertions, 43 deletions
| @@ -507,11 +507,12 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start, | |||
| 507 | struct address_space *mapping = NULL; | 507 | struct address_space *mapping = NULL; |
| 508 | struct prio_tree_root *root = NULL; | 508 | struct prio_tree_root *root = NULL; |
| 509 | struct file *file = vma->vm_file; | 509 | struct file *file = vma->vm_file; |
| 510 | struct anon_vma *anon_vma = NULL; | ||
| 511 | long adjust_next = 0; | 510 | long adjust_next = 0; |
| 512 | int remove_next = 0; | 511 | int remove_next = 0; |
| 513 | 512 | ||
| 514 | if (next && !insert) { | 513 | if (next && !insert) { |
| 514 | struct vm_area_struct *exporter = NULL; | ||
| 515 | |||
| 515 | if (end >= next->vm_end) { | 516 | if (end >= next->vm_end) { |
| 516 | /* | 517 | /* |
| 517 | * vma expands, overlapping all the next, and | 518 | * vma expands, overlapping all the next, and |
| @@ -519,7 +520,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start, | |||
| 519 | */ | 520 | */ |
| 520 | again: remove_next = 1 + (end > next->vm_end); | 521 | again: remove_next = 1 + (end > next->vm_end); |
| 521 | end = next->vm_end; | 522 | end = next->vm_end; |
| 522 | anon_vma = next->anon_vma; | 523 | exporter = next; |
| 523 | importer = vma; | 524 | importer = vma; |
| 524 | } else if (end > next->vm_start) { | 525 | } else if (end > next->vm_start) { |
| 525 | /* | 526 | /* |
| @@ -527,7 +528,7 @@ again: remove_next = 1 + (end > next->vm_end); | |||
| 527 | * mprotect case 5 shifting the boundary up. | 528 | * mprotect case 5 shifting the boundary up. |
| 528 | */ | 529 | */ |
| 529 | adjust_next = (end - next->vm_start) >> PAGE_SHIFT; | 530 | adjust_next = (end - next->vm_start) >> PAGE_SHIFT; |
| 530 | anon_vma = next->anon_vma; | 531 | exporter = next; |
| 531 | importer = vma; | 532 | importer = vma; |
| 532 | } else if (end < vma->vm_end) { | 533 | } else if (end < vma->vm_end) { |
| 533 | /* | 534 | /* |
| @@ -536,28 +537,19 @@ again: remove_next = 1 + (end > next->vm_end); | |||
| 536 | * mprotect case 4 shifting the boundary down. | 537 | * mprotect case 4 shifting the boundary down. |
| 537 | */ | 538 | */ |
| 538 | adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT); | 539 | adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT); |
| 539 | anon_vma = next->anon_vma; | 540 | exporter = vma; |
| 540 | importer = next; | 541 | importer = next; |
| 541 | } | 542 | } |
| 542 | } | ||
| 543 | 543 | ||
| 544 | /* | ||
| 545 | * When changing only vma->vm_end, we don't really need anon_vma lock. | ||
| 546 | */ | ||
| 547 | if (vma->anon_vma && (insert || importer || start != vma->vm_start)) | ||
| 548 | anon_vma = vma->anon_vma; | ||
| 549 | if (anon_vma) { | ||
| 550 | /* | 544 | /* |
| 551 | * Easily overlooked: when mprotect shifts the boundary, | 545 | * Easily overlooked: when mprotect shifts the boundary, |
| 552 | * make sure the expanding vma has anon_vma set if the | 546 | * make sure the expanding vma has anon_vma set if the |
| 553 | * shrinking vma had, to cover any anon pages imported. | 547 | * shrinking vma had, to cover any anon pages imported. |
| 554 | */ | 548 | */ |
| 555 | if (importer && !importer->anon_vma) { | 549 | if (exporter && exporter->anon_vma && !importer->anon_vma) { |
| 556 | /* Block reverse map lookups until things are set up. */ | 550 | if (anon_vma_clone(importer, exporter)) |
| 557 | if (anon_vma_clone(importer, vma)) { | ||
| 558 | return -ENOMEM; | 551 | return -ENOMEM; |
| 559 | } | 552 | importer->anon_vma = exporter->anon_vma; |
| 560 | importer->anon_vma = anon_vma; | ||
| 561 | } | 553 | } |
| 562 | } | 554 | } |
| 563 | 555 | ||
| @@ -825,6 +817,61 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, | |||
| 825 | } | 817 | } |
| 826 | 818 | ||
| 827 | /* | 819 | /* |
| 820 | * Rough compatibility check to quickly see if it's even worth looking | ||
| 821 | * at sharing an anon_vma. | ||
| 822 | * | ||
| 823 | * They need to have the same vm_file, and the flags can only differ | ||
| 824 | * in things that mprotect may change. | ||
| 825 | * | ||
| 826 | * NOTE! The fact that we share an anon_vma doesn't _have_ to mean that | ||
| 827 | * we can merge the two vma's. For example, we refuse to merge a vma if | ||
| 828 | * there is a vm_ops->close() function, because that indicates that the | ||
| 829 | * driver is doing some kind of reference counting. But that doesn't | ||
| 830 | * really matter for the anon_vma sharing case. | ||
| 831 | */ | ||
| 832 | static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b) | ||
| 833 | { | ||
| 834 | return a->vm_end == b->vm_start && | ||
| 835 | mpol_equal(vma_policy(a), vma_policy(b)) && | ||
| 836 | a->vm_file == b->vm_file && | ||
| 837 | !((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC)) && | ||
| 838 | b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT); | ||
| 839 | } | ||
| 840 | |||
| 841 | /* | ||
| 842 | * Do some basic sanity checking to see if we can re-use the anon_vma | ||
| 843 | * from 'old'. The 'a'/'b' vma's are in VM order - one of them will be | ||
| 844 | * the same as 'old', the other will be the new one that is trying | ||
| 845 | * to share the anon_vma. | ||
| 846 | * | ||
| 847 | * NOTE! This runs with mmap_sem held for reading, so it is possible that | ||
| 848 | * the anon_vma of 'old' is concurrently in the process of being set up | ||
| 849 | * by another page fault trying to merge _that_. But that's ok: if it | ||
| 850 | * is being set up, that automatically means that it will be a singleton | ||
| 851 | * acceptable for merging, so we can do all of this optimistically. But | ||
| 852 | * we do that ACCESS_ONCE() to make sure that we never re-load the pointer. | ||
| 853 | * | ||
| 854 | * IOW: that the "list_is_singular()" test on the anon_vma_chain only | ||
| 855 | * matters for the 'stable anon_vma' case (ie the thing we want to avoid | ||
| 856 | * is to return an anon_vma that is "complex" due to having gone through | ||
| 857 | * a fork). | ||
| 858 | * | ||
| 859 | * We also make sure that the two vma's are compatible (adjacent, | ||
| 860 | * and with the same memory policies). That's all stable, even with just | ||
| 861 | * a read lock on the mmap_sem. | ||
| 862 | */ | ||
| 863 | static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b) | ||
| 864 | { | ||
| 865 | if (anon_vma_compatible(a, b)) { | ||
| 866 | struct anon_vma *anon_vma = ACCESS_ONCE(old->anon_vma); | ||
| 867 | |||
| 868 | if (anon_vma && list_is_singular(&old->anon_vma_chain)) | ||
| 869 | return anon_vma; | ||
| 870 | } | ||
| 871 | return NULL; | ||
| 872 | } | ||
| 873 | |||
| 874 | /* | ||
| 828 | * find_mergeable_anon_vma is used by anon_vma_prepare, to check | 875 | * find_mergeable_anon_vma is used by anon_vma_prepare, to check |
| 829 | * neighbouring vmas for a suitable anon_vma, before it goes off | 876 | * neighbouring vmas for a suitable anon_vma, before it goes off |
| 830 | * to allocate a new anon_vma. It checks because a repetitive | 877 | * to allocate a new anon_vma. It checks because a repetitive |
| @@ -834,28 +881,16 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm, | |||
| 834 | */ | 881 | */ |
| 835 | struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma) | 882 | struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma) |
| 836 | { | 883 | { |
| 884 | struct anon_vma *anon_vma; | ||
| 837 | struct vm_area_struct *near; | 885 | struct vm_area_struct *near; |
| 838 | unsigned long vm_flags; | ||
| 839 | 886 | ||
| 840 | near = vma->vm_next; | 887 | near = vma->vm_next; |
| 841 | if (!near) | 888 | if (!near) |
| 842 | goto try_prev; | 889 | goto try_prev; |
| 843 | 890 | ||
| 844 | /* | 891 | anon_vma = reusable_anon_vma(near, vma, near); |
| 845 | * Since only mprotect tries to remerge vmas, match flags | 892 | if (anon_vma) |
| 846 | * which might be mprotected into each other later on. | 893 | return anon_vma; |
| 847 | * Neither mlock nor madvise tries to remerge at present, | ||
| 848 | * so leave their flags as obstructing a merge. | ||
| 849 | */ | ||
| 850 | vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC); | ||
| 851 | vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC); | ||
| 852 | |||
| 853 | if (near->anon_vma && vma->vm_end == near->vm_start && | ||
| 854 | mpol_equal(vma_policy(vma), vma_policy(near)) && | ||
| 855 | can_vma_merge_before(near, vm_flags, | ||
| 856 | NULL, vma->vm_file, vma->vm_pgoff + | ||
| 857 | ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT))) | ||
| 858 | return near->anon_vma; | ||
| 859 | try_prev: | 894 | try_prev: |
| 860 | /* | 895 | /* |
| 861 | * It is potentially slow to have to call find_vma_prev here. | 896 | * It is potentially slow to have to call find_vma_prev here. |
| @@ -868,14 +903,9 @@ try_prev: | |||
| 868 | if (!near) | 903 | if (!near) |
| 869 | goto none; | 904 | goto none; |
| 870 | 905 | ||
| 871 | vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC); | 906 | anon_vma = reusable_anon_vma(near, near, vma); |
| 872 | vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC); | 907 | if (anon_vma) |
| 873 | 908 | return anon_vma; | |
| 874 | if (near->anon_vma && near->vm_end == vma->vm_start && | ||
| 875 | mpol_equal(vma_policy(near), vma_policy(vma)) && | ||
| 876 | can_vma_merge_after(near, vm_flags, | ||
| 877 | NULL, vma->vm_file, vma->vm_pgoff)) | ||
| 878 | return near->anon_vma; | ||
| 879 | none: | 909 | none: |
| 880 | /* | 910 | /* |
| 881 | * There's no absolute need to look only at touching neighbours: | 911 | * There's no absolute need to look only at touching neighbours: |
| @@ -182,7 +182,7 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src) | |||
| 182 | { | 182 | { |
| 183 | struct anon_vma_chain *avc, *pavc; | 183 | struct anon_vma_chain *avc, *pavc; |
| 184 | 184 | ||
| 185 | list_for_each_entry(pavc, &src->anon_vma_chain, same_vma) { | 185 | list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) { |
| 186 | avc = anon_vma_chain_alloc(); | 186 | avc = anon_vma_chain_alloc(); |
| 187 | if (!avc) | 187 | if (!avc) |
| 188 | goto enomem_failure; | 188 | goto enomem_failure; |
| @@ -734,9 +734,20 @@ void page_move_anon_rmap(struct page *page, | |||
| 734 | static void __page_set_anon_rmap(struct page *page, | 734 | static void __page_set_anon_rmap(struct page *page, |
| 735 | struct vm_area_struct *vma, unsigned long address) | 735 | struct vm_area_struct *vma, unsigned long address) |
| 736 | { | 736 | { |
| 737 | struct anon_vma *anon_vma = vma->anon_vma; | 737 | struct anon_vma_chain *avc; |
| 738 | struct anon_vma *anon_vma; | ||
| 739 | |||
| 740 | BUG_ON(!vma->anon_vma); | ||
| 741 | |||
| 742 | /* | ||
| 743 | * We must use the _oldest_ possible anon_vma for the page mapping! | ||
| 744 | * | ||
| 745 | * So take the last AVC chain entry in the vma, which is the deepest | ||
| 746 | * ancestor, and use the anon_vma from that. | ||
| 747 | */ | ||
| 748 | avc = list_entry(vma->anon_vma_chain.prev, struct anon_vma_chain, same_vma); | ||
| 749 | anon_vma = avc->anon_vma; | ||
| 738 | 750 | ||
| 739 | BUG_ON(!anon_vma); | ||
| 740 | anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON; | 751 | anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON; |
| 741 | page->mapping = (struct address_space *) anon_vma; | 752 | page->mapping = (struct address_space *) anon_vma; |
| 742 | page->index = linear_page_index(vma, address); | 753 | page->index = linear_page_index(vma, address); |
