author		Ingo Molnar <mingo@elte.hu>	2010-04-23 05:10:28 -0400
committer	Ingo Molnar <mingo@elte.hu>	2010-04-23 05:10:30 -0400
commit		70bce3ba77540ebe77b8c0e1ac38d281a23fbb5e (patch)
tree		34b09a49228f0949ff49dce66a433b0dfd83a2dc /mm
parent		6eca8cc35b50af1037bc919106dd6dd332c959c2 (diff)
parent		d5a30458a90597915977f06e79406b664a41b8ac (diff)
Merge branch 'linus' into perf/core
Merge reason: merge the latest fixes, update to latest -rc.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'mm')
-rw-r--r--	mm/backing-dev.c	  3
-rw-r--r--	mm/mmap.c		110
-rw-r--r--	mm/rmap.c		 24
-rw-r--r--	mm/slab.c		 13
-rw-r--r--	mm/slub.c		  3
-rw-r--r--	mm/util.c		 21
6 files changed, 118 insertions(+), 56 deletions(-)
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 0e8ca0347707..f13e067e1467 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -227,6 +227,9 @@ static struct device_attribute bdi_dev_attrs[] = {
 static __init int bdi_class_init(void)
 {
 	bdi_class = class_create(THIS_MODULE, "bdi");
+	if (IS_ERR(bdi_class))
+		return PTR_ERR(bdi_class);
+
 	bdi_class->dev_attrs = bdi_dev_attrs;
 	bdi_debug_init();
 	return 0;
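
Why the added check matters: class_create() reports failure through the
ERR_PTR() convention rather than by returning NULL, so the result must be
tested with IS_ERR() and decoded with PTR_ERR(). A minimal sketch of that
convention (illustrative only, not part of the patch; the class name and
variable are made up):

	struct class *example_class;

	example_class = class_create(THIS_MODULE, "example");
	if (IS_ERR(example_class))
		return PTR_ERR(example_class);	/* a negative errno, e.g. -ENOMEM */
	/* example_class is now a valid pointer, safe to dereference */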
diff --git a/mm/mmap.c b/mm/mmap.c
index 75557c639ad4..f90ea92f755a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -507,11 +507,12 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	struct address_space *mapping = NULL;
 	struct prio_tree_root *root = NULL;
 	struct file *file = vma->vm_file;
-	struct anon_vma *anon_vma = NULL;
 	long adjust_next = 0;
 	int remove_next = 0;
 
 	if (next && !insert) {
+		struct vm_area_struct *exporter = NULL;
+
 		if (end >= next->vm_end) {
 			/*
 			 * vma expands, overlapping all the next, and
@@ -519,7 +520,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
 			 */
 again:			remove_next = 1 + (end > next->vm_end);
 			end = next->vm_end;
-			anon_vma = next->anon_vma;
+			exporter = next;
 			importer = vma;
 		} else if (end > next->vm_start) {
 			/*
@@ -527,7 +528,7 @@ again:			remove_next = 1 + (end > next->vm_end);
 			 * mprotect case 5 shifting the boundary up.
 			 */
 			adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
-			anon_vma = next->anon_vma;
+			exporter = next;
 			importer = vma;
 		} else if (end < vma->vm_end) {
 			/*
@@ -536,28 +537,19 @@ again:			remove_next = 1 + (end > next->vm_end);
 			 * mprotect case 4 shifting the boundary down.
 			 */
 			adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
-			anon_vma = next->anon_vma;
+			exporter = vma;
 			importer = next;
 		}
-	}
 
-	/*
-	 * When changing only vma->vm_end, we don't really need anon_vma lock.
-	 */
-	if (vma->anon_vma && (insert || importer || start != vma->vm_start))
-		anon_vma = vma->anon_vma;
-	if (anon_vma) {
 		/*
 		 * Easily overlooked: when mprotect shifts the boundary,
 		 * make sure the expanding vma has anon_vma set if the
 		 * shrinking vma had, to cover any anon pages imported.
 		 */
-		if (importer && !importer->anon_vma) {
-			/* Block reverse map lookups until things are set up. */
-			if (anon_vma_clone(importer, vma)) {
+		if (exporter && exporter->anon_vma && !importer->anon_vma) {
+			if (anon_vma_clone(importer, exporter))
 				return -ENOMEM;
-			}
-			importer->anon_vma = anon_vma;
+			importer->anon_vma = exporter->anon_vma;
 		}
 	}
 
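
The rewrite above replaces the single anon_vma local with an explicit
exporter/importer pair: whichever vma shrinks exports its anon pages, and
the vma that grows must import the exporter's anon_vma so rmap can still
find those pages after the boundary moves. A condensed restatement of the
hunk's logic (locking and the surrounding vma_adjust() context omitted):

	/* Which side exports depends on how the boundary moves. */
	if (end >= next->vm_end) {		/* vma swallows next entirely */
		exporter = next;
		importer = vma;
	} else if (end > next->vm_start) {	/* boundary shifts up */
		exporter = next;
		importer = vma;
	} else if (end < vma->vm_end) {		/* boundary shifts down */
		exporter = vma;
		importer = next;
	}

	/* The importer inherits the exporter's anon_vma; otherwise anon
	 * pages moved across the boundary would lose their reverse map. */
	if (exporter && exporter->anon_vma && !importer->anon_vma) {
		if (anon_vma_clone(importer, exporter))
			return -ENOMEM;
		importer->anon_vma = exporter->anon_vma;
	}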
@@ -825,6 +817,61 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 }
 
 /*
+ * Rough compatibility check to quickly see if it's even worth looking
+ * at sharing an anon_vma.
+ *
+ * They need to have the same vm_file, and the flags can only differ
+ * in things that mprotect may change.
+ *
+ * NOTE! The fact that we share an anon_vma doesn't _have_ to mean that
+ * we can merge the two vma's. For example, we refuse to merge a vma if
+ * there is a vm_ops->close() function, because that indicates that the
+ * driver is doing some kind of reference counting. But that doesn't
+ * really matter for the anon_vma sharing case.
+ */
+static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
+{
+	return a->vm_end == b->vm_start &&
+		mpol_equal(vma_policy(a), vma_policy(b)) &&
+		a->vm_file == b->vm_file &&
+		!((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC)) &&
+		b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
+}
+
+/*
+ * Do some basic sanity checking to see if we can re-use the anon_vma
+ * from 'old'. The 'a'/'b' vma's are in VM order - one of them will be
+ * the same as 'old', the other will be the new one that is trying
+ * to share the anon_vma.
+ *
+ * NOTE! This runs with mm_sem held for reading, so it is possible that
+ * the anon_vma of 'old' is concurrently in the process of being set up
+ * by another page fault trying to merge _that_. But that's ok: if it
+ * is being set up, that automatically means that it will be a singleton
+ * acceptable for merging, so we can do all of this optimistically. But
+ * we do that ACCESS_ONCE() to make sure that we never re-load the pointer.
+ *
+ * IOW: that the "list_is_singular()" test on the anon_vma_chain only
+ * matters for the 'stable anon_vma' case (ie the thing we want to avoid
+ * is to return an anon_vma that is "complex" due to having gone through
+ * a fork).
+ *
+ * We also make sure that the two vma's are compatible (adjacent,
+ * and with the same memory policies). That's all stable, even with just
+ * a read lock on the mm_sem.
+ */
+static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
+{
+	if (anon_vma_compatible(a, b)) {
+		struct anon_vma *anon_vma = ACCESS_ONCE(old->anon_vma);
+
+		if (anon_vma && list_is_singular(&old->anon_vma_chain))
+			return anon_vma;
+	}
+	return NULL;
+}
+
+/*
  * find_mergeable_anon_vma is used by anon_vma_prepare, to check
  * neighbouring vmas for a suitable anon_vma, before it goes off
  * to allocate a new anon_vma. It checks because a repetitive
@@ -834,28 +881,16 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
  */
 struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
 {
+	struct anon_vma *anon_vma;
 	struct vm_area_struct *near;
-	unsigned long vm_flags;
 
 	near = vma->vm_next;
 	if (!near)
 		goto try_prev;
 
-	/*
-	 * Since only mprotect tries to remerge vmas, match flags
-	 * which might be mprotected into each other later on.
-	 * Neither mlock nor madvise tries to remerge at present,
-	 * so leave their flags as obstructing a merge.
-	 */
-	vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
-	vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
-
-	if (near->anon_vma && vma->vm_end == near->vm_start &&
-			mpol_equal(vma_policy(vma), vma_policy(near)) &&
-			can_vma_merge_before(near, vm_flags,
-				NULL, vma->vm_file, vma->vm_pgoff +
-				((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)))
-		return near->anon_vma;
+	anon_vma = reusable_anon_vma(near, vma, near);
+	if (anon_vma)
+		return anon_vma;
 try_prev:
 	/*
 	 * It is potentially slow to have to call find_vma_prev here.
@@ -868,14 +903,9 @@ try_prev:
 	if (!near)
 		goto none;
 
-	vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
-	vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
-
-	if (near->anon_vma && near->vm_end == vma->vm_start &&
-			mpol_equal(vma_policy(near), vma_policy(vma)) &&
-			can_vma_merge_after(near, vm_flags,
-				NULL, vma->vm_file, vma->vm_pgoff))
-		return near->anon_vma;
+	anon_vma = reusable_anon_vma(near, near, vma);
+	if (anon_vma)
+		return anon_vma;
 none:
 	/*
 	 * There's no absolute need to look only at touching neighbours:
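
Note the argument convention the new helpers establish: reusable_anon_vma(old,
a, b) takes 'a' and 'b' in VM order, and 'old' is whichever neighbour already
exists and whose anon_vma may be reused. The two call sites above follow
directly from that (condensed from the hunks):

	/* next neighbour: vma precedes near in VM order */
	anon_vma = reusable_anon_vma(near, vma, near);

	/* previous neighbour: near precedes vma in VM order */
	anon_vma = reusable_anon_vma(near, near, vma);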
diff --git a/mm/rmap.c b/mm/rmap.c
index eaa7a09eb72e..526704e8215d 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -182,7 +182,7 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 {
 	struct anon_vma_chain *avc, *pavc;
 
-	list_for_each_entry(pavc, &src->anon_vma_chain, same_vma) {
+	list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
 		avc = anon_vma_chain_alloc();
 		if (!avc)
 			goto enomem_failure;
@@ -730,13 +730,29 @@ void page_move_anon_rmap(struct page *page,
  * @page: the page to add the mapping to
  * @vma: the vm area in which the mapping is added
  * @address: the user virtual address mapped
+ * @exclusive: the page is exclusively owned by the current process
  */
 static void __page_set_anon_rmap(struct page *page,
-	struct vm_area_struct *vma, unsigned long address)
+	struct vm_area_struct *vma, unsigned long address, int exclusive)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
 
 	BUG_ON(!anon_vma);
+
+	/*
+	 * If the page isn't exclusively mapped into this vma,
+	 * we must use the _oldest_ possible anon_vma for the
+	 * page mapping!
+	 *
+	 * So take the last AVC chain entry in the vma, which is
+	 * the deepest ancestor, and use the anon_vma from that.
+	 */
+	if (!exclusive) {
+		struct anon_vma_chain *avc;
+		avc = list_entry(vma->anon_vma_chain.prev, struct anon_vma_chain, same_vma);
+		anon_vma = avc->anon_vma;
+	}
+
 	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
 	page->mapping = (struct address_space *) anon_vma;
 	page->index = linear_page_index(vma, address);
@@ -791,7 +807,7 @@ void page_add_anon_rmap(struct page *page,
 	VM_BUG_ON(!PageLocked(page));
 	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
 	if (first)
-		__page_set_anon_rmap(page, vma, address);
+		__page_set_anon_rmap(page, vma, address, 0);
 	else
 		__page_check_anon_rmap(page, vma, address);
 }
@@ -813,7 +829,7 @@ void page_add_new_anon_rmap(struct page *page,
 	SetPageSwapBacked(page);
 	atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
 	__inc_zone_page_state(page, NR_ANON_PAGES);
-	__page_set_anon_rmap(page, vma, address);
+	__page_set_anon_rmap(page, vma, address, 1);
 	if (page_evictable(page, vma))
 		lru_cache_add_lru(page, LRU_ACTIVE_ANON);
 	else
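
The two rmap.c changes work together. __page_set_anon_rmap() now assumes the
last entry on vma->anon_vma_chain is the oldest anon_vma (the deepest
ancestor), and anon_vma_clone() walking the source list in reverse -
presumably because each new entry is linked at the head of the destination
list - keeps that tail position stable across clones. Restating the lookup
from the hunk above:

	/* The tail of the same_vma list is the deepest ancestor, i.e.
	 * the oldest anon_vma this vma belongs to; a non-exclusive
	 * page must be mapped through that one. */
	struct anon_vma_chain *avc;

	avc = list_entry(vma->anon_vma_chain.prev,
			 struct anon_vma_chain, same_vma);
	anon_vma = avc->anon_vma;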
diff --git a/mm/slab.c b/mm/slab.c
index a9f325b28bed..bac0f4fcc216 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3602,21 +3602,10 @@ EXPORT_SYMBOL(kmem_cache_alloc_notrace);
  */
 int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr)
 {
-	unsigned long addr = (unsigned long)ptr;
-	unsigned long min_addr = PAGE_OFFSET;
-	unsigned long align_mask = BYTES_PER_WORD - 1;
 	unsigned long size = cachep->buffer_size;
 	struct page *page;
 
-	if (unlikely(addr < min_addr))
-		goto out;
-	if (unlikely(addr > (unsigned long)high_memory - size))
-		goto out;
-	if (unlikely(addr & align_mask))
-		goto out;
-	if (unlikely(!kern_addr_valid(addr)))
-		goto out;
-	if (unlikely(!kern_addr_valid(addr + size - 1)))
+	if (unlikely(!kern_ptr_validate(ptr, size)))
 		goto out;
 	page = virt_to_page(ptr);
 	if (unlikely(!PageSlab(page)))
diff --git a/mm/slub.c b/mm/slub.c
index b364844a1068..7d6c8b1ccf63 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2386,6 +2386,9 @@ int kmem_ptr_validate(struct kmem_cache *s, const void *object)
 {
 	struct page *page;
 
+	if (!kern_ptr_validate(object, s->size))
+		return 0;
+
 	page = get_object_page(object);
 
 	if (!page || s != page->slab)
diff --git a/mm/util.c b/mm/util.c
index 834db7be240f..f5712e8964be 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -186,6 +186,27 @@ void kzfree(const void *p)
 }
 EXPORT_SYMBOL(kzfree);
 
+int kern_ptr_validate(const void *ptr, unsigned long size)
+{
+	unsigned long addr = (unsigned long)ptr;
+	unsigned long min_addr = PAGE_OFFSET;
+	unsigned long align_mask = sizeof(void *) - 1;
+
+	if (unlikely(addr < min_addr))
+		goto out;
+	if (unlikely(addr > (unsigned long)high_memory - size))
+		goto out;
+	if (unlikely(addr & align_mask))
+		goto out;
+	if (unlikely(!kern_addr_valid(addr)))
+		goto out;
+	if (unlikely(!kern_addr_valid(addr + size - 1)))
+		goto out;
+	return 1;
+out:
+	return 0;
+}
+
 /*
  * strndup_user - duplicate an existing string from user space
  * @s: The string to duplicate
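
kern_ptr_validate() consolidates the pointer sanity checks that mm/slab.c
previously open-coded (and that mm/slub.c lacked entirely): lower bound at
PAGE_OFFSET, upper bound at high_memory minus the object size, pointer-size
alignment, and kern_addr_valid() on both the first and last byte of the
object. A condensed caller sketch (adapted from the slab hunk above; the
wrapper name is made up for illustration):

	/* Check a candidate object pointer before trusting anything
	 * derived from it. Returns 1 only if the whole [ptr, ptr+size)
	 * range looks like a mapped, aligned kernel address backed by
	 * a slab page. */
	static int ptr_looks_valid(struct kmem_cache *cachep, const void *ptr)
	{
		if (unlikely(!kern_ptr_validate(ptr, cachep->buffer_size)))
			return 0;
		return PageSlab(virt_to_page(ptr));
	}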