Diffstat (limited to 'mm/mmap.c')
-rw-r--r--	mm/mmap.c	398
1 file changed, 268 insertions(+), 130 deletions(-)
diff --git a/mm/mmap.c b/mm/mmap.c
index 73f5e4b64010..456ec6f27889 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -20,7 +20,6 @@
 #include <linux/fs.h>
 #include <linux/personality.h>
 #include <linux/security.h>
-#include <linux/ima.h>
 #include <linux/hugetlb.h>
 #include <linux/profile.h>
 #include <linux/module.h>
@@ -266,7 +265,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
  * segment grow beyond its set limit the in case where the limit is
  * not page aligned -Ram Gupta
  */
-	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
+	rlim = rlimit(RLIMIT_DATA);
 	if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
 			(mm->end_data - mm->start_data) > rlim)
 		goto out;
@@ -438,7 +437,6 @@ __vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	__vma_link_list(mm, vma, prev, rb_parent);
 	__vma_link_rb(mm, vma, rb_link, rb_parent);
-	__anon_vma_link(vma);
 }
 
 static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -500,7 +498,7 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
  * are necessary. The "insert" vma (if any) is to be inserted
  * before we drop the necessary locks.
  */
-void vma_adjust(struct vm_area_struct *vma, unsigned long start,
+int vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
 {
 	struct mm_struct *mm = vma->vm_mm;
@@ -509,11 +507,12 @@ void vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	struct address_space *mapping = NULL;
 	struct prio_tree_root *root = NULL;
 	struct file *file = vma->vm_file;
-	struct anon_vma *anon_vma = NULL;
 	long adjust_next = 0;
 	int remove_next = 0;
 
 	if (next && !insert) {
+		struct vm_area_struct *exporter = NULL;
+
 		if (end >= next->vm_end) {
 			/*
 			 * vma expands, overlapping all the next, and
@@ -521,7 +520,7 @@ void vma_adjust(struct vm_area_struct *vma, unsigned long start,
 			 */
 again:			remove_next = 1 + (end > next->vm_end);
 			end = next->vm_end;
-			anon_vma = next->anon_vma;
+			exporter = next;
 			importer = vma;
 		} else if (end > next->vm_start) {
 			/*
@@ -529,7 +528,7 @@ again: remove_next = 1 + (end > next->vm_end);
 			 * mprotect case 5 shifting the boundary up.
 			 */
 			adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
-			anon_vma = next->anon_vma;
+			exporter = next;
 			importer = vma;
 		} else if (end < vma->vm_end) {
 			/*
@@ -538,9 +537,20 @@ again: remove_next = 1 + (end > next->vm_end);
 			 * mprotect case 4 shifting the boundary down.
 			 */
 			adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
-			anon_vma = next->anon_vma;
+			exporter = vma;
 			importer = next;
 		}
+
+		/*
+		 * Easily overlooked: when mprotect shifts the boundary,
+		 * make sure the expanding vma has anon_vma set if the
+		 * shrinking vma had, to cover any anon pages imported.
+		 */
+		if (exporter && exporter->anon_vma && !importer->anon_vma) {
+			if (anon_vma_clone(importer, exporter))
+				return -ENOMEM;
+			importer->anon_vma = exporter->anon_vma;
+		}
 	}
 
 	if (file) {
@@ -568,25 +578,6 @@ again: remove_next = 1 + (end > next->vm_end);
 		}
 	}
 
-	/*
-	 * When changing only vma->vm_end, we don't really need
-	 * anon_vma lock.
-	 */
-	if (vma->anon_vma && (insert || importer || start != vma->vm_start))
-		anon_vma = vma->anon_vma;
-	if (anon_vma) {
-		spin_lock(&anon_vma->lock);
-		/*
-		 * Easily overlooked: when mprotect shifts the boundary,
-		 * make sure the expanding vma has anon_vma set if the
-		 * shrinking vma had, to cover any anon pages imported.
-		 */
-		if (importer && !importer->anon_vma) {
-			importer->anon_vma = anon_vma;
-			__anon_vma_link(importer);
-		}
-	}
-
 	if (root) {
 		flush_dcache_mmap_lock(mapping);
 		vma_prio_tree_remove(vma, root);
@@ -617,8 +608,6 @@ again: remove_next = 1 + (end > next->vm_end);
 		__vma_unlink(mm, next, vma);
 		if (file)
 			__remove_shared_vm_struct(next, file, mapping);
-		if (next->anon_vma)
-			__anon_vma_merge(vma, next);
 	} else if (insert) {
 		/*
 		 * split_vma has split insert from vma, and needs
@@ -628,8 +617,6 @@ again: remove_next = 1 + (end > next->vm_end);
 		__insert_vm_struct(mm, insert);
 	}
 
-	if (anon_vma)
-		spin_unlock(&anon_vma->lock);
 	if (mapping)
 		spin_unlock(&mapping->i_mmap_lock);
 
@@ -639,6 +626,8 @@ again: remove_next = 1 + (end > next->vm_end);
 			if (next->vm_flags & VM_EXECUTABLE)
 				removed_exe_file_vma(mm);
 		}
+		if (next->anon_vma)
+			anon_vma_merge(vma, next);
 		mm->map_count--;
 		mpol_put(vma_policy(next));
 		kmem_cache_free(vm_area_cachep, next);
@@ -654,6 +643,8 @@ again: remove_next = 1 + (end > next->vm_end);
 	}
 
 	validate_mm(mm);
+
+	return 0;
 }
 
 /*
@@ -760,6 +751,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 {
 	pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
 	struct vm_area_struct *area, *next;
+	int err;
 
 	/*
 	 * We later require that vma->vm_flags == vm_flags,
@@ -793,11 +785,13 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 				is_mergeable_anon_vma(prev->anon_vma,
 						      next->anon_vma)) {
 							/* cases 1, 6 */
-			vma_adjust(prev, prev->vm_start,
+			err = vma_adjust(prev, prev->vm_start,
 				next->vm_end, prev->vm_pgoff, NULL);
 		} else					/* cases 2, 5, 7 */
-			vma_adjust(prev, prev->vm_start,
+			err = vma_adjust(prev, prev->vm_start,
 				end, prev->vm_pgoff, NULL);
+		if (err)
+			return NULL;
 		return prev;
 	}
 
@@ -809,11 +803,13 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 			can_vma_merge_before(next, vm_flags,
 					anon_vma, file, pgoff+pglen)) {
 		if (prev && addr < prev->vm_end)	/* case 4 */
-			vma_adjust(prev, prev->vm_start,
+			err = vma_adjust(prev, prev->vm_start,
 				addr, prev->vm_pgoff, NULL);
 		else					/* cases 3, 8 */
-			vma_adjust(area, addr, next->vm_end,
+			err = vma_adjust(area, addr, next->vm_end,
 				next->vm_pgoff - pglen, NULL);
+		if (err)
+			return NULL;
 		return area;
 	}
 
@@ -821,6 +817,61 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 }
 
 /*
+ * Rough compatbility check to quickly see if it's even worth looking
+ * at sharing an anon_vma.
+ *
+ * They need to have the same vm_file, and the flags can only differ
+ * in things that mprotect may change.
+ *
+ * NOTE! The fact that we share an anon_vma doesn't _have_ to mean that
+ * we can merge the two vma's. For example, we refuse to merge a vma if
+ * there is a vm_ops->close() function, because that indicates that the
+ * driver is doing some kind of reference counting. But that doesn't
+ * really matter for the anon_vma sharing case.
+ */
+static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
+{
+	return a->vm_end == b->vm_start &&
+		mpol_equal(vma_policy(a), vma_policy(b)) &&
+		a->vm_file == b->vm_file &&
+		!((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC)) &&
+		b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
+}
+
+/*
+ * Do some basic sanity checking to see if we can re-use the anon_vma
+ * from 'old'. The 'a'/'b' vma's are in VM order - one of them will be
+ * the same as 'old', the other will be the new one that is trying
+ * to share the anon_vma.
+ *
+ * NOTE! This runs with mm_sem held for reading, so it is possible that
+ * the anon_vma of 'old' is concurrently in the process of being set up
+ * by another page fault trying to merge _that_. But that's ok: if it
+ * is being set up, that automatically means that it will be a singleton
+ * acceptable for merging, so we can do all of this optimistically. But
+ * we do that ACCESS_ONCE() to make sure that we never re-load the pointer.
+ *
+ * IOW: that the "list_is_singular()" test on the anon_vma_chain only
+ * matters for the 'stable anon_vma' case (ie the thing we want to avoid
+ * is to return an anon_vma that is "complex" due to having gone through
+ * a fork).
+ *
+ * We also make sure that the two vma's are compatible (adjacent,
+ * and with the same memory policies). That's all stable, even with just
+ * a read lock on the mm_sem.
+ */
+static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
+{
+	if (anon_vma_compatible(a, b)) {
+		struct anon_vma *anon_vma = ACCESS_ONCE(old->anon_vma);
+
+		if (anon_vma && list_is_singular(&old->anon_vma_chain))
+			return anon_vma;
+	}
+	return NULL;
+}
+
+/*
  * find_mergeable_anon_vma is used by anon_vma_prepare, to check
  * neighbouring vmas for a suitable anon_vma, before it goes off
  * to allocate a new anon_vma. It checks because a repetitive
@@ -830,28 +881,16 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
  */
 struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
 {
+	struct anon_vma *anon_vma;
 	struct vm_area_struct *near;
-	unsigned long vm_flags;
 
 	near = vma->vm_next;
 	if (!near)
 		goto try_prev;
 
-	/*
-	 * Since only mprotect tries to remerge vmas, match flags
-	 * which might be mprotected into each other later on.
-	 * Neither mlock nor madvise tries to remerge at present,
-	 * so leave their flags as obstructing a merge.
-	 */
-	vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
-	vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
-
-	if (near->anon_vma && vma->vm_end == near->vm_start &&
-			mpol_equal(vma_policy(vma), vma_policy(near)) &&
-			can_vma_merge_before(near, vm_flags,
-				NULL, vma->vm_file, vma->vm_pgoff +
-				((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)))
-		return near->anon_vma;
+	anon_vma = reusable_anon_vma(near, vma, near);
+	if (anon_vma)
+		return anon_vma;
 try_prev:
 	/*
 	 * It is potentially slow to have to call find_vma_prev here.
@@ -864,14 +903,9 @@ try_prev:
 	if (!near)
 		goto none;
 
-	vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
-	vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
-
-	if (near->anon_vma && near->vm_end == vma->vm_start &&
-			mpol_equal(vma_policy(near), vma_policy(vma)) &&
-			can_vma_merge_after(near, vm_flags,
-				NULL, vma->vm_file, vma->vm_pgoff))
-		return near->anon_vma;
+	anon_vma = reusable_anon_vma(near, near, vma);
+	if (anon_vma)
+		return anon_vma;
 none:
 	/*
 	 * There's no absolute need to look only at touching neighbours:
@@ -932,13 +966,9 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 	if (!(flags & MAP_FIXED))
 		addr = round_hint_to_min(addr);
 
-	error = arch_mmap_check(addr, len, flags);
-	if (error)
-		return error;
-
 	/* Careful about overflows.. */
 	len = PAGE_ALIGN(len);
-	if (!len || len > TASK_SIZE)
+	if (!len)
 		return -ENOMEM;
 
 	/* offset overflow? */
@@ -949,24 +979,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 	if (mm->map_count > sysctl_max_map_count)
 		return -ENOMEM;
 
-	if (flags & MAP_HUGETLB) {
-		struct user_struct *user = NULL;
-		if (file)
-			return -EINVAL;
-
-		/*
-		 * VM_NORESERVE is used because the reservations will be
-		 * taken when vm_ops->mmap() is called
-		 * A dummy user value is used because we are not locking
-		 * memory so no accounting is necessary
-		 */
-		len = ALIGN(len, huge_page_size(&default_hstate));
-		file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
-						&user, HUGETLB_ANONHUGE_INODE);
-		if (IS_ERR(file))
-			return PTR_ERR(file);
-	}
-
 	/* Obtain the address to map to. we verify (or select) it and ensure
 	 * that it represents a valid section of the address space.
 	 */
@@ -990,7 +1002,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 		unsigned long locked, lock_limit;
 		locked = len >> PAGE_SHIFT;
 		locked += mm->locked_vm;
-		lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+		lock_limit = rlimit(RLIMIT_MEMLOCK);
 		lock_limit >>= PAGE_SHIFT;
 		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
 			return -EAGAIN;
@@ -1061,14 +1073,75 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 	error = security_file_mmap(file, reqprot, prot, flags, addr, 0);
 	if (error)
 		return error;
-	error = ima_file_mmap(file, prot);
-	if (error)
-		return error;
 
 	return mmap_region(file, addr, len, flags, vm_flags, pgoff);
 }
 EXPORT_SYMBOL(do_mmap_pgoff);
 
+SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
+		unsigned long, prot, unsigned long, flags,
+		unsigned long, fd, unsigned long, pgoff)
+{
+	struct file *file = NULL;
+	unsigned long retval = -EBADF;
+
+	if (!(flags & MAP_ANONYMOUS)) {
+		if (unlikely(flags & MAP_HUGETLB))
+			return -EINVAL;
+		file = fget(fd);
+		if (!file)
+			goto out;
+	} else if (flags & MAP_HUGETLB) {
+		struct user_struct *user = NULL;
+		/*
+		 * VM_NORESERVE is used because the reservations will be
+		 * taken when vm_ops->mmap() is called
+		 * A dummy user value is used because we are not locking
+		 * memory so no accounting is necessary
+		 */
+		len = ALIGN(len, huge_page_size(&default_hstate));
+		file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
+						&user, HUGETLB_ANONHUGE_INODE);
+		if (IS_ERR(file))
+			return PTR_ERR(file);
+	}
+
+	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+
+	down_write(&current->mm->mmap_sem);
+	retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+	up_write(&current->mm->mmap_sem);
+
+	if (file)
+		fput(file);
+out:
+	return retval;
+}
+
+#ifdef __ARCH_WANT_SYS_OLD_MMAP
+struct mmap_arg_struct {
+	unsigned long addr;
+	unsigned long len;
+	unsigned long prot;
+	unsigned long flags;
+	unsigned long fd;
+	unsigned long offset;
+};
+
+SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
+{
+	struct mmap_arg_struct a;
+
+	if (copy_from_user(&a, arg, sizeof(a)))
+		return -EFAULT;
+	if (a.offset & ~PAGE_MASK)
+		return -EINVAL;
+
+	return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
+			      a.offset >> PAGE_SHIFT);
+}
+#endif /* __ARCH_WANT_SYS_OLD_MMAP */
+
 /*
  * Some shared mappigns will want the pages marked read-only
  * to track write events. If so, we'll downgrade vm_page_prot
@@ -1191,6 +1264,7 @@ munmap_back:
 	vma->vm_flags = vm_flags;
 	vma->vm_page_prot = vm_get_page_prot(vm_flags);
 	vma->vm_pgoff = pgoff;
+	INIT_LIST_HEAD(&vma->anon_vma_chain);
 
 	if (file) {
 		error = -EINVAL;
@@ -1224,8 +1298,20 @@ munmap_back:
 		goto free_vma;
 	}
 
-	if (vma_wants_writenotify(vma))
+	if (vma_wants_writenotify(vma)) {
+		pgprot_t pprot = vma->vm_page_prot;
+
+		/* Can vma->vm_page_prot have changed??
+		 *
+		 * Answer: Yes, drivers may have changed it in their
+		 * f_op->mmap method.
+		 *
+		 * Ensures that vmas marked as uncached stay that way.
+		 */
 		vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
+		if (pgprot_val(pprot) == pgprot_val(pgprot_noncached(pprot)))
+			vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	}
 
 	vma_link(mm, vma, prev, rb_link, rb_parent);
 	file = vma->vm_file;
@@ -1239,13 +1325,8 @@ out:
 	mm->total_vm += len >> PAGE_SHIFT;
 	vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
 	if (vm_flags & VM_LOCKED) {
-		/*
-		 * makes pages present; downgrades, drops, reacquires mmap_sem
-		 */
-		long nr_pages = mlock_vma_pages_range(vma, addr, addr + len);
-		if (nr_pages < 0)
-			return nr_pages;	/* vma gone! */
-		mm->locked_vm += (len >> PAGE_SHIFT) - nr_pages;
+		if (!mlock_vma_pages_range(vma, addr, addr + len))
+			mm->locked_vm += (len >> PAGE_SHIFT);
 	} else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
 		make_pages_present(addr, addr + len);
 	return addr;
@@ -1459,6 +1540,14 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 	unsigned long (*get_area)(struct file *, unsigned long,
 				  unsigned long, unsigned long, unsigned long);
 
+	unsigned long error = arch_mmap_check(addr, len, flags);
+	if (error)
+		return error;
+
+	/* Careful about overflows.. */
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
 	get_area = current->mm->get_unmapped_area;
 	if (file && file->f_op && file->f_op->get_unmapped_area)
 		get_area = file->f_op->get_unmapped_area;
@@ -1565,7 +1654,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
 		return -ENOMEM;
 
 	/* Stack limit test */
-	if (size > rlim[RLIMIT_STACK].rlim_cur)
+	if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur))
 		return -ENOMEM;
 
 	/* mlock limit tests */
@@ -1573,7 +1662,8 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
 		unsigned long locked;
 		unsigned long limit;
 		locked = mm->locked_vm + grow;
-		limit = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+		limit = ACCESS_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
+		limit >>= PAGE_SHIFT;
 		if (locked > limit && !capable(CAP_IPC_LOCK))
 			return -ENOMEM;
 	}
@@ -1720,8 +1810,7 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
 	if (!prev || expand_stack(prev, addr))
 		return NULL;
 	if (prev->vm_flags & VM_LOCKED) {
-		if (mlock_vma_pages_range(prev, addr, prev->vm_end) < 0)
-			return NULL;	/* vma gone! */
+		mlock_vma_pages_range(prev, addr, prev->vm_end);
 	}
 	return prev;
 }
@@ -1749,8 +1838,7 @@ find_extend_vma(struct mm_struct * mm, unsigned long addr)
 	if (expand_stack(vma, addr))
 		return NULL;
 	if (vma->vm_flags & VM_LOCKED) {
-		if (mlock_vma_pages_range(vma, addr, start) < 0)
-			return NULL;	/* vma gone! */
+		mlock_vma_pages_range(vma, addr, start);
 	}
 	return vma;
 }
@@ -1829,29 +1917,29 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 /*
- * Split a vma into two pieces at address 'addr', a new vma is allocated
- * either for the first part or the tail.
+ * __split_vma() bypasses sysctl_max_map_count checking. We use this on the
+ * munmap path where it doesn't make sense to fail.
  */
-int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
+static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 	      unsigned long addr, int new_below)
 {
 	struct mempolicy *pol;
 	struct vm_area_struct *new;
+	int err = -ENOMEM;
 
 	if (is_vm_hugetlb_page(vma) && (addr &
 					~(huge_page_mask(hstate_vma(vma)))))
 		return -EINVAL;
 
-	if (mm->map_count >= sysctl_max_map_count)
-		return -ENOMEM;
-
 	new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 	if (!new)
-		return -ENOMEM;
+		goto out_err;
 
 	/* most fields are the same, copy all, and then fixup */
 	*new = *vma;
 
+	INIT_LIST_HEAD(&new->anon_vma_chain);
+
 	if (new_below)
 		new->vm_end = addr;
 	else {
@@ -1861,11 +1949,14 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 
 	pol = mpol_dup(vma_policy(vma));
 	if (IS_ERR(pol)) {
-		kmem_cache_free(vm_area_cachep, new);
-		return PTR_ERR(pol);
+		err = PTR_ERR(pol);
+		goto out_free_vma;
 	}
 	vma_set_policy(new, pol);
 
+	if (anon_vma_clone(new, vma))
+		goto out_free_mpol;
+
 	if (new->vm_file) {
 		get_file(new->vm_file);
 		if (vma->vm_flags & VM_EXECUTABLE)
@@ -1876,12 +1967,42 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 		new->vm_ops->open(new);
 
 	if (new_below)
-		vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
+		err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
 			((addr - new->vm_start) >> PAGE_SHIFT), new);
 	else
-		vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
+		err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
 
-	return 0;
+	/* Success. */
+	if (!err)
+		return 0;
+
+	/* Clean everything up if vma_adjust failed. */
+	if (new->vm_ops && new->vm_ops->close)
+		new->vm_ops->close(new);
+	if (new->vm_file) {
+		if (vma->vm_flags & VM_EXECUTABLE)
+			removed_exe_file_vma(mm);
+		fput(new->vm_file);
+	}
+ out_free_mpol:
+	mpol_put(pol);
+ out_free_vma:
+	kmem_cache_free(vm_area_cachep, new);
+ out_err:
+	return err;
+}
+
+/*
+ * Split a vma into two pieces at address 'addr', a new vma is allocated
+ * either for the first part or the tail.
+ */
+int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
+	      unsigned long addr, int new_below)
+{
+	if (mm->map_count >= sysctl_max_map_count)
+		return -ENOMEM;
+
+	return __split_vma(mm, vma, addr, new_below);
 }
 
 /* Munmap is split into 2 main parts -- this part which finds
@@ -1919,7 +2040,17 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
 	 * places tmp vma above, and higher split_vma places tmp vma below.
 	 */
 	if (start > vma->vm_start) {
-		int error = split_vma(mm, vma, start, 0);
+		int error;
+
+		/*
+		 * Make sure that map_count on return from munmap() will
+		 * not exceed its limit; but let map_count go just above
+		 * its limit temporarily, to help free resources as expected.
+		 */
+		if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
+			return -ENOMEM;
+
+		error = __split_vma(mm, vma, start, 0);
 		if (error)
 			return error;
 		prev = vma;
@@ -1928,7 +2059,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
 	/* Does it split the last one? */
 	last = find_vma(mm, end);
 	if (last && end > last->vm_start) {
-		int error = split_vma(mm, last, end, 1);
+		int error = __split_vma(mm, last, end, 1);
 		if (error)
 			return error;
 	}
@@ -2003,20 +2134,14 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 	if (!len)
 		return addr;
 
-	if ((addr + len) > TASK_SIZE || (addr + len) < addr)
-		return -EINVAL;
-
-	if (is_hugepage_only_range(mm, addr, len))
-		return -EINVAL;
-
 	error = security_file_mmap(NULL, 0, 0, 0, addr, 1);
 	if (error)
 		return error;
 
 	flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
 
-	error = arch_mmap_check(addr, len, flags);
-	if (error)
+	error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
+	if (error & ~PAGE_MASK)
 		return error;
 
 	/*
@@ -2026,7 +2151,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 		unsigned long locked, lock_limit;
 		locked = len >> PAGE_SHIFT;
 		locked += mm->locked_vm;
-		lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+		lock_limit = rlimit(RLIMIT_MEMLOCK);
 		lock_limit >>= PAGE_SHIFT;
 		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
 			return -EAGAIN;
@@ -2074,6 +2199,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 		return -ENOMEM;
 	}
 
+	INIT_LIST_HEAD(&vma->anon_vma_chain);
 	vma->vm_mm = mm;
 	vma->vm_start = addr;
 	vma->vm_end = addr + len;
@@ -2210,10 +2336,11 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 	if (new_vma) {
 		*new_vma = *vma;
 		pol = mpol_dup(vma_policy(vma));
-		if (IS_ERR(pol)) {
-			kmem_cache_free(vm_area_cachep, new_vma);
-			return NULL;
-		}
+		if (IS_ERR(pol))
+			goto out_free_vma;
+		INIT_LIST_HEAD(&new_vma->anon_vma_chain);
+		if (anon_vma_clone(new_vma, vma))
+			goto out_free_mempol;
 		vma_set_policy(new_vma, pol);
 		new_vma->vm_start = addr;
 		new_vma->vm_end = addr + len;
@@ -2229,6 +2356,12 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 		}
 	}
 	return new_vma;
+
+ out_free_mempol:
+	mpol_put(pol);
+ out_free_vma:
+	kmem_cache_free(vm_area_cachep, new_vma);
+	return NULL;
 }
 
 /*
@@ -2240,7 +2373,7 @@ int may_expand_vm(struct mm_struct *mm, unsigned long npages)
 	unsigned long cur = mm->total_vm;	/* pages */
 	unsigned long lim;
 
-	lim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+	lim = rlimit(RLIMIT_AS) >> PAGE_SHIFT;
 
 	if (cur + npages > lim)
 		return 0;
@@ -2306,6 +2439,7 @@ int install_special_mapping(struct mm_struct *mm,
 	if (unlikely(vma == NULL))
 		return -ENOMEM;
 
+	INIT_LIST_HEAD(&vma->anon_vma_chain);
 	vma->vm_mm = mm;
 	vma->vm_start = addr;
 	vma->vm_end = addr + len;
@@ -2406,6 +2540,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
 int mm_take_all_locks(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
+	struct anon_vma_chain *avc;
 	int ret = -EINTR;
 
 	BUG_ON(down_read_trylock(&mm->mmap_sem));
@@ -2423,7 +2558,8 @@ int mm_take_all_locks(struct mm_struct *mm)
 		if (signal_pending(current))
 			goto out_unlock;
 		if (vma->anon_vma)
-			vm_lock_anon_vma(mm, vma->anon_vma);
+			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+				vm_lock_anon_vma(mm, avc->anon_vma);
 	}
 
 	ret = 0;
@@ -2478,13 +2614,15 @@ static void vm_unlock_mapping(struct address_space *mapping)
 void mm_drop_all_locks(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
+	struct anon_vma_chain *avc;
 
 	BUG_ON(down_read_trylock(&mm->mmap_sem));
 	BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
 
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		if (vma->anon_vma)
-			vm_unlock_anon_vma(vma->anon_vma);
+			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+				vm_unlock_anon_vma(avc->anon_vma);
 		if (vma->vm_file && vma->vm_file->f_mapping)
 			vm_unlock_mapping(vma->vm_file->f_mapping);
 	}