Diffstat (limited to 'mm/mmap.c')
-rw-r--r--  mm/mmap.c  311
1 file changed, 209 insertions(+), 102 deletions(-)
diff --git a/mm/mmap.c b/mm/mmap.c
index 73f5e4b64010..75557c639ad4 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -20,7 +20,6 @@
 #include <linux/fs.h>
 #include <linux/personality.h>
 #include <linux/security.h>
-#include <linux/ima.h>
 #include <linux/hugetlb.h>
 #include <linux/profile.h>
 #include <linux/module.h>
@@ -266,7 +265,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
          * segment grow beyond its set limit the in case where the limit is
          * not page aligned -Ram Gupta
          */
-        rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
+        rlim = rlimit(RLIMIT_DATA);
         if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
                         (mm->end_data - mm->start_data) > rlim)
                 goto out;
@@ -438,7 +437,6 @@ __vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
 {
         __vma_link_list(mm, vma, prev, rb_parent);
         __vma_link_rb(mm, vma, rb_link, rb_parent);
-        __anon_vma_link(vma);
 }
 
 static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -500,7 +498,7 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
  * are necessary. The "insert" vma (if any) is to be inserted
  * before we drop the necessary locks.
  */
-void vma_adjust(struct vm_area_struct *vma, unsigned long start,
+int vma_adjust(struct vm_area_struct *vma, unsigned long start,
         unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
 {
         struct mm_struct *mm = vma->vm_mm;
@@ -543,6 +541,26 @@ again: remove_next = 1 + (end > next->vm_end);
                 }
         }
 
+        /*
+         * When changing only vma->vm_end, we don't really need anon_vma lock.
+         */
+        if (vma->anon_vma && (insert || importer || start != vma->vm_start))
+                anon_vma = vma->anon_vma;
+        if (anon_vma) {
+                /*
+                 * Easily overlooked: when mprotect shifts the boundary,
+                 * make sure the expanding vma has anon_vma set if the
+                 * shrinking vma had, to cover any anon pages imported.
+                 */
+                if (importer && !importer->anon_vma) {
+                        /* Block reverse map lookups until things are set up. */
+                        if (anon_vma_clone(importer, vma)) {
+                                return -ENOMEM;
+                        }
+                        importer->anon_vma = anon_vma;
+                }
+        }
+
         if (file) {
                 mapping = file->f_mapping;
                 if (!(vma->vm_flags & VM_NONLINEAR))
@@ -568,25 +586,6 @@ again: remove_next = 1 + (end > next->vm_end);
                 }
         }
 
-        /*
-         * When changing only vma->vm_end, we don't really need
-         * anon_vma lock.
-         */
-        if (vma->anon_vma && (insert || importer || start != vma->vm_start))
-                anon_vma = vma->anon_vma;
-        if (anon_vma) {
-                spin_lock(&anon_vma->lock);
-                /*
-                 * Easily overlooked: when mprotect shifts the boundary,
-                 * make sure the expanding vma has anon_vma set if the
-                 * shrinking vma had, to cover any anon pages imported.
-                 */
-                if (importer && !importer->anon_vma) {
-                        importer->anon_vma = anon_vma;
-                        __anon_vma_link(importer);
-                }
-        }
-
         if (root) {
                 flush_dcache_mmap_lock(mapping);
                 vma_prio_tree_remove(vma, root);
@@ -617,8 +616,6 @@ again: remove_next = 1 + (end > next->vm_end);
                 __vma_unlink(mm, next, vma);
                 if (file)
                         __remove_shared_vm_struct(next, file, mapping);
-                if (next->anon_vma)
-                        __anon_vma_merge(vma, next);
         } else if (insert) {
                 /*
                  * split_vma has split insert from vma, and needs
@@ -628,8 +625,6 @@ again: remove_next = 1 + (end > next->vm_end);
                 __insert_vm_struct(mm, insert);
         }
 
-        if (anon_vma)
-                spin_unlock(&anon_vma->lock);
         if (mapping)
                 spin_unlock(&mapping->i_mmap_lock);
 
@@ -639,6 +634,8 @@ again: remove_next = 1 + (end > next->vm_end);
                         if (next->vm_flags & VM_EXECUTABLE)
                                 removed_exe_file_vma(mm);
                 }
+                if (next->anon_vma)
+                        anon_vma_merge(vma, next);
                 mm->map_count--;
                 mpol_put(vma_policy(next));
                 kmem_cache_free(vm_area_cachep, next);
@@ -654,6 +651,8 @@ again: remove_next = 1 + (end > next->vm_end);
         }
 
         validate_mm(mm);
+
+        return 0;
 }
 
 /*
@@ -760,6 +759,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 {
         pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
         struct vm_area_struct *area, *next;
+        int err;
 
         /*
          * We later require that vma->vm_flags == vm_flags,
@@ -793,11 +793,13 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
                                 is_mergeable_anon_vma(prev->anon_vma,
                                                       next->anon_vma)) {
                                                         /* cases 1, 6 */
-                        vma_adjust(prev, prev->vm_start,
+                        err = vma_adjust(prev, prev->vm_start,
                                 next->vm_end, prev->vm_pgoff, NULL);
                 } else                                  /* cases 2, 5, 7 */
-                        vma_adjust(prev, prev->vm_start,
+                        err = vma_adjust(prev, prev->vm_start,
                                 end, prev->vm_pgoff, NULL);
+                if (err)
+                        return NULL;
                 return prev;
         }
 
@@ -809,11 +811,13 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
                         can_vma_merge_before(next, vm_flags,
                                         anon_vma, file, pgoff+pglen)) {
                 if (prev && addr < prev->vm_end)        /* case 4 */
-                        vma_adjust(prev, prev->vm_start,
+                        err = vma_adjust(prev, prev->vm_start,
                                 addr, prev->vm_pgoff, NULL);
                 else                                    /* cases 3, 8 */
-                        vma_adjust(area, addr, next->vm_end,
+                        err = vma_adjust(area, addr, next->vm_end,
                                 next->vm_pgoff - pglen, NULL);
+                if (err)
+                        return NULL;
                 return area;
         }
 
@@ -932,13 +936,9 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
         if (!(flags & MAP_FIXED))
                 addr = round_hint_to_min(addr);
 
-        error = arch_mmap_check(addr, len, flags);
-        if (error)
-                return error;
-
         /* Careful about overflows.. */
         len = PAGE_ALIGN(len);
-        if (!len || len > TASK_SIZE)
+        if (!len)
                 return -ENOMEM;
 
@@ -949,24 +949,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
         if (mm->map_count > sysctl_max_map_count)
                 return -ENOMEM;
 
-        if (flags & MAP_HUGETLB) {
-                struct user_struct *user = NULL;
-                if (file)
-                        return -EINVAL;
-
-                /*
-                 * VM_NORESERVE is used because the reservations will be
-                 * taken when vm_ops->mmap() is called
-                 * A dummy user value is used because we are not locking
-                 * memory so no accounting is necessary
-                 */
-                len = ALIGN(len, huge_page_size(&default_hstate));
-                file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
-                                                &user, HUGETLB_ANONHUGE_INODE);
-                if (IS_ERR(file))
-                        return PTR_ERR(file);
-        }
-
         /* Obtain the address to map to. we verify (or select) it and ensure
          * that it represents a valid section of the address space.
          */
@@ -990,7 +972,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
                 unsigned long locked, lock_limit;
                 locked = len >> PAGE_SHIFT;
                 locked += mm->locked_vm;
-                lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+                lock_limit = rlimit(RLIMIT_MEMLOCK);
                 lock_limit >>= PAGE_SHIFT;
                 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
                         return -EAGAIN;
@@ -1061,14 +1043,75 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
         error = security_file_mmap(file, reqprot, prot, flags, addr, 0);
         if (error)
                 return error;
-        error = ima_file_mmap(file, prot);
-        if (error)
-                return error;
 
         return mmap_region(file, addr, len, flags, vm_flags, pgoff);
 }
 EXPORT_SYMBOL(do_mmap_pgoff);
 
+SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
+                unsigned long, prot, unsigned long, flags,
+                unsigned long, fd, unsigned long, pgoff)
+{
+        struct file *file = NULL;
+        unsigned long retval = -EBADF;
+
+        if (!(flags & MAP_ANONYMOUS)) {
+                if (unlikely(flags & MAP_HUGETLB))
+                        return -EINVAL;
+                file = fget(fd);
+                if (!file)
+                        goto out;
+        } else if (flags & MAP_HUGETLB) {
+                struct user_struct *user = NULL;
+                /*
+                 * VM_NORESERVE is used because the reservations will be
+                 * taken when vm_ops->mmap() is called
+                 * A dummy user value is used because we are not locking
+                 * memory so no accounting is necessary
+                 */
+                len = ALIGN(len, huge_page_size(&default_hstate));
+                file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
+                                                &user, HUGETLB_ANONHUGE_INODE);
+                if (IS_ERR(file))
+                        return PTR_ERR(file);
+        }
+
+        flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+
+        down_write(&current->mm->mmap_sem);
+        retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+        up_write(&current->mm->mmap_sem);
+
+        if (file)
+                fput(file);
+out:
+        return retval;
+}
+
+#ifdef __ARCH_WANT_SYS_OLD_MMAP
+struct mmap_arg_struct {
+        unsigned long addr;
+        unsigned long len;
+        unsigned long prot;
+        unsigned long flags;
+        unsigned long fd;
+        unsigned long offset;
+};
+
+SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
+{
+        struct mmap_arg_struct a;
+
+        if (copy_from_user(&a, arg, sizeof(a)))
+                return -EFAULT;
+        if (a.offset & ~PAGE_MASK)
+                return -EINVAL;
+
+        return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
+                              a.offset >> PAGE_SHIFT);
+}
+#endif /* __ARCH_WANT_SYS_OLD_MMAP */
+
 /*
  * Some shared mappigns will want the pages marked read-only
  * to track write events. If so, we'll downgrade vm_page_prot
@@ -1191,6 +1234,7 @@ munmap_back:
         vma->vm_flags = vm_flags;
         vma->vm_page_prot = vm_get_page_prot(vm_flags);
         vma->vm_pgoff = pgoff;
+        INIT_LIST_HEAD(&vma->anon_vma_chain);
 
         if (file) {
                 error = -EINVAL;
@@ -1224,8 +1268,20 @@ munmap_back:
                         goto free_vma;
         }
 
-        if (vma_wants_writenotify(vma))
+        if (vma_wants_writenotify(vma)) {
+                pgprot_t pprot = vma->vm_page_prot;
+
+                /* Can vma->vm_page_prot have changed??
+                 *
+                 * Answer: Yes, drivers may have changed it in their
+                 * f_op->mmap method.
+                 *
+                 * Ensures that vmas marked as uncached stay that way.
+                 */
                 vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
+                if (pgprot_val(pprot) == pgprot_val(pgprot_noncached(pprot)))
+                        vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+        }
 
         vma_link(mm, vma, prev, rb_link, rb_parent);
         file = vma->vm_file;
@@ -1239,13 +1295,8 @@ out:
         mm->total_vm += len >> PAGE_SHIFT;
         vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
         if (vm_flags & VM_LOCKED) {
-                /*
-                 * makes pages present; downgrades, drops, reacquires mmap_sem
-                 */
-                long nr_pages = mlock_vma_pages_range(vma, addr, addr + len);
-                if (nr_pages < 0)
-                        return nr_pages;        /* vma gone! */
-                mm->locked_vm += (len >> PAGE_SHIFT) - nr_pages;
+                if (!mlock_vma_pages_range(vma, addr, addr + len))
+                        mm->locked_vm += (len >> PAGE_SHIFT);
         } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
                 make_pages_present(addr, addr + len);
         return addr;
@@ -1459,6 +1510,14 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
         unsigned long (*get_area)(struct file *, unsigned long,
                                   unsigned long, unsigned long, unsigned long);
 
+        unsigned long error = arch_mmap_check(addr, len, flags);
+        if (error)
+                return error;
+
+        /* Careful about overflows.. */
+        if (len > TASK_SIZE)
+                return -ENOMEM;
+
         get_area = current->mm->get_unmapped_area;
         if (file && file->f_op && file->f_op->get_unmapped_area)
                 get_area = file->f_op->get_unmapped_area;
@@ -1565,7 +1624,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
                 return -ENOMEM;
 
         /* Stack limit test */
-        if (size > rlim[RLIMIT_STACK].rlim_cur)
+        if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur))
                 return -ENOMEM;
 
         /* mlock limit tests */
@@ -1573,7 +1632,8 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
                 unsigned long locked;
                 unsigned long limit;
                 locked = mm->locked_vm + grow;
-                limit = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+                limit = ACCESS_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
+                limit >>= PAGE_SHIFT;
                 if (locked > limit && !capable(CAP_IPC_LOCK))
                         return -ENOMEM;
         }
@@ -1720,8 +1780,7 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
         if (!prev || expand_stack(prev, addr))
                 return NULL;
         if (prev->vm_flags & VM_LOCKED) {
-                if (mlock_vma_pages_range(prev, addr, prev->vm_end) < 0)
-                        return NULL;    /* vma gone! */
+                mlock_vma_pages_range(prev, addr, prev->vm_end);
         }
         return prev;
 }
@@ -1749,8 +1808,7 @@ find_extend_vma(struct mm_struct * mm, unsigned long addr)
         if (expand_stack(vma, addr))
                 return NULL;
         if (vma->vm_flags & VM_LOCKED) {
-                if (mlock_vma_pages_range(vma, addr, start) < 0)
-                        return NULL;    /* vma gone! */
+                mlock_vma_pages_range(vma, addr, start);
         }
         return vma;
 }
@@ -1829,29 +1887,29 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 /*
- * Split a vma into two pieces at address 'addr', a new vma is allocated
- * either for the first part or the tail.
+ * __split_vma() bypasses sysctl_max_map_count checking. We use this on the
+ * munmap path where it doesn't make sense to fail.
  */
-int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
+static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
               unsigned long addr, int new_below)
 {
         struct mempolicy *pol;
         struct vm_area_struct *new;
+        int err = -ENOMEM;
 
         if (is_vm_hugetlb_page(vma) && (addr &
                                         ~(huge_page_mask(hstate_vma(vma)))))
                 return -EINVAL;
 
-        if (mm->map_count >= sysctl_max_map_count)
-                return -ENOMEM;
-
         new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
         if (!new)
-                return -ENOMEM;
+                goto out_err;
 
         /* most fields are the same, copy all, and then fixup */
         *new = *vma;
 
+        INIT_LIST_HEAD(&new->anon_vma_chain);
+
         if (new_below)
                 new->vm_end = addr;
         else {
@@ -1861,11 +1919,14 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 
         pol = mpol_dup(vma_policy(vma));
         if (IS_ERR(pol)) {
-                kmem_cache_free(vm_area_cachep, new);
-                return PTR_ERR(pol);
+                err = PTR_ERR(pol);
+                goto out_free_vma;
         }
         vma_set_policy(new, pol);
 
+        if (anon_vma_clone(new, vma))
+                goto out_free_mpol;
+
         if (new->vm_file) {
                 get_file(new->vm_file);
                 if (vma->vm_flags & VM_EXECUTABLE)
@@ -1876,12 +1937,41 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
                 new->vm_ops->open(new);
 
         if (new_below)
-                vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
+                err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
                         ((addr - new->vm_start) >> PAGE_SHIFT), new);
         else
-                vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
+                err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
 
-        return 0;
+        /* Success. */
+        if (!err)
+                return 0;
+
+        /* Clean everything up if vma_adjust failed. */
+        new->vm_ops->close(new);
+        if (new->vm_file) {
+                if (vma->vm_flags & VM_EXECUTABLE)
+                        removed_exe_file_vma(mm);
+                fput(new->vm_file);
+        }
+ out_free_mpol:
+        mpol_put(pol);
+ out_free_vma:
+        kmem_cache_free(vm_area_cachep, new);
+ out_err:
+        return err;
+}
+
+/*
+ * Split a vma into two pieces at address 'addr', a new vma is allocated
+ * either for the first part or the tail.
+ */
+int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
+              unsigned long addr, int new_below)
+{
+        if (mm->map_count >= sysctl_max_map_count)
+                return -ENOMEM;
+
+        return __split_vma(mm, vma, addr, new_below);
 }
 
 /* Munmap is split into 2 main parts -- this part which finds
@@ -1919,7 +2009,17 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
          * places tmp vma above, and higher split_vma places tmp vma below.
          */
         if (start > vma->vm_start) {
-                int error = split_vma(mm, vma, start, 0);
+                int error;
+
+                /*
+                 * Make sure that map_count on return from munmap() will
+                 * not exceed its limit; but let map_count go just above
+                 * its limit temporarily, to help free resources as expected.
+                 */
+                if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
+                        return -ENOMEM;
+
+                error = __split_vma(mm, vma, start, 0);
                 if (error)
                         return error;
                 prev = vma;
@@ -1928,7 +2028,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
         /* Does it split the last one? */
         last = find_vma(mm, end);
         if (last && end > last->vm_start) {
-                int error = split_vma(mm, last, end, 1);
+                int error = __split_vma(mm, last, end, 1);
                 if (error)
                         return error;
         }
@@ -2003,20 +2103,14 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
         if (!len)
                 return addr;
 
-        if ((addr + len) > TASK_SIZE || (addr + len) < addr)
-                return -EINVAL;
-
-        if (is_hugepage_only_range(mm, addr, len))
-                return -EINVAL;
-
         error = security_file_mmap(NULL, 0, 0, 0, addr, 1);
         if (error)
                 return error;
 
         flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
 
-        error = arch_mmap_check(addr, len, flags);
-        if (error)
+        error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
+        if (error & ~PAGE_MASK)
                 return error;
 
         /*
@@ -2026,7 +2120,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
                 unsigned long locked, lock_limit;
                 locked = len >> PAGE_SHIFT;
                 locked += mm->locked_vm;
-                lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+                lock_limit = rlimit(RLIMIT_MEMLOCK);
                 lock_limit >>= PAGE_SHIFT;
                 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
                         return -EAGAIN;
@@ -2074,6 +2168,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
                 return -ENOMEM;
         }
 
+        INIT_LIST_HEAD(&vma->anon_vma_chain);
         vma->vm_mm = mm;
         vma->vm_start = addr;
         vma->vm_end = addr + len;
@@ -2210,10 +2305,11 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
                 if (new_vma) {
                         *new_vma = *vma;
                         pol = mpol_dup(vma_policy(vma));
-                        if (IS_ERR(pol)) {
-                                kmem_cache_free(vm_area_cachep, new_vma);
-                                return NULL;
-                        }
+                        if (IS_ERR(pol))
+                                goto out_free_vma;
+                        INIT_LIST_HEAD(&new_vma->anon_vma_chain);
+                        if (anon_vma_clone(new_vma, vma))
+                                goto out_free_mempol;
                         vma_set_policy(new_vma, pol);
                         new_vma->vm_start = addr;
                         new_vma->vm_end = addr + len;
@@ -2229,6 +2325,12 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
                 }
         }
         return new_vma;
+
+ out_free_mempol:
+        mpol_put(pol);
+ out_free_vma:
+        kmem_cache_free(vm_area_cachep, new_vma);
+        return NULL;
 }
 
 /*
@@ -2240,7 +2342,7 @@ int may_expand_vm(struct mm_struct *mm, unsigned long npages)
         unsigned long cur = mm->total_vm;       /* pages */
         unsigned long lim;
 
-        lim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+        lim = rlimit(RLIMIT_AS) >> PAGE_SHIFT;
 
         if (cur + npages > lim)
                 return 0;
@@ -2306,6 +2408,7 @@ int install_special_mapping(struct mm_struct *mm,
         if (unlikely(vma == NULL))
                 return -ENOMEM;
 
+        INIT_LIST_HEAD(&vma->anon_vma_chain);
         vma->vm_mm = mm;
         vma->vm_start = addr;
         vma->vm_end = addr + len;
@@ -2406,6 +2509,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
 int mm_take_all_locks(struct mm_struct *mm)
 {
         struct vm_area_struct *vma;
+        struct anon_vma_chain *avc;
         int ret = -EINTR;
 
         BUG_ON(down_read_trylock(&mm->mmap_sem));
@@ -2423,7 +2527,8 @@ int mm_take_all_locks(struct mm_struct *mm)
                 if (signal_pending(current))
                         goto out_unlock;
                 if (vma->anon_vma)
-                        vm_lock_anon_vma(mm, vma->anon_vma);
+                        list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+                                vm_lock_anon_vma(mm, avc->anon_vma);
         }
 
         ret = 0;
@@ -2478,13 +2583,15 @@ static void vm_unlock_mapping(struct address_space *mapping)
 void mm_drop_all_locks(struct mm_struct *mm)
 {
         struct vm_area_struct *vma;
+        struct anon_vma_chain *avc;
 
         BUG_ON(down_read_trylock(&mm->mmap_sem));
         BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
 
         for (vma = mm->mmap; vma; vma = vma->vm_next) {
                 if (vma->anon_vma)
-                        vm_unlock_anon_vma(vma->anon_vma);
+                        list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+                                vm_unlock_anon_vma(avc->anon_vma);
                 if (vma->vm_file && vma->vm_file->f_mapping)
                         vm_unlock_mapping(vma->vm_file->f_mapping);
         }