Diffstat (limited to 'mm/mmap.c')
-rw-r--r-- | mm/mmap.c | 311
1 files changed, 209 insertions, 102 deletions
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -20,7 +20,6 @@
 #include <linux/fs.h>
 #include <linux/personality.h>
 #include <linux/security.h>
-#include <linux/ima.h>
 #include <linux/hugetlb.h>
 #include <linux/profile.h>
 #include <linux/module.h>
@@ -266,7 +265,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 	 * segment grow beyond its set limit the in case where the limit is
 	 * not page aligned -Ram Gupta
 	 */
-	rlim = current->signal->rlim[RLIMIT_DATA].rlim_cur;
+	rlim = rlimit(RLIMIT_DATA);
 	if (rlim < RLIM_INFINITY && (brk - mm->start_brk) +
 			(mm->end_data - mm->start_data) > rlim)
 		goto out;
@@ -438,7 +437,6 @@ __vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
 {
 	__vma_link_list(mm, vma, prev, rb_parent);
 	__vma_link_rb(mm, vma, rb_link, rb_parent);
-	__anon_vma_link(vma);
 }
 
 static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
@@ -500,7 +498,7 @@ __vma_unlink(struct mm_struct *mm, struct vm_area_struct *vma,
 * are necessary. The "insert" vma (if any) is to be inserted
 * before we drop the necessary locks.
 */
-void vma_adjust(struct vm_area_struct *vma, unsigned long start,
+int vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	unsigned long end, pgoff_t pgoff, struct vm_area_struct *insert)
 {
 	struct mm_struct *mm = vma->vm_mm;
@@ -543,6 +541,26 @@ again: remove_next = 1 + (end > next->vm_end);
 		}
 	}
 
+	/*
+	 * When changing only vma->vm_end, we don't really need anon_vma lock.
+	 */
+	if (vma->anon_vma && (insert || importer || start != vma->vm_start))
+		anon_vma = vma->anon_vma;
+	if (anon_vma) {
+		/*
+		 * Easily overlooked: when mprotect shifts the boundary,
+		 * make sure the expanding vma has anon_vma set if the
+		 * shrinking vma had, to cover any anon pages imported.
+		 */
+		if (importer && !importer->anon_vma) {
+			/* Block reverse map lookups until things are set up. */
+			if (anon_vma_clone(importer, vma)) {
+				return -ENOMEM;
+			}
+			importer->anon_vma = anon_vma;
+		}
+	}
+
 	if (file) {
 		mapping = file->f_mapping;
 		if (!(vma->vm_flags & VM_NONLINEAR))
@@ -568,25 +586,6 @@ again: remove_next = 1 + (end > next->vm_end);
 		}
 	}
 
-	/*
-	 * When changing only vma->vm_end, we don't really need
-	 * anon_vma lock.
-	 */
-	if (vma->anon_vma && (insert || importer || start != vma->vm_start))
-		anon_vma = vma->anon_vma;
-	if (anon_vma) {
-		spin_lock(&anon_vma->lock);
-		/*
-		 * Easily overlooked: when mprotect shifts the boundary,
-		 * make sure the expanding vma has anon_vma set if the
-		 * shrinking vma had, to cover any anon pages imported.
-		 */
-		if (importer && !importer->anon_vma) {
-			importer->anon_vma = anon_vma;
-			__anon_vma_link(importer);
-		}
-	}
-
 	if (root) {
 		flush_dcache_mmap_lock(mapping);
 		vma_prio_tree_remove(vma, root);
@@ -617,8 +616,6 @@ again: remove_next = 1 + (end > next->vm_end);
 		__vma_unlink(mm, next, vma);
 		if (file)
 			__remove_shared_vm_struct(next, file, mapping);
-		if (next->anon_vma)
-			__anon_vma_merge(vma, next);
 	} else if (insert) {
 		/*
 		 * split_vma has split insert from vma, and needs
@@ -628,8 +625,6 @@ again: remove_next = 1 + (end > next->vm_end);
 		__insert_vm_struct(mm, insert);
 	}
 
-	if (anon_vma)
-		spin_unlock(&anon_vma->lock);
 	if (mapping)
 		spin_unlock(&mapping->i_mmap_lock);
 
@@ -639,6 +634,8 @@ again: remove_next = 1 + (end > next->vm_end);
 			if (next->vm_flags & VM_EXECUTABLE)
 				removed_exe_file_vma(mm);
 		}
+		if (next->anon_vma)
+			anon_vma_merge(vma, next);
 		mm->map_count--;
 		mpol_put(vma_policy(next));
 		kmem_cache_free(vm_area_cachep, next);
@@ -654,6 +651,8 @@ again: remove_next = 1 + (end > next->vm_end);
 	}
 
 	validate_mm(mm);
+
+	return 0;
 }
 
 /*
@@ -760,6 +759,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 {
 	pgoff_t pglen = (end - addr) >> PAGE_SHIFT;
 	struct vm_area_struct *area, *next;
+	int err;
 
 	/*
 	 * We later require that vma->vm_flags == vm_flags,
@@ -793,11 +793,13 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 				is_mergeable_anon_vma(prev->anon_vma,
 						next->anon_vma)) {
 							/* cases 1, 6 */
-			vma_adjust(prev, prev->vm_start,
-				next->vm_end, prev->vm_pgoff, NULL);
+			err = vma_adjust(prev, prev->vm_start,
+				next->vm_end, prev->vm_pgoff, NULL);
 		} else					/* cases 2, 5, 7 */
-			vma_adjust(prev, prev->vm_start,
-				end, prev->vm_pgoff, NULL);
+			err = vma_adjust(prev, prev->vm_start,
+				end, prev->vm_pgoff, NULL);
+		if (err)
+			return NULL;
 		return prev;
 	}
 
@@ -809,11 +811,13 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 			can_vma_merge_before(next, vm_flags,
 					anon_vma, file, pgoff+pglen)) {
 		if (prev && addr < prev->vm_end)	/* case 4 */
-			vma_adjust(prev, prev->vm_start,
-				addr, prev->vm_pgoff, NULL);
+			err = vma_adjust(prev, prev->vm_start,
+				addr, prev->vm_pgoff, NULL);
 		else					/* cases 3, 8 */
-			vma_adjust(area, addr, next->vm_end,
-				next->vm_pgoff - pglen, NULL);
+			err = vma_adjust(area, addr, next->vm_end,
+				next->vm_pgoff - pglen, NULL);
+		if (err)
+			return NULL;
 		return area;
 	}
 
@@ -932,13 +936,9 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 	if (!(flags & MAP_FIXED))
 		addr = round_hint_to_min(addr);
 
-	error = arch_mmap_check(addr, len, flags);
-	if (error)
-		return error;
-
 	/* Careful about overflows.. */
 	len = PAGE_ALIGN(len);
-	if (!len || len > TASK_SIZE)
+	if (!len)
 		return -ENOMEM;
 
 	/* offset overflow? */
@@ -949,24 +949,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 	if (mm->map_count > sysctl_max_map_count)
 		return -ENOMEM;
 
-	if (flags & MAP_HUGETLB) {
-		struct user_struct *user = NULL;
-		if (file)
-			return -EINVAL;
-
-		/*
-		 * VM_NORESERVE is used because the reservations will be
-		 * taken when vm_ops->mmap() is called
-		 * A dummy user value is used because we are not locking
-		 * memory so no accounting is necessary
-		 */
-		len = ALIGN(len, huge_page_size(&default_hstate));
-		file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
-						&user, HUGETLB_ANONHUGE_INODE);
-		if (IS_ERR(file))
-			return PTR_ERR(file);
-	}
-
 	/* Obtain the address to map to. we verify (or select) it and ensure
 	 * that it represents a valid section of the address space.
 	 */
@@ -990,7 +972,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 		unsigned long locked, lock_limit;
 		locked = len >> PAGE_SHIFT;
 		locked += mm->locked_vm;
-		lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+		lock_limit = rlimit(RLIMIT_MEMLOCK);
 		lock_limit >>= PAGE_SHIFT;
 		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
 			return -EAGAIN;
@@ -1061,14 +1043,75 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 	error = security_file_mmap(file, reqprot, prot, flags, addr, 0);
 	if (error)
 		return error;
-	error = ima_file_mmap(file, prot);
-	if (error)
-		return error;
 
 	return mmap_region(file, addr, len, flags, vm_flags, pgoff);
 }
 EXPORT_SYMBOL(do_mmap_pgoff);
 
+SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
+		unsigned long, prot, unsigned long, flags,
+		unsigned long, fd, unsigned long, pgoff)
+{
+	struct file *file = NULL;
+	unsigned long retval = -EBADF;
+
+	if (!(flags & MAP_ANONYMOUS)) {
+		if (unlikely(flags & MAP_HUGETLB))
+			return -EINVAL;
+		file = fget(fd);
+		if (!file)
+			goto out;
+	} else if (flags & MAP_HUGETLB) {
+		struct user_struct *user = NULL;
+		/*
+		 * VM_NORESERVE is used because the reservations will be
+		 * taken when vm_ops->mmap() is called
+		 * A dummy user value is used because we are not locking
+		 * memory so no accounting is necessary
+		 */
+		len = ALIGN(len, huge_page_size(&default_hstate));
+		file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
+						&user, HUGETLB_ANONHUGE_INODE);
+		if (IS_ERR(file))
+			return PTR_ERR(file);
+	}
+
+	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+
+	down_write(&current->mm->mmap_sem);
+	retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+	up_write(&current->mm->mmap_sem);
+
+	if (file)
+		fput(file);
+out:
+	return retval;
+}
+
+#ifdef __ARCH_WANT_SYS_OLD_MMAP
+struct mmap_arg_struct {
+	unsigned long addr;
+	unsigned long len;
+	unsigned long prot;
+	unsigned long flags;
+	unsigned long fd;
+	unsigned long offset;
+};
+
+SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
+{
+	struct mmap_arg_struct a;
+
+	if (copy_from_user(&a, arg, sizeof(a)))
+		return -EFAULT;
+	if (a.offset & ~PAGE_MASK)
+		return -EINVAL;
+
+	return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
+			      a.offset >> PAGE_SHIFT);
+}
+#endif /* __ARCH_WANT_SYS_OLD_MMAP */
+
 /*
 * Some shared mappigns will want the pages marked read-only
 * to track write events. If so, we'll downgrade vm_page_prot
@@ -1191,6 +1234,7 @@ munmap_back:
 	vma->vm_flags = vm_flags;
 	vma->vm_page_prot = vm_get_page_prot(vm_flags);
 	vma->vm_pgoff = pgoff;
+	INIT_LIST_HEAD(&vma->anon_vma_chain);
 
 	if (file) {
 		error = -EINVAL;
@@ -1224,8 +1268,20 @@ munmap_back:
 			goto free_vma;
 	}
 
-	if (vma_wants_writenotify(vma))
+	if (vma_wants_writenotify(vma)) {
+		pgprot_t pprot = vma->vm_page_prot;
+
+		/* Can vma->vm_page_prot have changed??
+		 *
+		 * Answer: Yes, drivers may have changed it in their
+		 *         f_op->mmap method.
+		 *
+		 * Ensures that vmas marked as uncached stay that way.
+		 */
 		vma->vm_page_prot = vm_get_page_prot(vm_flags & ~VM_SHARED);
+		if (pgprot_val(pprot) == pgprot_val(pgprot_noncached(pprot)))
+			vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+	}
 
 	vma_link(mm, vma, prev, rb_link, rb_parent);
 	file = vma->vm_file;
@@ -1239,13 +1295,8 @@ out:
 	mm->total_vm += len >> PAGE_SHIFT;
 	vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
 	if (vm_flags & VM_LOCKED) {
-		/*
-		 * makes pages present; downgrades, drops, reacquires mmap_sem
-		 */
-		long nr_pages = mlock_vma_pages_range(vma, addr, addr + len);
-		if (nr_pages < 0)
-			return nr_pages;	/* vma gone! */
-		mm->locked_vm += (len >> PAGE_SHIFT) - nr_pages;
+		if (!mlock_vma_pages_range(vma, addr, addr + len))
+			mm->locked_vm += (len >> PAGE_SHIFT);
 	} else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
 		make_pages_present(addr, addr + len);
 	return addr;
@@ -1459,6 +1510,14 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 	unsigned long (*get_area)(struct file *, unsigned long,
 				unsigned long, unsigned long, unsigned long);
 
+	unsigned long error = arch_mmap_check(addr, len, flags);
+	if (error)
+		return error;
+
+	/* Careful about overflows.. */
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
 	get_area = current->mm->get_unmapped_area;
 	if (file && file->f_op && file->f_op->get_unmapped_area)
 		get_area = file->f_op->get_unmapped_area;
@@ -1565,7 +1624,7 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
 		return -ENOMEM;
 
 	/* Stack limit test */
-	if (size > rlim[RLIMIT_STACK].rlim_cur)
+	if (size > ACCESS_ONCE(rlim[RLIMIT_STACK].rlim_cur))
 		return -ENOMEM;
 
 	/* mlock limit tests */
@@ -1573,7 +1632,8 @@ static int acct_stack_growth(struct vm_area_struct *vma, unsigned long size, uns
 		unsigned long locked;
 		unsigned long limit;
 		locked = mm->locked_vm + grow;
-		limit = rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;
+		limit = ACCESS_ONCE(rlim[RLIMIT_MEMLOCK].rlim_cur);
+		limit >>= PAGE_SHIFT;
 		if (locked > limit && !capable(CAP_IPC_LOCK))
 			return -ENOMEM;
 	}
@@ -1720,8 +1780,7 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
 	if (!prev || expand_stack(prev, addr))
 		return NULL;
 	if (prev->vm_flags & VM_LOCKED) {
-		if (mlock_vma_pages_range(prev, addr, prev->vm_end) < 0)
-			return NULL;	/* vma gone! */
+		mlock_vma_pages_range(prev, addr, prev->vm_end);
 	}
 	return prev;
 }
@@ -1749,8 +1808,7 @@ find_extend_vma(struct mm_struct * mm, unsigned long addr)
 	if (expand_stack(vma, addr))
 		return NULL;
 	if (vma->vm_flags & VM_LOCKED) {
-		if (mlock_vma_pages_range(vma, addr, start) < 0)
-			return NULL;	/* vma gone! */
+		mlock_vma_pages_range(vma, addr, start);
 	}
 	return vma;
 }
@@ -1829,29 +1887,29 @@ detach_vmas_to_be_unmapped(struct mm_struct *mm, struct vm_area_struct *vma,
 }
 
 /*
- * Split a vma into two pieces at address 'addr', a new vma is allocated
- * either for the first part or the tail.
+ * __split_vma() bypasses sysctl_max_map_count checking. We use this on the
+ * munmap path where it doesn't make sense to fail.
 */
-int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
+static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 	unsigned long addr, int new_below)
 {
 	struct mempolicy *pol;
 	struct vm_area_struct *new;
+	int err = -ENOMEM;
 
 	if (is_vm_hugetlb_page(vma) && (addr &
 				~(huge_page_mask(hstate_vma(vma)))))
 		return -EINVAL;
 
-	if (mm->map_count >= sysctl_max_map_count)
-		return -ENOMEM;
-
 	new = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
 	if (!new)
-		return -ENOMEM;
+		goto out_err;
 
 	/* most fields are the same, copy all, and then fixup */
 	*new = *vma;
 
+	INIT_LIST_HEAD(&new->anon_vma_chain);
+
 	if (new_below)
 		new->vm_end = addr;
 	else {
@@ -1861,11 +1919,14 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 
 	pol = mpol_dup(vma_policy(vma));
 	if (IS_ERR(pol)) {
-		kmem_cache_free(vm_area_cachep, new);
-		return PTR_ERR(pol);
+		err = PTR_ERR(pol);
+		goto out_free_vma;
 	}
 	vma_set_policy(new, pol);
 
+	if (anon_vma_clone(new, vma))
+		goto out_free_mpol;
+
 	if (new->vm_file) {
 		get_file(new->vm_file);
 		if (vma->vm_flags & VM_EXECUTABLE)
@@ -1876,12 +1937,41 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 		new->vm_ops->open(new);
 
 	if (new_below)
-		vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
-			((addr - new->vm_start) >> PAGE_SHIFT), new);
+		err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
+			((addr - new->vm_start) >> PAGE_SHIFT), new);
 	else
-		vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
+		err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
 
-	return 0;
+	/* Success. */
+	if (!err)
+		return 0;
+
+	/* Clean everything up if vma_adjust failed. */
+	new->vm_ops->close(new);
+	if (new->vm_file) {
+		if (vma->vm_flags & VM_EXECUTABLE)
+			removed_exe_file_vma(mm);
+		fput(new->vm_file);
+	}
+ out_free_mpol:
+	mpol_put(pol);
+ out_free_vma:
+	kmem_cache_free(vm_area_cachep, new);
+ out_err:
+	return err;
+}
+
+/*
+ * Split a vma into two pieces at address 'addr', a new vma is allocated
+ * either for the first part or the tail.
+ */
+int split_vma(struct mm_struct *mm, struct vm_area_struct *vma,
+	      unsigned long addr, int new_below)
+{
+	if (mm->map_count >= sysctl_max_map_count)
+		return -ENOMEM;
+
+	return __split_vma(mm, vma, addr, new_below);
 }
 
 /* Munmap is split into 2 main parts -- this part which finds
@@ -1919,7 +2009,17 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
 	 * places tmp vma above, and higher split_vma places tmp vma below.
 	 */
 	if (start > vma->vm_start) {
-		int error = split_vma(mm, vma, start, 0);
+		int error;
+
+		/*
+		 * Make sure that map_count on return from munmap() will
+		 * not exceed its limit; but let map_count go just above
+		 * its limit temporarily, to help free resources as expected.
+		 */
+		if (end < vma->vm_end && mm->map_count >= sysctl_max_map_count)
+			return -ENOMEM;
+
+		error = __split_vma(mm, vma, start, 0);
 		if (error)
 			return error;
 		prev = vma;
@@ -1928,7 +2028,7 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
 	/* Does it split the last one? */
 	last = find_vma(mm, end);
 	if (last && end > last->vm_start) {
-		int error = split_vma(mm, last, end, 1);
+		int error = __split_vma(mm, last, end, 1);
 		if (error)
 			return error;
 	}
@@ -2003,20 +2103,14 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 	if (!len)
 		return addr;
 
-	if ((addr + len) > TASK_SIZE || (addr + len) < addr)
-		return -EINVAL;
-
-	if (is_hugepage_only_range(mm, addr, len))
-		return -EINVAL;
-
 	error = security_file_mmap(NULL, 0, 0, 0, addr, 1);
 	if (error)
 		return error;
 
 	flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
 
-	error = arch_mmap_check(addr, len, flags);
-	if (error)
+	error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
+	if (error & ~PAGE_MASK)
 		return error;
 
 	/*
@@ -2026,7 +2120,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 		unsigned long locked, lock_limit;
 		locked = len >> PAGE_SHIFT;
 		locked += mm->locked_vm;
-		lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+		lock_limit = rlimit(RLIMIT_MEMLOCK);
 		lock_limit >>= PAGE_SHIFT;
 		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
 			return -EAGAIN;
@@ -2074,6 +2168,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 		return -ENOMEM;
 	}
 
+	INIT_LIST_HEAD(&vma->anon_vma_chain);
 	vma->vm_mm = mm;
 	vma->vm_start = addr;
 	vma->vm_end = addr + len;
@@ -2210,10 +2305,11 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 	if (new_vma) {
 		*new_vma = *vma;
 		pol = mpol_dup(vma_policy(vma));
-		if (IS_ERR(pol)) {
-			kmem_cache_free(vm_area_cachep, new_vma);
-			return NULL;
-		}
+		if (IS_ERR(pol))
+			goto out_free_vma;
+		INIT_LIST_HEAD(&new_vma->anon_vma_chain);
+		if (anon_vma_clone(new_vma, vma))
+			goto out_free_mempol;
 		vma_set_policy(new_vma, pol);
 		new_vma->vm_start = addr;
 		new_vma->vm_end = addr + len;
@@ -2229,6 +2325,12 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
 		}
 	}
 	return new_vma;
+
+ out_free_mempol:
+	mpol_put(pol);
+ out_free_vma:
+	kmem_cache_free(vm_area_cachep, new_vma);
+	return NULL;
 }
 
 /*
@@ -2240,7 +2342,7 @@ int may_expand_vm(struct mm_struct *mm, unsigned long npages)
 	unsigned long cur = mm->total_vm;	/* pages */
 	unsigned long lim;
 
-	lim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT;
+	lim = rlimit(RLIMIT_AS) >> PAGE_SHIFT;
 
 	if (cur + npages > lim)
 		return 0;
@@ -2306,6 +2408,7 @@ int install_special_mapping(struct mm_struct *mm,
 	if (unlikely(vma == NULL))
 		return -ENOMEM;
 
+	INIT_LIST_HEAD(&vma->anon_vma_chain);
 	vma->vm_mm = mm;
 	vma->vm_start = addr;
 	vma->vm_end = addr + len;
@@ -2406,6 +2509,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
 int mm_take_all_locks(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
+	struct anon_vma_chain *avc;
 	int ret = -EINTR;
 
 	BUG_ON(down_read_trylock(&mm->mmap_sem));
@@ -2423,7 +2527,8 @@ int mm_take_all_locks(struct mm_struct *mm)
 		if (signal_pending(current))
 			goto out_unlock;
 		if (vma->anon_vma)
-			vm_lock_anon_vma(mm, vma->anon_vma);
+			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+				vm_lock_anon_vma(mm, avc->anon_vma);
 	}
 
 	ret = 0;
@@ -2478,13 +2583,15 @@ static void vm_unlock_mapping(struct address_space *mapping)
 void mm_drop_all_locks(struct mm_struct *mm)
 {
 	struct vm_area_struct *vma;
+	struct anon_vma_chain *avc;
 
 	BUG_ON(down_read_trylock(&mm->mmap_sem));
 	BUG_ON(!mutex_is_locked(&mm_all_locks_mutex));
 
 	for (vma = mm->mmap; vma; vma = vma->vm_next) {
 		if (vma->anon_vma)
-			vm_unlock_anon_vma(vma->anon_vma);
+			list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
+				vm_unlock_anon_vma(avc->anon_vma);
 		if (vma->vm_file && vma->vm_file->f_mapping)
 			vm_unlock_mapping(vma->vm_file->f_mapping);
 	}