Diffstat (limited to 'mm/mmap.c')
-rw-r--r--  mm/mmap.c  200
1 file changed, 99 insertions, 101 deletions
@@ -28,6 +28,8 @@
 #include <linux/rmap.h>
 #include <linux/mmu_notifier.h>
 #include <linux/perf_event.h>
+#include <linux/audit.h>
+#include <linux/khugepaged.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -82,10 +84,14 @@ pgprot_t vm_get_page_prot(unsigned long vm_flags)
 }
 EXPORT_SYMBOL(vm_get_page_prot);
 
-int sysctl_overcommit_memory = OVERCOMMIT_GUESS;  /* heuristic overcommit */
-int sysctl_overcommit_ratio = 50;       /* default is 50% */
+int sysctl_overcommit_memory __read_mostly = OVERCOMMIT_GUESS;  /* heuristic overcommit */
+int sysctl_overcommit_ratio __read_mostly = 50; /* default is 50% */
 int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
-struct percpu_counter vm_committed_as;
+/*
+ * Make sure vm_committed_as in one cacheline and not cacheline shared with
+ * other variables. It can be updated by several CPUs frequently.
+ */
+struct percpu_counter vm_committed_as ____cacheline_aligned_in_smp;
 
 /*
  * Check that a process has enough memory to allocate a new virtual
@@ -188,7 +194,7 @@ error:
 }
 
 /*
- * Requires inode->i_mapping->i_mmap_lock
+ * Requires inode->i_mapping->i_mmap_mutex
  */
 static void __remove_shared_vm_struct(struct vm_area_struct *vma,
                struct file *file, struct address_space *mapping)
@@ -216,9 +222,9 @@ void unlink_file_vma(struct vm_area_struct *vma)
 
        if (file) {
                struct address_space *mapping = file->f_mapping;
-               spin_lock(&mapping->i_mmap_lock);
+               mutex_lock(&mapping->i_mmap_mutex);
                __remove_shared_vm_struct(vma, file, mapping);
-               spin_unlock(&mapping->i_mmap_lock);
+               mutex_unlock(&mapping->i_mmap_mutex);
        }
 }
 
@@ -252,7 +258,15 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
        down_write(&mm->mmap_sem);
 
 #ifdef CONFIG_COMPAT_BRK
-       min_brk = mm->end_code;
+       /*
+        * CONFIG_COMPAT_BRK can still be overridden by setting
+        * randomize_va_space to 2, which will still cause mm->start_brk
+        * to be arbitrarily shifted
+        */
+       if (current->brk_randomized)
+               min_brk = mm->start_brk;
+       else
+               min_brk = mm->end_data;
 #else
        min_brk = mm->start_brk;
 #endif
@@ -384,29 +398,6 @@ find_vma_prepare(struct mm_struct *mm, unsigned long addr,
        return vma;
 }
 
-static inline void
-__vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
-               struct vm_area_struct *prev, struct rb_node *rb_parent)
-{
-       struct vm_area_struct *next;
-
-       vma->vm_prev = prev;
-       if (prev) {
-               next = prev->vm_next;
-               prev->vm_next = vma;
-       } else {
-               mm->mmap = vma;
-               if (rb_parent)
-                       next = rb_entry(rb_parent,
-                                       struct vm_area_struct, vm_rb);
-               else
-                       next = NULL;
-       }
-       vma->vm_next = next;
-       if (next)
-               next->vm_prev = vma;
-}
-
 void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
                struct rb_node **rb_link, struct rb_node *rb_parent)
 {
@@ -454,16 +445,14 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
        if (vma->vm_file)
                mapping = vma->vm_file->f_mapping;
 
-       if (mapping) {
-               spin_lock(&mapping->i_mmap_lock);
-               vma->vm_truncate_count = mapping->truncate_count;
-       }
+       if (mapping)
+               mutex_lock(&mapping->i_mmap_mutex);
 
        __vma_link(mm, vma, prev, rb_link, rb_parent);
        __vma_link_file(vma);
 
        if (mapping)
-               spin_unlock(&mapping->i_mmap_lock);
+               mutex_unlock(&mapping->i_mmap_mutex);
 
        mm->map_count++;
        validate_mm(mm);
@@ -566,17 +555,8 @@ again: remove_next = 1 + (end > next->vm_end);
                mapping = file->f_mapping;
                if (!(vma->vm_flags & VM_NONLINEAR))
                        root = &mapping->i_mmap;
-               spin_lock(&mapping->i_mmap_lock);
-               if (importer &&
-                   vma->vm_truncate_count != next->vm_truncate_count) {
-                       /*
-                        * unmap_mapping_range might be in progress:
-                        * ensure that the expanding vma is rescanned.
-                        */
-                       importer->vm_truncate_count = 0;
-               }
+               mutex_lock(&mapping->i_mmap_mutex);
                if (insert) {
-                       insert->vm_truncate_count = vma->vm_truncate_count;
                        /*
                         * Put into prio_tree now, so instantiated pages
                         * are visible to arm/parisc __flush_dcache_page
@@ -587,13 +567,15 @@ again: remove_next = 1 + (end > next->vm_end);
                }
        }
 
+       vma_adjust_trans_huge(vma, start, end, adjust_next);
+
        /*
         * When changing only vma->vm_end, we don't really need anon_vma
         * lock. This is a fairly rare case by itself, but the anon_vma
         * lock may be shared between many sibling processes. Skipping
         * the lock for brk adjustments makes a difference sometimes.
         */
-       if (vma->anon_vma && (insert || importer || start != vma->vm_start)) {
+       if (vma->anon_vma && (importer || start != vma->vm_start)) {
                anon_vma = vma->anon_vma;
                anon_vma_lock(anon_vma);
        }
@@ -640,7 +622,7 @@ again: remove_next = 1 + (end > next->vm_end);
        if (anon_vma)
                anon_vma_unlock(anon_vma);
        if (mapping)
-               spin_unlock(&mapping->i_mmap_lock);
+               mutex_unlock(&mapping->i_mmap_mutex);
 
        if (remove_next) {
                if (file) {
@@ -687,9 +669,17 @@ static inline int is_mergeable_vma(struct vm_area_struct *vma,
 }
 
 static inline int is_mergeable_anon_vma(struct anon_vma *anon_vma1,
-                                       struct anon_vma *anon_vma2)
+                                       struct anon_vma *anon_vma2,
+                                       struct vm_area_struct *vma)
 {
-       return !anon_vma1 || !anon_vma2 || (anon_vma1 == anon_vma2);
+       /*
+        * The list_is_singular() test is to avoid merging VMA cloned from
+        * parents. This can improve scalability caused by anon_vma lock.
+        */
+       if ((!anon_vma1 || !anon_vma2) && (!vma ||
+               list_is_singular(&vma->anon_vma_chain)))
+               return 1;
+       return anon_vma1 == anon_vma2;
 }
 
 /*
@@ -708,7 +698,7 @@ can_vma_merge_before(struct vm_area_struct *vma, unsigned long vm_flags,
        struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
 {
        if (is_mergeable_vma(vma, file, vm_flags) &&
-           is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
+           is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
                if (vma->vm_pgoff == vm_pgoff)
                        return 1;
        }
@@ -727,7 +717,7 @@ can_vma_merge_after(struct vm_area_struct *vma, unsigned long vm_flags,
        struct anon_vma *anon_vma, struct file *file, pgoff_t vm_pgoff)
 {
        if (is_mergeable_vma(vma, file, vm_flags) &&
-           is_mergeable_anon_vma(anon_vma, vma->anon_vma)) {
+           is_mergeable_anon_vma(anon_vma, vma->anon_vma, vma)) {
                pgoff_t vm_pglen;
                vm_pglen = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
                if (vma->vm_pgoff + vm_pglen == vm_pgoff)
@@ -805,7 +795,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
                                can_vma_merge_before(next, vm_flags,
                                        anon_vma, file, pgoff+pglen) &&
                                is_mergeable_anon_vma(prev->anon_vma,
-                                                     next->anon_vma)) {
+                                                     next->anon_vma, NULL)) {
                                                        /* cases 1, 6 */
                        err = vma_adjust(prev, prev->vm_start,
                                next->vm_end, prev->vm_pgoff, NULL);
@@ -814,6 +804,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
                                end, prev->vm_pgoff, NULL);
                if (err)
                        return NULL;
+               khugepaged_enter_vma_merge(prev);
                return prev;
        }
 
@@ -832,6 +823,7 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
                                        next->vm_pgoff - pglen, NULL);
                if (err)
                        return NULL;
+               khugepaged_enter_vma_merge(area);
                return area;
        }
 
@@ -914,14 +906,7 @@ struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
        if (anon_vma)
                return anon_vma;
 try_prev:
-       /*
-        * It is potentially slow to have to call find_vma_prev here.
-        * But it's only on the first write fault on the vma, not
-        * every time, and we could devise a way to avoid it later
-        * (e.g. stash info in next's anon_vma_node when assigning
-        * an anon_vma, or when trying vma_merge). Another time.
-        */
-       BUG_ON(find_vma_prev(vma->vm_mm, vma->vm_start, &near) != vma);
+       near = vma->vm_prev;
        if (!near)
                goto none;
 
@@ -968,7 +953,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 {
        struct mm_struct * mm = current->mm;
        struct inode *inode;
-       unsigned int vm_flags;
+       vm_flags_t vm_flags;
        int error;
        unsigned long reqprot = prot;
 
@@ -1108,6 +1093,7 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
        unsigned long retval = -EBADF;
 
        if (!(flags & MAP_ANONYMOUS)) {
+               audit_mmap_fd(fd, flags);
                if (unlikely(flags & MAP_HUGETLB))
                        return -EINVAL;
                file = fget(fd);
@@ -1172,7 +1158,7 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
  */
 int vma_wants_writenotify(struct vm_area_struct *vma)
 {
-       unsigned int vm_flags = vma->vm_flags;
+       vm_flags_t vm_flags = vma->vm_flags;
 
        /* If it was private or non-writable, the write bit is already clear */
        if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
@@ -1200,7 +1186,7 @@ int vma_wants_writenotify(struct vm_area_struct *vma)
  * We account for memory if it's a private writeable mapping,
  * not hugepages and VM_NORESERVE wasn't set.
  */
-static inline int accountable_mapping(struct file *file, unsigned int vm_flags)
+static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
 {
        /*
         * hugetlb has its own accounting separate from the core VM
@@ -1214,7 +1200,7 @@ static inline int accountable_mapping(struct file *file, unsigned int vm_flags)
 
 unsigned long mmap_region(struct file *file, unsigned long addr,
                          unsigned long len, unsigned long flags,
-                         unsigned int vm_flags, unsigned long pgoff)
+                         vm_flags_t vm_flags, unsigned long pgoff)
 {
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma, *prev;
@@ -1752,13 +1738,17 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
                size = address - vma->vm_start;
                grow = (address - vma->vm_end) >> PAGE_SHIFT;
 
-               error = acct_stack_growth(vma, size, grow);
-               if (!error) {
-                       vma->vm_end = address;
-                       perf_event_mmap(vma);
+               error = -ENOMEM;
+               if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
+                       error = acct_stack_growth(vma, size, grow);
+                       if (!error) {
+                               vma->vm_end = address;
+                               perf_event_mmap(vma);
+                       }
                }
        }
        vma_unlock_anon_vma(vma);
+       khugepaged_enter_vma_merge(vma);
        return error;
 }
 #endif /* CONFIG_STACK_GROWSUP || CONFIG_IA64 */
@@ -1766,7 +1756,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
 /*
  * vma is the first one with address < vma->vm_start. Have to extend vma.
  */
-static int expand_downwards(struct vm_area_struct *vma,
+int expand_downwards(struct vm_area_struct *vma,
                                   unsigned long address)
 {
        int error;
@@ -1798,22 +1788,21 @@ static int expand_downwards(struct vm_area_struct *vma,
                size = vma->vm_end - address;
                grow = (vma->vm_start - address) >> PAGE_SHIFT;
 
-               error = acct_stack_growth(vma, size, grow);
-               if (!error) {
-                       vma->vm_start = address;
-                       vma->vm_pgoff -= grow;
-                       perf_event_mmap(vma);
+               error = -ENOMEM;
+               if (grow <= vma->vm_pgoff) {
+                       error = acct_stack_growth(vma, size, grow);
+                       if (!error) {
+                               vma->vm_start = address;
+                               vma->vm_pgoff -= grow;
+                               perf_event_mmap(vma);
+                       }
                }
        }
        vma_unlock_anon_vma(vma);
+       khugepaged_enter_vma_merge(vma);
        return error;
 }
 
-int expand_stack_downwards(struct vm_area_struct *vma, unsigned long address)
-{
-       return expand_downwards(vma, address);
-}
-
 #ifdef CONFIG_STACK_GROWSUP
 int expand_stack(struct vm_area_struct *vma, unsigned long address)
 {
@@ -1896,17 +1885,17 @@ static void unmap_region(struct mm_struct *mm,
                unsigned long start, unsigned long end)
 {
        struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
-       struct mmu_gather *tlb;
+       struct mmu_gather tlb;
        unsigned long nr_accounted = 0;
 
        lru_add_drain();
-       tlb = tlb_gather_mmu(mm, 0);
+       tlb_gather_mmu(&tlb, mm, 0);
        update_hiwater_rss(mm);
        unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
        vm_unacct_memory(nr_accounted);
-       free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
-                     next? next->vm_start: 0);
-       tlb_finish_mmu(tlb, start, end);
+       free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
+                                next ? next->vm_start : 0);
+       tlb_finish_mmu(&tlb, start, end);
 }
 
 /*
@@ -2048,9 +2037,10 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
                return -EINVAL;
 
        /* Find the first overlapping VMA */
-       vma = find_vma_prev(mm, start, &prev);
+       vma = find_vma(mm, start);
        if (!vma)
                return 0;
+       prev = vma->vm_prev;
        /* we have start < vma->vm_end  */
 
        /* if it doesn't overlap, we have nothing.. */
@@ -2248,7 +2238,7 @@ EXPORT_SYMBOL(do_brk);
 /* Release all mmaps. */
 void exit_mmap(struct mm_struct *mm)
 {
-       struct mmu_gather *tlb;
+       struct mmu_gather tlb;
        struct vm_area_struct *vma;
        unsigned long nr_accounted = 0;
        unsigned long end;
@@ -2273,14 +2263,14 @@ void exit_mmap(struct mm_struct *mm)
 
        lru_add_drain();
        flush_cache_mm(mm);
-       tlb = tlb_gather_mmu(mm, 1);
+       tlb_gather_mmu(&tlb, mm, 1);
        /* update_hiwater_rss(mm) here? but nobody should be looking */
        /* Use -1 here to ensure all VMAs in the mm are unmapped */
        end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
        vm_unacct_memory(nr_accounted);
 
-       free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
-       tlb_finish_mmu(tlb, 0, end);
+       free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
+       tlb_finish_mmu(&tlb, 0, end);
 
        /*
         * Walk the list again, actually closing and freeing it,
@@ -2294,7 +2284,7 @@ void exit_mmap(struct mm_struct *mm)
 
 /* Insert vm structure into process list sorted by address
  * and into the inode's i_mmap tree.  If vm_file is non-NULL
- * then i_mmap_lock is taken here.
+ * then i_mmap_mutex is taken here.
  */
 int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
 {
@@ -2460,6 +2450,7 @@ int install_special_mapping(struct mm_struct *mm,
                            unsigned long addr, unsigned long len,
                            unsigned long vm_flags, struct page **pages)
 {
+       int ret;
        struct vm_area_struct *vma;
 
        vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
@@ -2477,16 +2468,23 @@ int install_special_mapping(struct mm_struct *mm,
        vma->vm_ops = &special_mapping_vmops;
        vma->vm_private_data = pages;
 
-       if (unlikely(insert_vm_struct(mm, vma))) {
-               kmem_cache_free(vm_area_cachep, vma);
-               return -ENOMEM;
-       }
+       ret = security_file_mmap(NULL, 0, 0, 0, vma->vm_start, 1);
+       if (ret)
+               goto out;
+
+       ret = insert_vm_struct(mm, vma);
+       if (ret)
+               goto out;
 
        mm->total_vm += len >> PAGE_SHIFT;
 
        perf_event_mmap(vma);
 
        return 0;
+
+out:
+       kmem_cache_free(vm_area_cachep, vma);
+       return ret;
 }
 
 static DEFINE_MUTEX(mm_all_locks_mutex);
@@ -2498,15 +2496,15 @@ static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
                 * The LSB of head.next can't change from under us
                 * because we hold the mm_all_locks_mutex.
                 */
-               spin_lock_nest_lock(&anon_vma->root->lock, &mm->mmap_sem);
+               mutex_lock_nest_lock(&anon_vma->root->mutex, &mm->mmap_sem);
                /*
                 * We can safely modify head.next after taking the
-                * anon_vma->root->lock. If some other vma in this mm shares
+                * anon_vma->root->mutex. If some other vma in this mm shares
                 * the same anon_vma we won't take it again.
                 *
                 * No need of atomic instructions here, head.next
                 * can't change from under us thanks to the
-                * anon_vma->root->lock.
+                * anon_vma->root->mutex.
                 */
                if (__test_and_set_bit(0, (unsigned long *)
                      &anon_vma->root->head.next))
@@ -2528,7 +2526,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
                 */
                if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
                        BUG();
-               spin_lock_nest_lock(&mapping->i_mmap_lock, &mm->mmap_sem);
+               mutex_lock_nest_lock(&mapping->i_mmap_mutex, &mm->mmap_sem);
        }
 }
 
@@ -2555,7 +2553,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
  * vma in this mm is backed by the same anon_vma or address_space.
  *
  * We can take all the locks in random order because the VM code
- * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never
+ * taking i_mmap_mutex or anon_vma->mutex outside the mmap_sem never
  * takes more than one of them in a row. Secondly we're protected
  * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
  *
@@ -2611,7 +2609,7 @@ static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
                 *
                 * No need of atomic instructions here, head.next
                 * can't change from under us until we release the
-                * anon_vma->root->lock.
+                * anon_vma->root->mutex.
                 */
                if (!__test_and_clear_bit(0, (unsigned long *)
                      &anon_vma->root->head.next))
@@ -2627,7 +2625,7 @@ static void vm_unlock_mapping(struct address_space *mapping)
                 * AS_MM_ALL_LOCKS can't change to 0 from under us
                 * because we hold the mm_all_locks_mutex.
                 */
-               spin_unlock(&mapping->i_mmap_lock);
+               mutex_unlock(&mapping->i_mmap_mutex);
                if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
                                        &mapping->flags))
                        BUG();