Diffstat (limited to 'mm/mmap.c')
-rw-r--r--  mm/mmap.c  121
1 file changed, 83 insertions(+), 38 deletions(-)
@@ -32,6 +32,7 @@
 #include <linux/khugepaged.h>
 #include <linux/uprobes.h>
 #include <linux/rbtree_augmented.h>
+#include <linux/sched/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -143,7 +144,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
         */
        free -= global_page_state(NR_SHMEM);
 
-       free += nr_swap_pages;
+       free += get_nr_swap_pages();
 
        /*
         * Any slabs which are created with the
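
A note on this hunk: nr_swap_pages is no longer a bare long that can be read directly; the swap rework in this cycle made it atomic, so __vm_enough_memory() now reads it through an accessor. A minimal sketch of the assumed definition (the real one lives in include/linux/swap.h):

    /* Sketch, not the verbatim kernel source: nr_swap_pages became an
     * atomic_long_t, so lockless readers go through an accessor. */
    extern atomic_long_t nr_swap_pages;

    static inline long get_nr_swap_pages(void)
    {
            return atomic_long_read(&nr_swap_pages);
    }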
@@ -202,7 +203,7 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma,
                struct file *file, struct address_space *mapping)
 {
        if (vma->vm_flags & VM_DENYWRITE)
-               atomic_inc(&file->f_path.dentry->d_inode->i_writecount);
+               atomic_inc(&file_inode(file)->i_writecount);
        if (vma->vm_flags & VM_SHARED)
                mapping->i_mmap_writable--;
 
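
This hunk (and several below) replaces the open-coded file->f_path.dentry->d_inode chain with file_inode(). A hedged sketch of that helper, assuming the same-cycle change that caches the inode pointer in struct file:

    /* Assumed shape of the helper; equivalent to the chain it replaces. */
    static inline struct inode *file_inode(const struct file *f)
    {
            return f->f_inode;      /* cached when the file is opened */
    }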
@@ -255,6 +256,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
        unsigned long newbrk, oldbrk;
        struct mm_struct *mm = current->mm;
        unsigned long min_brk;
+       bool populate;
 
        down_write(&mm->mmap_sem);
 
@@ -304,8 +306,15 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
        /* Ok, looks good - let it rip. */
        if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
                goto out;
+
 set_brk:
        mm->brk = brk;
+       populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
+       up_write(&mm->mmap_sem);
+       if (populate)
+               mm_populate(oldbrk, newbrk - oldbrk);
+       return brk;
+
 out:
        retval = mm->brk;
        up_write(&mm->mmap_sem);
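
This hunk is the core pattern of the series: decide under mmap_sem whether the new range must be populated (mlockall(MCL_FUTURE) leaves VM_LOCKED set in mm->def_flags), drop the lock, and only then fault the pages in. Restated outside the diff as a sketch; mm_populate() is the helper this series introduces, so treat its exact signature as an assumption:

    down_write(&mm->mmap_sem);
    ret = do_brk(oldbrk, newbrk - oldbrk);        /* create the vma only */
    populate = newbrk > oldbrk &&
               (mm->def_flags & VM_LOCKED) != 0;  /* decide under the lock */
    up_write(&mm->mmap_sem);

    if (populate)
            mm_populate(oldbrk, newbrk - oldbrk); /* fault in, may sleep,
                                                     no mmap_sem held here */

The point is that page faults and allocation, which can block for a long time, no longer run with the address-space lock held for writing.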
@@ -567,7 +576,7 @@ static void __vma_link_file(struct vm_area_struct *vma)
                struct address_space *mapping = file->f_mapping;
 
                if (vma->vm_flags & VM_DENYWRITE)
-                       atomic_dec(&file->f_path.dentry->d_inode->i_writecount);
+                       atomic_dec(&file_inode(file)->i_writecount);
                if (vma->vm_flags & VM_SHARED)
                        mapping->i_mmap_writable++;
 
@@ -800,7 +809,7 @@ again: remove_next = 1 + (end > next->vm_end);
                anon_vma_interval_tree_post_update_vma(vma);
                if (adjust_next)
                        anon_vma_interval_tree_post_update_vma(next);
-               anon_vma_unlock(anon_vma);
+               anon_vma_unlock_write(anon_vma);
        }
        if (mapping)
                mutex_unlock(&mapping->i_mmap_mutex);
@@ -1153,12 +1162,15 @@ static inline unsigned long round_hint_to_min(unsigned long hint)
 
 unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
                        unsigned long len, unsigned long prot,
-                       unsigned long flags, unsigned long pgoff)
+                       unsigned long flags, unsigned long pgoff,
+                       unsigned long *populate)
 {
        struct mm_struct * mm = current->mm;
        struct inode *inode;
        vm_flags_t vm_flags;
 
+       *populate = 0;
+
        /*
         * Does the application expect PROT_READ to imply PROT_EXEC?
         *
@@ -1217,7 +1229,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
                        return -EAGAIN;
        }
 
-       inode = file ? file->f_path.dentry->d_inode : NULL;
+       inode = file ? file_inode(file) : NULL;
 
        if (file) {
                switch (flags & MAP_TYPE) {
@@ -1279,7 +1291,26 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
                }
        }
 
-       return mmap_region(file, addr, len, flags, vm_flags, pgoff);
+       /*
+        * Set 'VM_NORESERVE' if we should not account for the
+        * memory use of this mapping.
+        */
+       if (flags & MAP_NORESERVE) {
+               /* We honor MAP_NORESERVE if allowed to overcommit */
+               if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
+                       vm_flags |= VM_NORESERVE;
+
+               /* hugetlb applies strict overcommit unless MAP_NORESERVE */
+               if (file && is_file_hugepages(file))
+                       vm_flags |= VM_NORESERVE;
+       }
+
+       addr = mmap_region(file, addr, len, vm_flags, pgoff);
+       if (!IS_ERR_VALUE(addr) &&
+           ((vm_flags & VM_LOCKED) ||
+            (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
+               *populate = len;
+       return addr;
 }
 
 SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
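
With population moved out of do_mmap_pgoff(), every caller is expected to pass the new out-parameter and populate after unlocking. A hedged sketch of the expected caller shape, modeled on vm_mmap_pgoff() in mm/util.c (security hook and error handling trimmed; treat details as assumptions):

    unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr,
                                unsigned long len, unsigned long prot,
                                unsigned long flag, unsigned long pgoff)
    {
            struct mm_struct *mm = current->mm;
            unsigned long populate;
            unsigned long ret;

            down_write(&mm->mmap_sem);
            ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff,
                                &populate);
            up_write(&mm->mmap_sem);

            /* populate is nonzero for VM_LOCKED mappings and for
             * MAP_POPULATE without MAP_NONBLOCK, per the hunk above */
            if (populate)
                    mm_populate(ret, populate);
            return ret;
    }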
@@ -1394,8 +1425,7 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
 }
 
 unsigned long mmap_region(struct file *file, unsigned long addr,
-               unsigned long len, unsigned long flags,
-               vm_flags_t vm_flags, unsigned long pgoff)
+               unsigned long len, vm_flags_t vm_flags, unsigned long pgoff)
 {
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma, *prev;
@@ -1403,7 +1433,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
        int error;
        struct rb_node **rb_link, *rb_parent;
        unsigned long charged = 0;
-       struct inode *inode = file ? file->f_path.dentry->d_inode : NULL;
+       struct inode *inode = file ? file_inode(file) : NULL;
 
        /* Clear old maps */
        error = -ENOMEM;
@@ -1419,20 +1449,6 @@ munmap_back:
                return -ENOMEM;
 
        /*
-        * Set 'VM_NORESERVE' if we should not account for the
-        * memory use of this mapping.
-        */
-       if ((flags & MAP_NORESERVE)) {
-               /* We honor MAP_NORESERVE if allowed to overcommit */
-               if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
-                       vm_flags |= VM_NORESERVE;
-
-               /* hugetlb applies strict overcommit unless MAP_NORESERVE */
-               if (file && is_file_hugepages(file))
-                       vm_flags |= VM_NORESERVE;
-       }
-
-       /*
         * Private writable mapping: check memory availability
         */
        if (accountable_mapping(file, vm_flags)) {
@@ -1530,10 +1546,12 @@ out:
 
        vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
        if (vm_flags & VM_LOCKED) {
-               if (!mlock_vma_pages_range(vma, addr, addr + len))
+               if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) ||
+                                       vma == get_gate_vma(current->mm)))
                        mm->locked_vm += (len >> PAGE_SHIFT);
-       } else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
-               make_pages_present(addr, addr + len);
+               else
+                       vma->vm_flags &= ~VM_LOCKED;
+       }
 
        if (file)
                uprobe_mmap(vma);
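
mmap_region() now only does the mlock bookkeeping: locked_vm grows for mappings that mm_populate() will actually be able to fault in, and VM_LOCKED is quietly dropped from the ones it cannot. The open-coded test could be read as a predicate like the following (hypothetical helper name, not in the tree):

    /* Hypothetical: mirrors the condition open-coded in the hunk above.
     * VM_SPECIAL groups VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP. */
    static bool vma_mlock_accountable(struct vm_area_struct *vma,
                                      vm_flags_t vm_flags)
    {
            return !((vm_flags & VM_SPECIAL) ||
                     is_vm_hugetlb_page(vma) ||
                     vma == get_gate_vma(current->mm));
    }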
@@ -2169,9 +2187,28 @@ int expand_downwards(struct vm_area_struct *vma,
        return error;
 }
 
+/*
+ * Note how expand_stack() refuses to expand the stack all the way to
+ * abut the next virtual mapping, *unless* that mapping itself is also
+ * a stack mapping. We want to leave room for a guard page, after all
+ * (the guard page itself is not added here, that is done by the
+ * actual page faulting logic)
+ *
+ * This matches the behavior of the guard page logic (see mm/memory.c:
+ * check_stack_guard_page()), which only allows the guard page to be
+ * removed under these circumstances.
+ */
 #ifdef CONFIG_STACK_GROWSUP
 int expand_stack(struct vm_area_struct *vma, unsigned long address)
 {
+       struct vm_area_struct *next;
+
+       address &= PAGE_MASK;
+       next = vma->vm_next;
+       if (next && next->vm_start == address + PAGE_SIZE) {
+               if (!(next->vm_flags & VM_GROWSUP))
+                       return -ENOMEM;
+       }
        return expand_upwards(vma, address);
 }
 
@@ -2186,14 +2223,21 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
                return vma;
        if (!prev || expand_stack(prev, addr))
                return NULL;
-       if (prev->vm_flags & VM_LOCKED) {
-               mlock_vma_pages_range(prev, addr, prev->vm_end);
-       }
+       if (prev->vm_flags & VM_LOCKED)
+               __mlock_vma_pages_range(prev, addr, prev->vm_end, NULL);
        return prev;
 }
 #else
 int expand_stack(struct vm_area_struct *vma, unsigned long address)
 {
+       struct vm_area_struct *prev;
+
+       address &= PAGE_MASK;
+       prev = vma->vm_prev;
+       if (prev && prev->vm_end == address) {
+               if (!(prev->vm_flags & VM_GROWSDOWN))
+                       return -ENOMEM;
+       }
        return expand_downwards(vma, address);
 }
 
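
Both branches enforce the same rule: the stack refuses to grow flush against a neighboring mapping unless that neighbor is itself a stack growing the same way, preserving room for the guard page. A worked example for the GROWSDOWN branch, with hypothetical addresses and 4 KiB pages:

    /*
     * prev: 0x7f1200000000 - 0x7f1200021000   (no VM_GROWSDOWN)
     * vma:  0x7f1200022000 - 0x7f1200040000   (VM_GROWSDOWN stack)
     *
     * A fault at 0x7f1200021123 asks the stack to expand downwards.
     * address &= PAGE_MASK yields 0x7f1200021000, which equals
     * prev->vm_end; since prev lacks VM_GROWSDOWN, expand_stack()
     * returns -ENOMEM, and the page at 0x7f1200021000 stays unmapped
     * as the guard gap between prev and the stack.
     */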
@@ -2214,9 +2258,8 @@ find_extend_vma(struct mm_struct * mm, unsigned long addr)
        start = vma->vm_start;
        if (expand_stack(vma, addr))
                return NULL;
-       if (vma->vm_flags & VM_LOCKED) {
-               mlock_vma_pages_range(vma, addr, start);
-       }
+       if (vma->vm_flags & VM_LOCKED)
+               __mlock_vma_pages_range(vma, addr, start, NULL);
        return vma;
 }
 #endif
@@ -2589,10 +2632,8 @@ static unsigned long do_brk(unsigned long addr, unsigned long len)
 out:
        perf_event_mmap(vma);
        mm->total_vm += len >> PAGE_SHIFT;
-       if (flags & VM_LOCKED) {
-               if (!mlock_vma_pages_range(vma, addr, addr + len))
-                       mm->locked_vm += (len >> PAGE_SHIFT);
-       }
+       if (flags & VM_LOCKED)
+               mm->locked_vm += (len >> PAGE_SHIFT);
        return addr;
 }
 
@@ -2600,10 +2641,14 @@ unsigned long vm_brk(unsigned long addr, unsigned long len)
 {
        struct mm_struct *mm = current->mm;
        unsigned long ret;
+       bool populate;
 
        down_write(&mm->mmap_sem);
        ret = do_brk(addr, len);
+       populate = ((mm->def_flags & VM_LOCKED) != 0);
        up_write(&mm->mmap_sem);
+       if (populate)
+               mm_populate(addr, len);
        return ret;
 }
 EXPORT_SYMBOL(vm_brk);
@@ -3001,7 +3046,7 @@ static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
                if (!__test_and_clear_bit(0, (unsigned long *)
                                          &anon_vma->root->rb_root.rb_node))
                        BUG();
-               anon_vma_unlock(anon_vma);
+               anon_vma_unlock_write(anon_vma);
        }
 }
 
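
The anon_vma_unlock() to anon_vma_unlock_write() renames here track the conversion of the anon_vma lock from a mutex to an rwsem, which makes the write-side naming explicit. The helper pair is assumed to look roughly like this (see include/linux/rmap.h):

    static inline void anon_vma_lock_write(struct anon_vma *anon_vma)
    {
            down_write(&anon_vma->root->rwsem);
    }

    static inline void anon_vma_unlock_write(struct anon_vma *anon_vma)
    {
            up_write(&anon_vma->root->rwsem);
    }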
