Diffstat (limited to 'mm/mmap.c')
 mm/mmap.c | 121 +++++++++++++++++++++++++++++++++++++++++++++-------------------
1 files changed, 83 insertions, 38 deletions
diff --git a/mm/mmap.c b/mm/mmap.c
index d1e4124f3d0e..6466699b16cb 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -32,6 +32,7 @@
 #include <linux/khugepaged.h>
 #include <linux/uprobes.h>
 #include <linux/rbtree_augmented.h>
+#include <linux/sched/sysctl.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -143,7 +144,7 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
 	 */
 	free -= global_page_state(NR_SHMEM);
 
-	free += nr_swap_pages;
+	free += get_nr_swap_pages();
 
 	/*
 	 * Any slabs which are created with the
@@ -202,7 +203,7 @@ static void __remove_shared_vm_struct(struct vm_area_struct *vma,
 		struct file *file, struct address_space *mapping)
 {
 	if (vma->vm_flags & VM_DENYWRITE)
-		atomic_inc(&file->f_path.dentry->d_inode->i_writecount);
+		atomic_inc(&file_inode(file)->i_writecount);
 	if (vma->vm_flags & VM_SHARED)
 		mapping->i_mmap_writable--;
 
@@ -255,6 +256,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 	unsigned long newbrk, oldbrk;
 	struct mm_struct *mm = current->mm;
 	unsigned long min_brk;
+	bool populate;
 
 	down_write(&mm->mmap_sem);
 
@@ -304,8 +306,15 @@ SYSCALL_DEFINE1(brk, unsigned long, brk)
 	/* Ok, looks good - let it rip. */
 	if (do_brk(oldbrk, newbrk-oldbrk) != oldbrk)
 		goto out;
+
 set_brk:
 	mm->brk = brk;
+	populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0;
+	up_write(&mm->mmap_sem);
+	if (populate)
+		mm_populate(oldbrk, newbrk - oldbrk);
+	return brk;
+
 out:
 	retval = mm->brk;
 	up_write(&mm->mmap_sem);
@@ -567,7 +576,7 @@ static void __vma_link_file(struct vm_area_struct *vma)
 	struct address_space *mapping = file->f_mapping;
 
 	if (vma->vm_flags & VM_DENYWRITE)
-		atomic_dec(&file->f_path.dentry->d_inode->i_writecount);
+		atomic_dec(&file_inode(file)->i_writecount);
 	if (vma->vm_flags & VM_SHARED)
 		mapping->i_mmap_writable++;
 
@@ -800,7 +809,7 @@ again: remove_next = 1 + (end > next->vm_end);
 		anon_vma_interval_tree_post_update_vma(vma);
 		if (adjust_next)
 			anon_vma_interval_tree_post_update_vma(next);
-		anon_vma_unlock(anon_vma);
+		anon_vma_unlock_write(anon_vma);
 	}
 	if (mapping)
 		mutex_unlock(&mapping->i_mmap_mutex);
@@ -1153,12 +1162,15 @@ static inline unsigned long round_hint_to_min(unsigned long hint)
 
 unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 			unsigned long len, unsigned long prot,
-			unsigned long flags, unsigned long pgoff)
+			unsigned long flags, unsigned long pgoff,
+			unsigned long *populate)
 {
 	struct mm_struct * mm = current->mm;
 	struct inode *inode;
 	vm_flags_t vm_flags;
 
+	*populate = 0;
+
 	/*
 	 * Does the application expect PROT_READ to imply PROT_EXEC?
 	 *
@@ -1217,7 +1229,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 			return -EAGAIN;
 	}
 
-	inode = file ? file->f_path.dentry->d_inode : NULL;
+	inode = file ? file_inode(file) : NULL;
 
 	if (file) {
 		switch (flags & MAP_TYPE) {
@@ -1279,7 +1291,26 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 		}
 	}
 
-	return mmap_region(file, addr, len, flags, vm_flags, pgoff);
+	/*
+	 * Set 'VM_NORESERVE' if we should not account for the
+	 * memory use of this mapping.
+	 */
+	if (flags & MAP_NORESERVE) {
+		/* We honor MAP_NORESERVE if allowed to overcommit */
+		if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
+			vm_flags |= VM_NORESERVE;
+
+		/* hugetlb applies strict overcommit unless MAP_NORESERVE */
+		if (file && is_file_hugepages(file))
+			vm_flags |= VM_NORESERVE;
+	}
+
+	addr = mmap_region(file, addr, len, vm_flags, pgoff);
+	if (!IS_ERR_VALUE(addr) &&
+	    ((vm_flags & VM_LOCKED) ||
+	     (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
+		*populate = len;
+	return addr;
 }
 
 SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
@@ -1394,8 +1425,7 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
 }
 
 unsigned long mmap_region(struct file *file, unsigned long addr,
-		unsigned long len, unsigned long flags,
-		vm_flags_t vm_flags, unsigned long pgoff)
+		unsigned long len, vm_flags_t vm_flags, unsigned long pgoff)
 {
 	struct mm_struct *mm = current->mm;
 	struct vm_area_struct *vma, *prev;
@@ -1403,7 +1433,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
 	int error;
 	struct rb_node **rb_link, *rb_parent;
 	unsigned long charged = 0;
-	struct inode *inode = file ? file->f_path.dentry->d_inode : NULL;
+	struct inode *inode = file ? file_inode(file) : NULL;
 
 	/* Clear old maps */
 	error = -ENOMEM;
@@ -1419,20 +1449,6 @@ munmap_back:
 		return -ENOMEM;
 
 	/*
-	 * Set 'VM_NORESERVE' if we should not account for the
-	 * memory use of this mapping.
-	 */
-	if ((flags & MAP_NORESERVE)) {
-		/* We honor MAP_NORESERVE if allowed to overcommit */
-		if (sysctl_overcommit_memory != OVERCOMMIT_NEVER)
-			vm_flags |= VM_NORESERVE;
-
-		/* hugetlb applies strict overcommit unless MAP_NORESERVE */
-		if (file && is_file_hugepages(file))
-			vm_flags |= VM_NORESERVE;
-	}
-
-	/*
 	 * Private writable mapping: check memory availability
 	 */
 	if (accountable_mapping(file, vm_flags)) {
@@ -1530,10 +1546,12 @@ out:
 
 	vm_stat_account(mm, vm_flags, file, len >> PAGE_SHIFT);
 	if (vm_flags & VM_LOCKED) {
-		if (!mlock_vma_pages_range(vma, addr, addr + len))
+		if (!((vm_flags & VM_SPECIAL) || is_vm_hugetlb_page(vma) ||
+					vma == get_gate_vma(current->mm)))
 			mm->locked_vm += (len >> PAGE_SHIFT);
-	} else if ((flags & MAP_POPULATE) && !(flags & MAP_NONBLOCK))
-		make_pages_present(addr, addr + len);
+		else
+			vma->vm_flags &= ~VM_LOCKED;
+	}
 
 	if (file)
 		uprobe_mmap(vma);
@@ -2169,9 +2187,28 @@ int expand_downwards(struct vm_area_struct *vma,
 	return error;
 }
 
+/*
+ * Note how expand_stack() refuses to expand the stack all the way to
+ * abut the next virtual mapping, *unless* that mapping itself is also
+ * a stack mapping.  We want to leave room for a guard page, after all
+ * (the guard page itself is not added here, that is done by the
+ * actual page faulting logic)
+ *
+ * This matches the behavior of the guard page logic (see mm/memory.c:
+ * check_stack_guard_page()), which only allows the guard page to be
+ * removed under these circumstances.
+ */
 #ifdef CONFIG_STACK_GROWSUP
 int expand_stack(struct vm_area_struct *vma, unsigned long address)
 {
+	struct vm_area_struct *next;
+
+	address &= PAGE_MASK;
+	next = vma->vm_next;
+	if (next && next->vm_start == address + PAGE_SIZE) {
+		if (!(next->vm_flags & VM_GROWSUP))
+			return -ENOMEM;
+	}
 	return expand_upwards(vma, address);
 }
 
@@ -2186,14 +2223,21 @@ find_extend_vma(struct mm_struct *mm, unsigned long addr)
 		return vma;
 	if (!prev || expand_stack(prev, addr))
 		return NULL;
-	if (prev->vm_flags & VM_LOCKED) {
-		mlock_vma_pages_range(prev, addr, prev->vm_end);
-	}
+	if (prev->vm_flags & VM_LOCKED)
+		__mlock_vma_pages_range(prev, addr, prev->vm_end, NULL);
 	return prev;
 }
 #else
 int expand_stack(struct vm_area_struct *vma, unsigned long address)
 {
+	struct vm_area_struct *prev;
+
+	address &= PAGE_MASK;
+	prev = vma->vm_prev;
+	if (prev && prev->vm_end == address) {
+		if (!(prev->vm_flags & VM_GROWSDOWN))
+			return -ENOMEM;
+	}
 	return expand_downwards(vma, address);
 }
 
@@ -2214,9 +2258,8 @@ find_extend_vma(struct mm_struct * mm, unsigned long addr)
 	start = vma->vm_start;
 	if (expand_stack(vma, addr))
 		return NULL;
-	if (vma->vm_flags & VM_LOCKED) {
-		mlock_vma_pages_range(vma, addr, start);
-	}
+	if (vma->vm_flags & VM_LOCKED)
+		__mlock_vma_pages_range(vma, addr, start, NULL);
 	return vma;
 }
 #endif
@@ -2589,10 +2632,8 @@ static unsigned long do_brk(unsigned long addr, unsigned long len)
 out:
 	perf_event_mmap(vma);
 	mm->total_vm += len >> PAGE_SHIFT;
-	if (flags & VM_LOCKED) {
-		if (!mlock_vma_pages_range(vma, addr, addr + len))
-			mm->locked_vm += (len >> PAGE_SHIFT);
-	}
+	if (flags & VM_LOCKED)
+		mm->locked_vm += (len >> PAGE_SHIFT);
 	return addr;
 }
 
@@ -2600,10 +2641,14 @@ unsigned long vm_brk(unsigned long addr, unsigned long len)
 {
 	struct mm_struct *mm = current->mm;
 	unsigned long ret;
+	bool populate;
 
 	down_write(&mm->mmap_sem);
 	ret = do_brk(addr, len);
+	populate = ((mm->def_flags & VM_LOCKED) != 0);
 	up_write(&mm->mmap_sem);
+	if (populate)
+		mm_populate(addr, len);
 	return ret;
 }
 EXPORT_SYMBOL(vm_brk);
@@ -3001,7 +3046,7 @@ static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
 		if (!__test_and_clear_bit(0, (unsigned long *)
 					&anon_vma->root->rb_root.rb_node))
 			BUG();
-		anon_vma_unlock(anon_vma);
+		anon_vma_unlock_write(anon_vma);
 	}
 }
 
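
With this change, do_mmap_pgoff() no longer faults pages in itself; it only reports, through the new *populate out-parameter, how many bytes of the mapping the caller should populate after mmap_sem has been dropped (the brk() and vm_brk() hunks above follow the same pattern). A minimal caller-side sketch, assuming the mm_populate() helper used elsewhere in this patch; the wrapper name map_and_populate() is hypothetical and only illustrates the expected calling convention:

	static unsigned long map_and_populate(struct file *file, unsigned long addr,
					      unsigned long len, unsigned long prot,
					      unsigned long flags, unsigned long pgoff)
	{
		struct mm_struct *mm = current->mm;
		unsigned long populate = 0;
		unsigned long ret;

		down_write(&mm->mmap_sem);
		ret = do_mmap_pgoff(file, addr, len, prot, flags, pgoff, &populate);
		up_write(&mm->mmap_sem);

		/* Fault the pages in only after mmap_sem has been released. */
		if (!IS_ERR_VALUE(ret) && populate)
			mm_populate(ret, populate);
		return ret;
	}

Doing the population outside the lock is the point of the series: gup can sleep and take faults, so holding mmap_sem for write across it is unnecessary contention.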