Diffstat (limited to 'mm')
| -rw-r--r-- | mm/filemap.c        |  51 |
| -rw-r--r-- | mm/kmemleak.c       |   4 |
| -rw-r--r-- | mm/memcontrol.c     |   6 |
| -rw-r--r-- | mm/memory-failure.c |   2 |
| -rw-r--r-- | mm/mmap.c           |  42 |
| -rw-r--r-- | mm/mremap.c         | 241 |
| -rw-r--r-- | mm/slab.c           | 118 |
| -rw-r--r-- | mm/slub.c           |  20 |
| -rw-r--r-- | mm/truncate.c       |   2 |
| -rw-r--r-- | mm/util.c           |  44 |
10 files changed, 320 insertions, 210 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index ef169f37156d..8b4d88f9249e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
| @@ -260,27 +260,27 @@ int filemap_flush(struct address_space *mapping) | |||
| 260 | EXPORT_SYMBOL(filemap_flush); | 260 | EXPORT_SYMBOL(filemap_flush); |
| 261 | 261 | ||
| 262 | /** | 262 | /** |
| 263 | * wait_on_page_writeback_range - wait for writeback to complete | 263 | * filemap_fdatawait_range - wait for writeback to complete |
| 264 | * @mapping: target address_space | 264 | * @mapping: address space structure to wait for |
| 265 | * @start: beginning page index | 265 | * @start_byte: offset in bytes where the range starts |
| 266 | * @end: ending page index | 266 | * @end_byte: offset in bytes where the range ends (inclusive) |
| 267 | * | 267 | * |
| 268 | * Wait for writeback to complete against pages indexed by start->end | 268 | * Walk the list of under-writeback pages of the given address space |
| 269 | * inclusive | 269 | * in the given range and wait for all of them. |
| 270 | */ | 270 | */ |
| 271 | int wait_on_page_writeback_range(struct address_space *mapping, | 271 | int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte, |
| 272 | pgoff_t start, pgoff_t end) | 272 | loff_t end_byte) |
| 273 | { | 273 | { |
| 274 | pgoff_t index = start_byte >> PAGE_CACHE_SHIFT; | ||
| 275 | pgoff_t end = end_byte >> PAGE_CACHE_SHIFT; | ||
| 274 | struct pagevec pvec; | 276 | struct pagevec pvec; |
| 275 | int nr_pages; | 277 | int nr_pages; |
| 276 | int ret = 0; | 278 | int ret = 0; |
| 277 | pgoff_t index; | ||
| 278 | 279 | ||
| 279 | if (end < start) | 280 | if (end_byte < start_byte) |
| 280 | return 0; | 281 | return 0; |
| 281 | 282 | ||
| 282 | pagevec_init(&pvec, 0); | 283 | pagevec_init(&pvec, 0); |
| 283 | index = start; | ||
| 284 | while ((index <= end) && | 284 | while ((index <= end) && |
| 285 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, | 285 | (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, |
| 286 | PAGECACHE_TAG_WRITEBACK, | 286 | PAGECACHE_TAG_WRITEBACK, |
| @@ -310,25 +310,6 @@ int wait_on_page_writeback_range(struct address_space *mapping, | |||
| 310 | 310 | ||
| 311 | return ret; | 311 | return ret; |
| 312 | } | 312 | } |
| 313 | |||
| 314 | /** | ||
| 315 | * filemap_fdatawait_range - wait for all under-writeback pages to complete in a given range | ||
| 316 | * @mapping: address space structure to wait for | ||
| 317 | * @start: offset in bytes where the range starts | ||
| 318 | * @end: offset in bytes where the range ends (inclusive) | ||
| 319 | * | ||
| 320 | * Walk the list of under-writeback pages of the given address space | ||
| 321 | * in the given range and wait for all of them. | ||
| 322 | * | ||
| 323 | * This is just a simple wrapper so that callers don't have to convert offsets | ||
| 324 | * to page indexes themselves | ||
| 325 | */ | ||
| 326 | int filemap_fdatawait_range(struct address_space *mapping, loff_t start, | ||
| 327 | loff_t end) | ||
| 328 | { | ||
| 329 | return wait_on_page_writeback_range(mapping, start >> PAGE_CACHE_SHIFT, | ||
| 330 | end >> PAGE_CACHE_SHIFT); | ||
| 331 | } | ||
| 332 | EXPORT_SYMBOL(filemap_fdatawait_range); | 313 | EXPORT_SYMBOL(filemap_fdatawait_range); |
| 333 | 314 | ||
| 334 | /** | 315 | /** |
| @@ -345,8 +326,7 @@ int filemap_fdatawait(struct address_space *mapping) | |||
| 345 | if (i_size == 0) | 326 | if (i_size == 0) |
| 346 | return 0; | 327 | return 0; |
| 347 | 328 | ||
| 348 | return wait_on_page_writeback_range(mapping, 0, | 329 | return filemap_fdatawait_range(mapping, 0, i_size - 1); |
| 349 | (i_size - 1) >> PAGE_CACHE_SHIFT); | ||
| 350 | } | 330 | } |
| 351 | EXPORT_SYMBOL(filemap_fdatawait); | 331 | EXPORT_SYMBOL(filemap_fdatawait); |
| 352 | 332 | ||
| @@ -393,9 +373,8 @@ int filemap_write_and_wait_range(struct address_space *mapping, | |||
| 393 | WB_SYNC_ALL); | 373 | WB_SYNC_ALL); |
| 394 | /* See comment of filemap_write_and_wait() */ | 374 | /* See comment of filemap_write_and_wait() */ |
| 395 | if (err != -EIO) { | 375 | if (err != -EIO) { |
| 396 | int err2 = wait_on_page_writeback_range(mapping, | 376 | int err2 = filemap_fdatawait_range(mapping, |
| 397 | lstart >> PAGE_CACHE_SHIFT, | 377 | lstart, lend); |
| 398 | lend >> PAGE_CACHE_SHIFT); | ||
| 399 | if (!err) | 378 | if (!err) |
| 400 | err = err2; | 379 | err = err2; |
| 401 | } | 380 | } |
| @@ -1844,7 +1823,7 @@ static size_t __iovec_copy_from_user_inatomic(char *vaddr, | |||
| 1844 | 1823 | ||
| 1845 | /* | 1824 | /* |
| 1846 | * Copy as much as we can into the page and return the number of bytes which | 1825 | * Copy as much as we can into the page and return the number of bytes which |
| 1847 | * were sucessfully copied. If a fault is encountered then return the number of | 1826 | * were successfully copied. If a fault is encountered then return the number of |
| 1848 | * bytes which were copied. | 1827 | * bytes which were copied. |
| 1849 | */ | 1828 | */ |
| 1850 | size_t iov_iter_copy_from_user_atomic(struct page *page, | 1829 | size_t iov_iter_copy_from_user_atomic(struct page *page, |
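Note: the filemap.c change folds wait_on_page_writeback_range() into filemap_fdatawait_range(), so callers pass byte offsets and the page-index conversion happens inside. Below is a minimal illustrative sketch, not part of the patch, of how a caller's sync path might use the byte-based interface; the wrapper function and its arguments are hypothetical.

    #include <linux/fs.h>
    #include <linux/pagemap.h>

    /* Hypothetical caller: wait for writeback on [pos, pos + count) */
    static int example_wait_dirty_range(struct address_space *mapping,
                                        loff_t pos, size_t count)
    {
            /* end_byte is inclusive, hence the -1 */
            return filemap_fdatawait_range(mapping, pos, pos + count - 1);
    }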
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 8bf765c4f58d..13f33b3081ec 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
| @@ -1050,8 +1050,8 @@ static void scan_object(struct kmemleak_object *object) | |||
| 1050 | unsigned long flags; | 1050 | unsigned long flags; |
| 1051 | 1051 | ||
| 1052 | /* | 1052 | /* |
| 1053 | * Once the object->lock is aquired, the corresponding memory block | 1053 | * Once the object->lock is acquired, the corresponding memory block |
| 1054 | * cannot be freed (the same lock is aquired in delete_object). | 1054 | * cannot be freed (the same lock is acquired in delete_object). |
| 1055 | */ | 1055 | */ |
| 1056 | spin_lock_irqsave(&object->lock, flags); | 1056 | spin_lock_irqsave(&object->lock, flags); |
| 1057 | if (object->flags & OBJECT_NO_SCAN) | 1057 | if (object->flags & OBJECT_NO_SCAN) |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f99f5991d6bb..c31a310aa146 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
| @@ -209,7 +209,7 @@ struct mem_cgroup { | |||
| 209 | int prev_priority; /* for recording reclaim priority */ | 209 | int prev_priority; /* for recording reclaim priority */ |
| 210 | 210 | ||
| 211 | /* | 211 | /* |
| 212 | * While reclaiming in a hiearchy, we cache the last child we | 212 | * While reclaiming in a hierarchy, we cache the last child we |
| 213 | * reclaimed from. | 213 | * reclaimed from. |
| 214 | */ | 214 | */ |
| 215 | int last_scanned_child; | 215 | int last_scanned_child; |
| @@ -1720,7 +1720,7 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, | |||
| 1720 | /* | 1720 | /* |
| 1721 | * While swap-in, try_charge -> commit or cancel, the page is locked. | 1721 | * While swap-in, try_charge -> commit or cancel, the page is locked. |
| 1722 | * And when try_charge() successfully returns, one refcnt to memcg without | 1722 | * And when try_charge() successfully returns, one refcnt to memcg without |
| 1723 | * struct page_cgroup is aquired. This refcnt will be cumsumed by | 1723 | * struct page_cgroup is acquired. This refcnt will be consumed by |
| 1724 | * "commit()" or removed by "cancel()" | 1724 | * "commit()" or removed by "cancel()" |
| 1725 | */ | 1725 | */ |
| 1726 | int mem_cgroup_try_charge_swapin(struct mm_struct *mm, | 1726 | int mem_cgroup_try_charge_swapin(struct mm_struct *mm, |
| @@ -2466,7 +2466,7 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft, | |||
| 2466 | 2466 | ||
| 2467 | cgroup_lock(); | 2467 | cgroup_lock(); |
| 2468 | /* | 2468 | /* |
| 2469 | * If parent's use_hiearchy is set, we can't make any modifications | 2469 | * If parent's use_hierarchy is set, we can't make any modifications |
| 2470 | * in the child subtrees. If it is unset, then the change can | 2470 | * in the child subtrees. If it is unset, then the change can |
| 2471 | * occur, provided the current cgroup has no children. | 2471 | * occur, provided the current cgroup has no children. |
| 2472 | * | 2472 | * |
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index dacc64183874..1ac49fef95ab 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
| @@ -174,7 +174,7 @@ static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno, | |||
| 174 | list_for_each_entry_safe (tk, next, to_kill, nd) { | 174 | list_for_each_entry_safe (tk, next, to_kill, nd) { |
| 175 | if (doit) { | 175 | if (doit) { |
| 176 | /* | 176 | /* |
| 177 | * In case something went wrong with munmaping | 177 | * In case something went wrong with munmapping |
| 178 | * make sure the process doesn't catch the | 178 | * make sure the process doesn't catch the |
| 179 | * signal and then access the memory. Just kill it. | 179 | * signal and then access the memory. Just kill it. |
| 180 | * the signal handlers | 180 | * the signal handlers |
diff --git a/mm/mmap.c b/mm/mmap.c
--- a/mm/mmap.c
+++ b/mm/mmap.c
| @@ -931,13 +931,9 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, | |||
| 931 | if (!(flags & MAP_FIXED)) | 931 | if (!(flags & MAP_FIXED)) |
| 932 | addr = round_hint_to_min(addr); | 932 | addr = round_hint_to_min(addr); |
| 933 | 933 | ||
| 934 | error = arch_mmap_check(addr, len, flags); | ||
| 935 | if (error) | ||
| 936 | return error; | ||
| 937 | |||
| 938 | /* Careful about overflows.. */ | 934 | /* Careful about overflows.. */ |
| 939 | len = PAGE_ALIGN(len); | 935 | len = PAGE_ALIGN(len); |
| 940 | if (!len || len > TASK_SIZE) | 936 | if (!len) |
| 941 | return -ENOMEM; | 937 | return -ENOMEM; |
| 942 | 938 | ||
| 943 | /* offset overflow? */ | 939 | /* offset overflow? */ |
| @@ -948,24 +944,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, | |||
| 948 | if (mm->map_count > sysctl_max_map_count) | 944 | if (mm->map_count > sysctl_max_map_count) |
| 949 | return -ENOMEM; | 945 | return -ENOMEM; |
| 950 | 946 | ||
| 951 | if (flags & MAP_HUGETLB) { | ||
| 952 | struct user_struct *user = NULL; | ||
| 953 | if (file) | ||
| 954 | return -EINVAL; | ||
| 955 | |||
| 956 | /* | ||
| 957 | * VM_NORESERVE is used because the reservations will be | ||
| 958 | * taken when vm_ops->mmap() is called | ||
| 959 | * A dummy user value is used because we are not locking | ||
| 960 | * memory so no accounting is necessary | ||
| 961 | */ | ||
| 962 | len = ALIGN(len, huge_page_size(&default_hstate)); | ||
| 963 | file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE, | ||
| 964 | &user, HUGETLB_ANONHUGE_INODE); | ||
| 965 | if (IS_ERR(file)) | ||
| 966 | return PTR_ERR(file); | ||
| 967 | } | ||
| 968 | |||
| 969 | /* Obtain the address to map to. we verify (or select) it and ensure | 947 | /* Obtain the address to map to. we verify (or select) it and ensure |
| 970 | * that it represents a valid section of the address space. | 948 | * that it represents a valid section of the address space. |
| 971 | */ | 949 | */ |
| @@ -1455,6 +1433,14 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, | |||
| 1455 | unsigned long (*get_area)(struct file *, unsigned long, | 1433 | unsigned long (*get_area)(struct file *, unsigned long, |
| 1456 | unsigned long, unsigned long, unsigned long); | 1434 | unsigned long, unsigned long, unsigned long); |
| 1457 | 1435 | ||
| 1436 | unsigned long error = arch_mmap_check(addr, len, flags); | ||
| 1437 | if (error) | ||
| 1438 | return error; | ||
| 1439 | |||
| 1440 | /* Careful about overflows.. */ | ||
| 1441 | if (len > TASK_SIZE) | ||
| 1442 | return -ENOMEM; | ||
| 1443 | |||
| 1458 | get_area = current->mm->get_unmapped_area; | 1444 | get_area = current->mm->get_unmapped_area; |
| 1459 | if (file && file->f_op && file->f_op->get_unmapped_area) | 1445 | if (file && file->f_op && file->f_op->get_unmapped_area) |
| 1460 | get_area = file->f_op->get_unmapped_area; | 1446 | get_area = file->f_op->get_unmapped_area; |
| @@ -1999,20 +1985,14 @@ unsigned long do_brk(unsigned long addr, unsigned long len) | |||
| 1999 | if (!len) | 1985 | if (!len) |
| 2000 | return addr; | 1986 | return addr; |
| 2001 | 1987 | ||
| 2002 | if ((addr + len) > TASK_SIZE || (addr + len) < addr) | ||
| 2003 | return -EINVAL; | ||
| 2004 | |||
| 2005 | if (is_hugepage_only_range(mm, addr, len)) | ||
| 2006 | return -EINVAL; | ||
| 2007 | |||
| 2008 | error = security_file_mmap(NULL, 0, 0, 0, addr, 1); | 1988 | error = security_file_mmap(NULL, 0, 0, 0, addr, 1); |
| 2009 | if (error) | 1989 | if (error) |
| 2010 | return error; | 1990 | return error; |
| 2011 | 1991 | ||
| 2012 | flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; | 1992 | flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; |
| 2013 | 1993 | ||
| 2014 | error = arch_mmap_check(addr, len, flags); | 1994 | error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED); |
| 2015 | if (error) | 1995 | if (error & ~PAGE_MASK) |
| 2016 | return error; | 1996 | return error; |
| 2017 | 1997 | ||
| 2018 | /* | 1998 | /* |
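Note: the mmap.c hunks move arch_mmap_check() and the TASK_SIZE bound into get_unmapped_area(), and do_brk() now relies on a MAP_FIXED lookup for validation. A minimal sketch of that idiom follows, assuming the in-kernel get_unmapped_area() prototype shown in the patch; the wrapper function itself is hypothetical.

    #include <linux/mm.h>
    #include <linux/mman.h>

    /* Hypothetical helper: validate a fixed addr/len pair the way do_brk()
     * now does.  get_unmapped_area() returns a page-aligned address on
     * success and a negative errno (low bits set) on failure.
     */
    static unsigned long example_check_fixed(unsigned long addr, unsigned long len)
    {
            unsigned long ret = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);

            if (ret & ~PAGE_MASK)   /* not page aligned => error code */
                    return ret;
            return 0;               /* address range accepted */
    }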
diff --git a/mm/mremap.c b/mm/mremap.c
index 97bff2547719..845190898d59 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
| @@ -261,6 +261,137 @@ static unsigned long move_vma(struct vm_area_struct *vma, | |||
| 261 | return new_addr; | 261 | return new_addr; |
| 262 | } | 262 | } |
| 263 | 263 | ||
| 264 | static struct vm_area_struct *vma_to_resize(unsigned long addr, | ||
| 265 | unsigned long old_len, unsigned long new_len, unsigned long *p) | ||
| 266 | { | ||
| 267 | struct mm_struct *mm = current->mm; | ||
| 268 | struct vm_area_struct *vma = find_vma(mm, addr); | ||
| 269 | |||
| 270 | if (!vma || vma->vm_start > addr) | ||
| 271 | goto Efault; | ||
| 272 | |||
| 273 | if (is_vm_hugetlb_page(vma)) | ||
| 274 | goto Einval; | ||
| 275 | |||
| 276 | /* We can't remap across vm area boundaries */ | ||
| 277 | if (old_len > vma->vm_end - addr) | ||
| 278 | goto Efault; | ||
| 279 | |||
| 280 | if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)) { | ||
| 281 | if (new_len > old_len) | ||
| 282 | goto Efault; | ||
| 283 | } | ||
| 284 | |||
| 285 | if (vma->vm_flags & VM_LOCKED) { | ||
| 286 | unsigned long locked, lock_limit; | ||
| 287 | locked = mm->locked_vm << PAGE_SHIFT; | ||
| 288 | lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; | ||
| 289 | locked += new_len - old_len; | ||
| 290 | if (locked > lock_limit && !capable(CAP_IPC_LOCK)) | ||
| 291 | goto Eagain; | ||
| 292 | } | ||
| 293 | |||
| 294 | if (!may_expand_vm(mm, (new_len - old_len) >> PAGE_SHIFT)) | ||
| 295 | goto Enomem; | ||
| 296 | |||
| 297 | if (vma->vm_flags & VM_ACCOUNT) { | ||
| 298 | unsigned long charged = (new_len - old_len) >> PAGE_SHIFT; | ||
| 299 | if (security_vm_enough_memory(charged)) | ||
| 300 | goto Efault; | ||
| 301 | *p = charged; | ||
| 302 | } | ||
| 303 | |||
| 304 | return vma; | ||
| 305 | |||
| 306 | Efault: /* very odd choice for most of the cases, but... */ | ||
| 307 | return ERR_PTR(-EFAULT); | ||
| 308 | Einval: | ||
| 309 | return ERR_PTR(-EINVAL); | ||
| 310 | Enomem: | ||
| 311 | return ERR_PTR(-ENOMEM); | ||
| 312 | Eagain: | ||
| 313 | return ERR_PTR(-EAGAIN); | ||
| 314 | } | ||
| 315 | |||
| 316 | static unsigned long mremap_to(unsigned long addr, | ||
| 317 | unsigned long old_len, unsigned long new_addr, | ||
| 318 | unsigned long new_len) | ||
| 319 | { | ||
| 320 | struct mm_struct *mm = current->mm; | ||
| 321 | struct vm_area_struct *vma; | ||
| 322 | unsigned long ret = -EINVAL; | ||
| 323 | unsigned long charged = 0; | ||
| 324 | unsigned long map_flags; | ||
| 325 | |||
| 326 | if (new_addr & ~PAGE_MASK) | ||
| 327 | goto out; | ||
| 328 | |||
| 329 | if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len) | ||
| 330 | goto out; | ||
| 331 | |||
| 332 | /* Check if the location we're moving into overlaps the | ||
| 333 | * old location at all, and fail if it does. | ||
| 334 | */ | ||
| 335 | if ((new_addr <= addr) && (new_addr+new_len) > addr) | ||
| 336 | goto out; | ||
| 337 | |||
| 338 | if ((addr <= new_addr) && (addr+old_len) > new_addr) | ||
| 339 | goto out; | ||
| 340 | |||
| 341 | ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1); | ||
| 342 | if (ret) | ||
| 343 | goto out; | ||
| 344 | |||
| 345 | ret = do_munmap(mm, new_addr, new_len); | ||
| 346 | if (ret) | ||
| 347 | goto out; | ||
| 348 | |||
| 349 | if (old_len >= new_len) { | ||
| 350 | ret = do_munmap(mm, addr+new_len, old_len - new_len); | ||
| 351 | if (ret && old_len != new_len) | ||
| 352 | goto out; | ||
| 353 | old_len = new_len; | ||
| 354 | } | ||
| 355 | |||
| 356 | vma = vma_to_resize(addr, old_len, new_len, &charged); | ||
| 357 | if (IS_ERR(vma)) { | ||
| 358 | ret = PTR_ERR(vma); | ||
| 359 | goto out; | ||
| 360 | } | ||
| 361 | |||
| 362 | map_flags = MAP_FIXED; | ||
| 363 | if (vma->vm_flags & VM_MAYSHARE) | ||
| 364 | map_flags |= MAP_SHARED; | ||
| 365 | |||
| 366 | ret = get_unmapped_area(vma->vm_file, new_addr, new_len, vma->vm_pgoff + | ||
| 367 | ((addr - vma->vm_start) >> PAGE_SHIFT), | ||
| 368 | map_flags); | ||
| 369 | if (ret & ~PAGE_MASK) | ||
| 370 | goto out1; | ||
| 371 | |||
| 372 | ret = move_vma(vma, addr, old_len, new_len, new_addr); | ||
| 373 | if (!(ret & ~PAGE_MASK)) | ||
| 374 | goto out; | ||
| 375 | out1: | ||
| 376 | vm_unacct_memory(charged); | ||
| 377 | |||
| 378 | out: | ||
| 379 | return ret; | ||
| 380 | } | ||
| 381 | |||
| 382 | static int vma_expandable(struct vm_area_struct *vma, unsigned long delta) | ||
| 383 | { | ||
| 384 | unsigned long end = vma->vm_end + delta; | ||
| 385 | if (end < vma->vm_end) /* overflow */ | ||
| 386 | return 0; | ||
| 387 | if (vma->vm_next && vma->vm_next->vm_start < end) /* intersection */ | ||
| 388 | return 0; | ||
| 389 | if (get_unmapped_area(NULL, vma->vm_start, end - vma->vm_start, | ||
| 390 | 0, MAP_FIXED) & ~PAGE_MASK) | ||
| 391 | return 0; | ||
| 392 | return 1; | ||
| 393 | } | ||
| 394 | |||
| 264 | /* | 395 | /* |
| 265 | * Expand (or shrink) an existing mapping, potentially moving it at the | 396 | * Expand (or shrink) an existing mapping, potentially moving it at the |
| 266 | * same time (controlled by the MREMAP_MAYMOVE flag and available VM space) | 397 | * same time (controlled by the MREMAP_MAYMOVE flag and available VM space) |
| @@ -294,32 +425,10 @@ unsigned long do_mremap(unsigned long addr, | |||
| 294 | if (!new_len) | 425 | if (!new_len) |
| 295 | goto out; | 426 | goto out; |
| 296 | 427 | ||
| 297 | /* new_addr is only valid if MREMAP_FIXED is specified */ | ||
| 298 | if (flags & MREMAP_FIXED) { | 428 | if (flags & MREMAP_FIXED) { |
| 299 | if (new_addr & ~PAGE_MASK) | 429 | if (flags & MREMAP_MAYMOVE) |
| 300 | goto out; | 430 | ret = mremap_to(addr, old_len, new_addr, new_len); |
| 301 | if (!(flags & MREMAP_MAYMOVE)) | 431 | goto out; |
| 302 | goto out; | ||
| 303 | |||
| 304 | if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len) | ||
| 305 | goto out; | ||
| 306 | |||
| 307 | /* Check if the location we're moving into overlaps the | ||
| 308 | * old location at all, and fail if it does. | ||
| 309 | */ | ||
| 310 | if ((new_addr <= addr) && (new_addr+new_len) > addr) | ||
| 311 | goto out; | ||
| 312 | |||
| 313 | if ((addr <= new_addr) && (addr+old_len) > new_addr) | ||
| 314 | goto out; | ||
| 315 | |||
| 316 | ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1); | ||
| 317 | if (ret) | ||
| 318 | goto out; | ||
| 319 | |||
| 320 | ret = do_munmap(mm, new_addr, new_len); | ||
| 321 | if (ret) | ||
| 322 | goto out; | ||
| 323 | } | 432 | } |
| 324 | 433 | ||
| 325 | /* | 434 | /* |
| @@ -332,60 +441,23 @@ unsigned long do_mremap(unsigned long addr, | |||
| 332 | if (ret && old_len != new_len) | 441 | if (ret && old_len != new_len) |
| 333 | goto out; | 442 | goto out; |
| 334 | ret = addr; | 443 | ret = addr; |
| 335 | if (!(flags & MREMAP_FIXED) || (new_addr == addr)) | 444 | goto out; |
| 336 | goto out; | ||
| 337 | old_len = new_len; | ||
| 338 | } | 445 | } |
| 339 | 446 | ||
| 340 | /* | 447 | /* |
| 341 | * Ok, we need to grow.. or relocate. | 448 | * Ok, we need to grow.. |
| 342 | */ | 449 | */ |
| 343 | ret = -EFAULT; | 450 | vma = vma_to_resize(addr, old_len, new_len, &charged); |
| 344 | vma = find_vma(mm, addr); | 451 | if (IS_ERR(vma)) { |
| 345 | if (!vma || vma->vm_start > addr) | 452 | ret = PTR_ERR(vma); |
| 346 | goto out; | ||
| 347 | if (is_vm_hugetlb_page(vma)) { | ||
| 348 | ret = -EINVAL; | ||
| 349 | goto out; | ||
| 350 | } | ||
| 351 | /* We can't remap across vm area boundaries */ | ||
| 352 | if (old_len > vma->vm_end - addr) | ||
| 353 | goto out; | ||
| 354 | if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)) { | ||
| 355 | if (new_len > old_len) | ||
| 356 | goto out; | ||
| 357 | } | ||
| 358 | if (vma->vm_flags & VM_LOCKED) { | ||
| 359 | unsigned long locked, lock_limit; | ||
| 360 | locked = mm->locked_vm << PAGE_SHIFT; | ||
| 361 | lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur; | ||
| 362 | locked += new_len - old_len; | ||
| 363 | ret = -EAGAIN; | ||
| 364 | if (locked > lock_limit && !capable(CAP_IPC_LOCK)) | ||
| 365 | goto out; | ||
| 366 | } | ||
| 367 | if (!may_expand_vm(mm, (new_len - old_len) >> PAGE_SHIFT)) { | ||
| 368 | ret = -ENOMEM; | ||
| 369 | goto out; | 453 | goto out; |
| 370 | } | 454 | } |
| 371 | 455 | ||
| 372 | if (vma->vm_flags & VM_ACCOUNT) { | ||
| 373 | charged = (new_len - old_len) >> PAGE_SHIFT; | ||
| 374 | if (security_vm_enough_memory(charged)) | ||
| 375 | goto out_nc; | ||
| 376 | } | ||
| 377 | |||
| 378 | /* old_len exactly to the end of the area.. | 456 | /* old_len exactly to the end of the area.. |
| 379 | * And we're not relocating the area. | ||
| 380 | */ | 457 | */ |
| 381 | if (old_len == vma->vm_end - addr && | 458 | if (old_len == vma->vm_end - addr) { |
| 382 | !((flags & MREMAP_FIXED) && (addr != new_addr)) && | ||
| 383 | (old_len != new_len || !(flags & MREMAP_MAYMOVE))) { | ||
| 384 | unsigned long max_addr = TASK_SIZE; | ||
| 385 | if (vma->vm_next) | ||
| 386 | max_addr = vma->vm_next->vm_start; | ||
| 387 | /* can we just expand the current mapping? */ | 459 | /* can we just expand the current mapping? */ |
| 388 | if (max_addr - addr >= new_len) { | 460 | if (vma_expandable(vma, new_len - old_len)) { |
| 389 | int pages = (new_len - old_len) >> PAGE_SHIFT; | 461 | int pages = (new_len - old_len) >> PAGE_SHIFT; |
| 390 | 462 | ||
| 391 | vma_adjust(vma, vma->vm_start, | 463 | vma_adjust(vma, vma->vm_start, |
| @@ -409,28 +481,27 @@ unsigned long do_mremap(unsigned long addr, | |||
| 409 | */ | 481 | */ |
| 410 | ret = -ENOMEM; | 482 | ret = -ENOMEM; |
| 411 | if (flags & MREMAP_MAYMOVE) { | 483 | if (flags & MREMAP_MAYMOVE) { |
| 412 | if (!(flags & MREMAP_FIXED)) { | 484 | unsigned long map_flags = 0; |
| 413 | unsigned long map_flags = 0; | 485 | if (vma->vm_flags & VM_MAYSHARE) |
| 414 | if (vma->vm_flags & VM_MAYSHARE) | 486 | map_flags |= MAP_SHARED; |
| 415 | map_flags |= MAP_SHARED; | 487 | |
| 416 | 488 | new_addr = get_unmapped_area(vma->vm_file, 0, new_len, | |
| 417 | new_addr = get_unmapped_area(vma->vm_file, 0, new_len, | 489 | vma->vm_pgoff + |
| 418 | vma->vm_pgoff, map_flags); | 490 | ((addr - vma->vm_start) >> PAGE_SHIFT), |
| 419 | if (new_addr & ~PAGE_MASK) { | 491 | map_flags); |
| 420 | ret = new_addr; | 492 | if (new_addr & ~PAGE_MASK) { |
| 421 | goto out; | 493 | ret = new_addr; |
| 422 | } | 494 | goto out; |
| 423 | |||
| 424 | ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1); | ||
| 425 | if (ret) | ||
| 426 | goto out; | ||
| 427 | } | 495 | } |
| 496 | |||
| 497 | ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1); | ||
| 498 | if (ret) | ||
| 499 | goto out; | ||
| 428 | ret = move_vma(vma, addr, old_len, new_len, new_addr); | 500 | ret = move_vma(vma, addr, old_len, new_len, new_addr); |
| 429 | } | 501 | } |
| 430 | out: | 502 | out: |
| 431 | if (ret & ~PAGE_MASK) | 503 | if (ret & ~PAGE_MASK) |
| 432 | vm_unacct_memory(charged); | 504 | vm_unacct_memory(charged); |
| 433 | out_nc: | ||
| 434 | return ret; | 505 | return ret; |
| 435 | } | 506 | } |
| 436 | 507 | ||
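Note: both do_mremap() and the new mremap_to() consume vma_to_resize() through the ERR_PTR convention. The sketch below, with a hypothetical caller, shows that pattern in isolation; it mirrors the callers in the patch rather than adding a new one.

    #include <linux/err.h>
    #include <linux/mm_types.h>

    /* Hypothetical caller of vma_to_resize(): errors come back encoded in
     * the pointer itself rather than through a separate return code.
     */
    static unsigned long example_resize(unsigned long addr,
                                        unsigned long old_len,
                                        unsigned long new_len)
    {
            unsigned long charged = 0;
            struct vm_area_struct *vma;

            vma = vma_to_resize(addr, old_len, new_len, &charged);
            if (IS_ERR(vma))
                    return PTR_ERR(vma);    /* -EFAULT, -EINVAL, -EAGAIN or -ENOMEM */

            /* ... expand in place or move the mapping ... */
            return addr;
    }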
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
| @@ -604,6 +604,26 @@ static struct kmem_cache cache_cache = { | |||
| 604 | 604 | ||
| 605 | #define BAD_ALIEN_MAGIC 0x01020304ul | 605 | #define BAD_ALIEN_MAGIC 0x01020304ul |
| 606 | 606 | ||
| 607 | /* | ||
| 608 | * chicken and egg problem: delay the per-cpu array allocation | ||
| 609 | * until the general caches are up. | ||
| 610 | */ | ||
| 611 | static enum { | ||
| 612 | NONE, | ||
| 613 | PARTIAL_AC, | ||
| 614 | PARTIAL_L3, | ||
| 615 | EARLY, | ||
| 616 | FULL | ||
| 617 | } g_cpucache_up; | ||
| 618 | |||
| 619 | /* | ||
| 620 | * used by boot code to determine if it can use slab based allocator | ||
| 621 | */ | ||
| 622 | int slab_is_available(void) | ||
| 623 | { | ||
| 624 | return g_cpucache_up >= EARLY; | ||
| 625 | } | ||
| 626 | |||
| 607 | #ifdef CONFIG_LOCKDEP | 627 | #ifdef CONFIG_LOCKDEP |
| 608 | 628 | ||
| 609 | /* | 629 | /* |
| @@ -620,40 +640,52 @@ static struct kmem_cache cache_cache = { | |||
| 620 | static struct lock_class_key on_slab_l3_key; | 640 | static struct lock_class_key on_slab_l3_key; |
| 621 | static struct lock_class_key on_slab_alc_key; | 641 | static struct lock_class_key on_slab_alc_key; |
| 622 | 642 | ||
| 623 | static inline void init_lock_keys(void) | 643 | static void init_node_lock_keys(int q) |
| 624 | |||
| 625 | { | 644 | { |
| 626 | int q; | ||
| 627 | struct cache_sizes *s = malloc_sizes; | 645 | struct cache_sizes *s = malloc_sizes; |
| 628 | 646 | ||
| 629 | while (s->cs_size != ULONG_MAX) { | 647 | if (g_cpucache_up != FULL) |
| 630 | for_each_node(q) { | 648 | return; |
| 631 | struct array_cache **alc; | 649 | |
| 632 | int r; | 650 | for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) { |
| 633 | struct kmem_list3 *l3 = s->cs_cachep->nodelists[q]; | 651 | struct array_cache **alc; |
| 634 | if (!l3 || OFF_SLAB(s->cs_cachep)) | 652 | struct kmem_list3 *l3; |
| 635 | continue; | 653 | int r; |
| 636 | lockdep_set_class(&l3->list_lock, &on_slab_l3_key); | 654 | |
| 637 | alc = l3->alien; | 655 | l3 = s->cs_cachep->nodelists[q]; |
| 638 | /* | 656 | if (!l3 || OFF_SLAB(s->cs_cachep)) |
| 639 | * FIXME: This check for BAD_ALIEN_MAGIC | 657 | return; |
| 640 | * should go away when common slab code is taught to | 658 | lockdep_set_class(&l3->list_lock, &on_slab_l3_key); |
| 641 | * work even without alien caches. | 659 | alc = l3->alien; |
| 642 | * Currently, non NUMA code returns BAD_ALIEN_MAGIC | 660 | /* |
| 643 | * for alloc_alien_cache, | 661 | * FIXME: This check for BAD_ALIEN_MAGIC |
| 644 | */ | 662 | * should go away when common slab code is taught to |
| 645 | if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC) | 663 | * work even without alien caches. |
| 646 | continue; | 664 | * Currently, non NUMA code returns BAD_ALIEN_MAGIC |
| 647 | for_each_node(r) { | 665 | * for alloc_alien_cache, |
| 648 | if (alc[r]) | 666 | */ |
| 649 | lockdep_set_class(&alc[r]->lock, | 667 | if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC) |
| 650 | &on_slab_alc_key); | 668 | return; |
| 651 | } | 669 | for_each_node(r) { |
| 670 | if (alc[r]) | ||
| 671 | lockdep_set_class(&alc[r]->lock, | ||
| 672 | &on_slab_alc_key); | ||
| 652 | } | 673 | } |
| 653 | s++; | ||
| 654 | } | 674 | } |
| 655 | } | 675 | } |
| 676 | |||
| 677 | static inline void init_lock_keys(void) | ||
| 678 | { | ||
| 679 | int node; | ||
| 680 | |||
| 681 | for_each_node(node) | ||
| 682 | init_node_lock_keys(node); | ||
| 683 | } | ||
| 656 | #else | 684 | #else |
| 685 | static void init_node_lock_keys(int q) | ||
| 686 | { | ||
| 687 | } | ||
| 688 | |||
| 657 | static inline void init_lock_keys(void) | 689 | static inline void init_lock_keys(void) |
| 658 | { | 690 | { |
| 659 | } | 691 | } |
| @@ -665,26 +697,6 @@ static inline void init_lock_keys(void) | |||
| 665 | static DEFINE_MUTEX(cache_chain_mutex); | 697 | static DEFINE_MUTEX(cache_chain_mutex); |
| 666 | static struct list_head cache_chain; | 698 | static struct list_head cache_chain; |
| 667 | 699 | ||
| 668 | /* | ||
| 669 | * chicken and egg problem: delay the per-cpu array allocation | ||
| 670 | * until the general caches are up. | ||
| 671 | */ | ||
| 672 | static enum { | ||
| 673 | NONE, | ||
| 674 | PARTIAL_AC, | ||
| 675 | PARTIAL_L3, | ||
| 676 | EARLY, | ||
| 677 | FULL | ||
| 678 | } g_cpucache_up; | ||
| 679 | |||
| 680 | /* | ||
| 681 | * used by boot code to determine if it can use slab based allocator | ||
| 682 | */ | ||
| 683 | int slab_is_available(void) | ||
| 684 | { | ||
| 685 | return g_cpucache_up >= EARLY; | ||
| 686 | } | ||
| 687 | |||
| 688 | static DEFINE_PER_CPU(struct delayed_work, reap_work); | 700 | static DEFINE_PER_CPU(struct delayed_work, reap_work); |
| 689 | 701 | ||
| 690 | static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) | 702 | static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) |
| @@ -1254,6 +1266,8 @@ static int __cpuinit cpuup_prepare(long cpu) | |||
| 1254 | kfree(shared); | 1266 | kfree(shared); |
| 1255 | free_alien_cache(alien); | 1267 | free_alien_cache(alien); |
| 1256 | } | 1268 | } |
| 1269 | init_node_lock_keys(node); | ||
| 1270 | |||
| 1257 | return 0; | 1271 | return 0; |
| 1258 | bad: | 1272 | bad: |
| 1259 | cpuup_canceled(cpu); | 1273 | cpuup_canceled(cpu); |
| @@ -3103,13 +3117,19 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags) | |||
| 3103 | } else { | 3117 | } else { |
| 3104 | STATS_INC_ALLOCMISS(cachep); | 3118 | STATS_INC_ALLOCMISS(cachep); |
| 3105 | objp = cache_alloc_refill(cachep, flags); | 3119 | objp = cache_alloc_refill(cachep, flags); |
| 3120 | /* | ||
| 3121 | * the 'ac' may be updated by cache_alloc_refill(), | ||
| 3122 | * and kmemleak_erase() requires its correct value. | ||
| 3123 | */ | ||
| 3124 | ac = cpu_cache_get(cachep); | ||
| 3106 | } | 3125 | } |
| 3107 | /* | 3126 | /* |
| 3108 | * To avoid a false negative, if an object that is in one of the | 3127 | * To avoid a false negative, if an object that is in one of the |
| 3109 | * per-CPU caches is leaked, we need to make sure kmemleak doesn't | 3128 | * per-CPU caches is leaked, we need to make sure kmemleak doesn't |
| 3110 | * treat the array pointers as a reference to the object. | 3129 | * treat the array pointers as a reference to the object. |
| 3111 | */ | 3130 | */ |
| 3112 | kmemleak_erase(&ac->entry[ac->avail]); | 3131 | if (objp) |
| 3132 | kmemleak_erase(&ac->entry[ac->avail]); | ||
| 3113 | return objp; | 3133 | return objp; |
| 3114 | } | 3134 | } |
| 3115 | 3135 | ||
| @@ -3306,7 +3326,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, | |||
| 3306 | cache_alloc_debugcheck_before(cachep, flags); | 3326 | cache_alloc_debugcheck_before(cachep, flags); |
| 3307 | local_irq_save(save_flags); | 3327 | local_irq_save(save_flags); |
| 3308 | 3328 | ||
| 3309 | if (unlikely(nodeid == -1)) | 3329 | if (nodeid == -1) |
| 3310 | nodeid = numa_node_id(); | 3330 | nodeid = numa_node_id(); |
| 3311 | 3331 | ||
| 3312 | if (unlikely(!cachep->nodelists[nodeid])) { | 3332 | if (unlikely(!cachep->nodelists[nodeid])) { |
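Note: the slab.c hunk moves g_cpucache_up and slab_is_available() above the lockdep helpers so init_node_lock_keys() can test the boot state. Outside this file, the usual consumer of slab_is_available() is early boot code; a hypothetical sketch, assuming the era's bootmem allocator is available as a fallback, is:

    #include <linux/slab.h>
    #include <linux/bootmem.h>

    /* Hypothetical boot-time helper: fall back to bootmem until the slab
     * allocator reports itself usable (g_cpucache_up >= EARLY).
     */
    static void *example_early_alloc(size_t size)
    {
            if (slab_is_available())
                    return kmalloc(size, GFP_KERNEL);
            return alloc_bootmem(size);
    }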
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
| @@ -1735,7 +1735,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s, | |||
| 1735 | } | 1735 | } |
| 1736 | local_irq_restore(flags); | 1736 | local_irq_restore(flags); |
| 1737 | 1737 | ||
| 1738 | if (unlikely((gfpflags & __GFP_ZERO) && object)) | 1738 | if (unlikely(gfpflags & __GFP_ZERO) && object) |
| 1739 | memset(object, 0, objsize); | 1739 | memset(object, 0, objsize); |
| 1740 | 1740 | ||
| 1741 | kmemcheck_slab_alloc(s, gfpflags, object, c->objsize); | 1741 | kmemcheck_slab_alloc(s, gfpflags, object, c->objsize); |
| @@ -4371,12 +4371,28 @@ static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si) | |||
| 4371 | return len + sprintf(buf + len, "\n"); | 4371 | return len + sprintf(buf + len, "\n"); |
| 4372 | } | 4372 | } |
| 4373 | 4373 | ||
| 4374 | static void clear_stat(struct kmem_cache *s, enum stat_item si) | ||
| 4375 | { | ||
| 4376 | int cpu; | ||
| 4377 | |||
| 4378 | for_each_online_cpu(cpu) | ||
| 4379 | get_cpu_slab(s, cpu)->stat[si] = 0; | ||
| 4380 | } | ||
| 4381 | |||
| 4374 | #define STAT_ATTR(si, text) \ | 4382 | #define STAT_ATTR(si, text) \ |
| 4375 | static ssize_t text##_show(struct kmem_cache *s, char *buf) \ | 4383 | static ssize_t text##_show(struct kmem_cache *s, char *buf) \ |
| 4376 | { \ | 4384 | { \ |
| 4377 | return show_stat(s, buf, si); \ | 4385 | return show_stat(s, buf, si); \ |
| 4378 | } \ | 4386 | } \ |
| 4379 | SLAB_ATTR_RO(text); \ | 4387 | static ssize_t text##_store(struct kmem_cache *s, \ |
| 4388 | const char *buf, size_t length) \ | ||
| 4389 | { \ | ||
| 4390 | if (buf[0] != '0') \ | ||
| 4391 | return -EINVAL; \ | ||
| 4392 | clear_stat(s, si); \ | ||
| 4393 | return length; \ | ||
| 4394 | } \ | ||
| 4395 | SLAB_ATTR(text); \ | ||
| 4380 | 4396 | ||
| 4381 | STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath); | 4397 | STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath); |
| 4382 | STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath); | 4398 | STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath); |
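Note: with the _store handler generated by STAT_ATTR above, each SLUB statistics attribute becomes writable, and writing anything whose first character is not '0' returns -EINVAL. An illustrative userspace sketch follows; the cache and counter names are examples only, and the files exist only when CONFIG_SLUB_STATS is enabled.

    #include <stdio.h>

    int main(void)
    {
            /* Example attribute: reset the alloc_fastpath counter of kmalloc-64 */
            FILE *f = fopen("/sys/kernel/slab/kmalloc-64/alloc_fastpath", "w");

            if (!f) {
                    perror("fopen");
                    return 1;
            }
            fputc('0', f);          /* any other leading character => -EINVAL */
            fclose(f);
            return 0;
    }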
diff --git a/mm/truncate.c b/mm/truncate.c
index 450cebdabfc0..2c147a7e5f2c 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
| @@ -490,7 +490,7 @@ EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range); | |||
| 490 | * Any pages which are found to be mapped into pagetables are unmapped prior to | 490 | * Any pages which are found to be mapped into pagetables are unmapped prior to |
| 491 | * invalidation. | 491 | * invalidation. |
| 492 | * | 492 | * |
| 493 | * Returns -EIO if any pages could not be invalidated. | 493 | * Returns -EBUSY if any pages could not be invalidated. |
| 494 | */ | 494 | */ |
| 495 | int invalidate_inode_pages2(struct address_space *mapping) | 495 | int invalidate_inode_pages2(struct address_space *mapping) |
| 496 | { | 496 | { |
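Note: the truncate.c hunk only corrects the documented return value — invalidate_inode_pages2() reports -EBUSY, not -EIO, when pages cannot be invalidated. A hypothetical caller handling that case might look like the sketch below.

    #include <linux/kernel.h>
    #include <linux/fs.h>
    #include <linux/errno.h>

    /* Hypothetical helper: try to drop an inode's page cache and note
     * whether some pages were still busy.
     */
    static int example_drop_pagecache(struct address_space *mapping)
    {
            int ret = invalidate_inode_pages2(mapping);

            if (ret == -EBUSY)
                    pr_debug("pages still in use, invalidation incomplete\n");
            return ret;
    }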
diff --git a/mm/util.c b/mm/util.c
--- a/mm/util.c
+++ b/mm/util.c
| @@ -4,6 +4,10 @@ | |||
| 4 | #include <linux/module.h> | 4 | #include <linux/module.h> |
| 5 | #include <linux/err.h> | 5 | #include <linux/err.h> |
| 6 | #include <linux/sched.h> | 6 | #include <linux/sched.h> |
| 7 | #include <linux/hugetlb.h> | ||
| 8 | #include <linux/syscalls.h> | ||
| 9 | #include <linux/mman.h> | ||
| 10 | #include <linux/file.h> | ||
| 7 | #include <asm/uaccess.h> | 11 | #include <asm/uaccess.h> |
| 8 | 12 | ||
| 9 | #define CREATE_TRACE_POINTS | 13 | #define CREATE_TRACE_POINTS |
| @@ -268,6 +272,46 @@ int __attribute__((weak)) get_user_pages_fast(unsigned long start, | |||
| 268 | } | 272 | } |
| 269 | EXPORT_SYMBOL_GPL(get_user_pages_fast); | 273 | EXPORT_SYMBOL_GPL(get_user_pages_fast); |
| 270 | 274 | ||
| 275 | SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, | ||
| 276 | unsigned long, prot, unsigned long, flags, | ||
| 277 | unsigned long, fd, unsigned long, pgoff) | ||
| 278 | { | ||
| 279 | struct file * file = NULL; | ||
| 280 | unsigned long retval = -EBADF; | ||
| 281 | |||
| 282 | if (!(flags & MAP_ANONYMOUS)) { | ||
| 283 | if (unlikely(flags & MAP_HUGETLB)) | ||
| 284 | return -EINVAL; | ||
| 285 | file = fget(fd); | ||
| 286 | if (!file) | ||
| 287 | goto out; | ||
| 288 | } else if (flags & MAP_HUGETLB) { | ||
| 289 | struct user_struct *user = NULL; | ||
| 290 | /* | ||
| 291 | * VM_NORESERVE is used because the reservations will be | ||
| 292 | * taken when vm_ops->mmap() is called | ||
| 293 | * A dummy user value is used because we are not locking | ||
| 294 | * memory so no accounting is necessary | ||
| 295 | */ | ||
| 296 | len = ALIGN(len, huge_page_size(&default_hstate)); | ||
| 297 | file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE, | ||
| 298 | &user, HUGETLB_ANONHUGE_INODE); | ||
| 299 | if (IS_ERR(file)) | ||
| 300 | return PTR_ERR(file); | ||
| 301 | } | ||
| 302 | |||
| 303 | flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE); | ||
| 304 | |||
| 305 | down_write(¤t->mm->mmap_sem); | ||
| 306 | retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); | ||
| 307 | up_write(¤t->mm->mmap_sem); | ||
| 308 | |||
| 309 | if (file) | ||
| 310 | fput(file); | ||
| 311 | out: | ||
| 312 | return retval; | ||
| 313 | } | ||
| 314 | |||
| 271 | /* Tracepoints definitions. */ | 315 | /* Tracepoints definitions. */ |
| 272 | EXPORT_TRACEPOINT_SYMBOL(kmalloc); | 316 | EXPORT_TRACEPOINT_SYMBOL(kmalloc); |
| 273 | EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); | 317 | EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); |
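Note: the new sys_mmap_pgoff() in util.c takes over the MAP_HUGETLB handling that was removed from do_mmap_pgoff() in mmap.c. A userspace sketch of exercising that path is below; the 4 MiB length is an example, it must be a multiple of the default huge page size, huge pages must be reserved on the system, and MAP_HUGETLB must be defined by the installed headers.

    #include <sys/mman.h>
    #include <stdio.h>

    int main(void)
    {
            size_t len = 4UL << 20;         /* example size: 4 MiB */
            void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);

            if (p == MAP_FAILED) {
                    perror("mmap(MAP_HUGETLB)");
                    return 1;
            }
            munmap(p, len);
            return 0;
    }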
