Diffstat (limited to 'mm')
-rw-r--r--   mm/filemap.c          |  51
-rw-r--r--   mm/kmemleak.c         |   4
-rw-r--r--   mm/memcontrol.c       |   6
-rw-r--r--   mm/memory-failure.c   |   2
-rw-r--r--   mm/mmap.c             |  42
-rw-r--r--   mm/mremap.c           | 241
-rw-r--r--   mm/slab.c             | 118
-rw-r--r--   mm/slub.c             |  20
-rw-r--r--   mm/truncate.c         |   2
-rw-r--r--   mm/util.c             |  44
10 files changed, 320 insertions, 210 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index ef169f37156d..8b4d88f9249e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -260,27 +260,27 @@ int filemap_flush(struct address_space *mapping)
260EXPORT_SYMBOL(filemap_flush); 260EXPORT_SYMBOL(filemap_flush);
261 261
262/** 262/**
263 * wait_on_page_writeback_range - wait for writeback to complete 263 * filemap_fdatawait_range - wait for writeback to complete
264 * @mapping: target address_space 264 * @mapping: address space structure to wait for
265 * @start: beginning page index 265 * @start_byte: offset in bytes where the range starts
266 * @end: ending page index 266 * @end_byte: offset in bytes where the range ends (inclusive)
267 * 267 *
268 * Wait for writeback to complete against pages indexed by start->end 268 * Walk the list of under-writeback pages of the given address space
269 * inclusive 269 * in the given range and wait for all of them.
270 */ 270 */
271int wait_on_page_writeback_range(struct address_space *mapping, 271int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte,
272 pgoff_t start, pgoff_t end) 272 loff_t end_byte)
273{ 273{
274 pgoff_t index = start_byte >> PAGE_CACHE_SHIFT;
275 pgoff_t end = end_byte >> PAGE_CACHE_SHIFT;
274 struct pagevec pvec; 276 struct pagevec pvec;
275 int nr_pages; 277 int nr_pages;
276 int ret = 0; 278 int ret = 0;
277 pgoff_t index;
278 279
279 if (end < start) 280 if (end_byte < start_byte)
280 return 0; 281 return 0;
281 282
282 pagevec_init(&pvec, 0); 283 pagevec_init(&pvec, 0);
283 index = start;
284 while ((index <= end) && 284 while ((index <= end) &&
285 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, 285 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
286 PAGECACHE_TAG_WRITEBACK, 286 PAGECACHE_TAG_WRITEBACK,
@@ -310,25 +310,6 @@ int wait_on_page_writeback_range(struct address_space *mapping,
310 310
311 return ret; 311 return ret;
312} 312}
313
314/**
315 * filemap_fdatawait_range - wait for all under-writeback pages to complete in a given range
316 * @mapping: address space structure to wait for
317 * @start: offset in bytes where the range starts
318 * @end: offset in bytes where the range ends (inclusive)
319 *
320 * Walk the list of under-writeback pages of the given address space
321 * in the given range and wait for all of them.
322 *
323 * This is just a simple wrapper so that callers don't have to convert offsets
324 * to page indexes themselves
325 */
326int filemap_fdatawait_range(struct address_space *mapping, loff_t start,
327 loff_t end)
328{
329 return wait_on_page_writeback_range(mapping, start >> PAGE_CACHE_SHIFT,
330 end >> PAGE_CACHE_SHIFT);
331}
332EXPORT_SYMBOL(filemap_fdatawait_range); 313EXPORT_SYMBOL(filemap_fdatawait_range);
333 314
334/** 315/**
@@ -345,8 +326,7 @@ int filemap_fdatawait(struct address_space *mapping)
345 if (i_size == 0) 326 if (i_size == 0)
346 return 0; 327 return 0;
347 328
348 return wait_on_page_writeback_range(mapping, 0, 329 return filemap_fdatawait_range(mapping, 0, i_size - 1);
349 (i_size - 1) >> PAGE_CACHE_SHIFT);
350} 330}
351EXPORT_SYMBOL(filemap_fdatawait); 331EXPORT_SYMBOL(filemap_fdatawait);
352 332
@@ -393,9 +373,8 @@ int filemap_write_and_wait_range(struct address_space *mapping,
393 WB_SYNC_ALL); 373 WB_SYNC_ALL);
394 /* See comment of filemap_write_and_wait() */ 374 /* See comment of filemap_write_and_wait() */
395 if (err != -EIO) { 375 if (err != -EIO) {
396 int err2 = wait_on_page_writeback_range(mapping, 376 int err2 = filemap_fdatawait_range(mapping,
397 lstart >> PAGE_CACHE_SHIFT, 377 lstart, lend);
398 lend >> PAGE_CACHE_SHIFT);
399 if (!err) 378 if (!err)
400 err = err2; 379 err = err2;
401 } 380 }
@@ -1844,7 +1823,7 @@ static size_t __iovec_copy_from_user_inatomic(char *vaddr,
1844 1823
1845/* 1824/*
1846 * Copy as much as we can into the page and return the number of bytes which 1825 * Copy as much as we can into the page and return the number of bytes which
1847 * were sucessfully copied. If a fault is encountered then return the number of 1826 * were successfully copied. If a fault is encountered then return the number of
1848 * bytes which were copied. 1827 * bytes which were copied.
1849 */ 1828 */
1850size_t iov_iter_copy_from_user_atomic(struct page *page, 1829size_t iov_iter_copy_from_user_atomic(struct page *page,
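
With wait_on_page_writeback_range() folded into filemap_fdatawait_range(), the byte-to-page-index conversion now happens inside the helper, so callers pass loff_t byte offsets directly, as filemap_fdatawait() and filemap_write_and_wait_range() do above. A minimal sketch of a hypothetical caller, assuming only the new signature shown in this hunk; the function name is illustrative and not part of the patch:

static int example_wait_on_range(struct address_space *mapping,
				 loff_t pos, size_t count)
{
	/* Wait for writeback on the inclusive byte range [pos, pos + count - 1];
	 * no PAGE_CACHE_SHIFT arithmetic is needed in the caller any more. */
	return filemap_fdatawait_range(mapping, pos, pos + count - 1);
}
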
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 8bf765c4f58d..13f33b3081ec 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1050,8 +1050,8 @@ static void scan_object(struct kmemleak_object *object)
1050 unsigned long flags; 1050 unsigned long flags;
1051 1051
1052 /* 1052 /*
1053 * Once the object->lock is aquired, the corresponding memory block 1053 * Once the object->lock is acquired, the corresponding memory block
1054 * cannot be freed (the same lock is aquired in delete_object). 1054 * cannot be freed (the same lock is acquired in delete_object).
1055 */ 1055 */
1056 spin_lock_irqsave(&object->lock, flags); 1056 spin_lock_irqsave(&object->lock, flags);
1057 if (object->flags & OBJECT_NO_SCAN) 1057 if (object->flags & OBJECT_NO_SCAN)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f99f5991d6bb..c31a310aa146 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -209,7 +209,7 @@ struct mem_cgroup {
209 int prev_priority; /* for recording reclaim priority */ 209 int prev_priority; /* for recording reclaim priority */
210 210
211 /* 211 /*
212 * While reclaiming in a hiearchy, we cache the last child we 212 * While reclaiming in a hierarchy, we cache the last child we
213 * reclaimed from. 213 * reclaimed from.
214 */ 214 */
215 int last_scanned_child; 215 int last_scanned_child;
@@ -1720,7 +1720,7 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
1720/* 1720/*
1721 * While swap-in, try_charge -> commit or cancel, the page is locked. 1721 * While swap-in, try_charge -> commit or cancel, the page is locked.
1722 * And when try_charge() successfully returns, one refcnt to memcg without 1722 * And when try_charge() successfully returns, one refcnt to memcg without
1723 * struct page_cgroup is aquired. This refcnt will be cumsumed by 1723 * struct page_cgroup is acquired. This refcnt will be consumed by
1724 * "commit()" or removed by "cancel()" 1724 * "commit()" or removed by "cancel()"
1725 */ 1725 */
1726int mem_cgroup_try_charge_swapin(struct mm_struct *mm, 1726int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
@@ -2466,7 +2466,7 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
2466 2466
2467 cgroup_lock(); 2467 cgroup_lock();
2468 /* 2468 /*
2469 * If parent's use_hiearchy is set, we can't make any modifications 2469 * If parent's use_hierarchy is set, we can't make any modifications
2470 * in the child subtrees. If it is unset, then the change can 2470 * in the child subtrees. If it is unset, then the change can
2471 * occur, provided the current cgroup has no children. 2471 * occur, provided the current cgroup has no children.
2472 * 2472 *
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index dacc64183874..1ac49fef95ab 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -174,7 +174,7 @@ static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno,
174 list_for_each_entry_safe (tk, next, to_kill, nd) { 174 list_for_each_entry_safe (tk, next, to_kill, nd) {
175 if (doit) { 175 if (doit) {
176 /* 176 /*
177 * In case something went wrong with munmaping 177 * In case something went wrong with munmapping
178 * make sure the process doesn't catch the 178 * make sure the process doesn't catch the
179 * signal and then access the memory. Just kill it. 179 * signal and then access the memory. Just kill it.
180 * the signal handlers 180 * the signal handlers
diff --git a/mm/mmap.c b/mm/mmap.c
index 292ddc3cef9c..ed70a68e882a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -931,13 +931,9 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
931 if (!(flags & MAP_FIXED)) 931 if (!(flags & MAP_FIXED))
932 addr = round_hint_to_min(addr); 932 addr = round_hint_to_min(addr);
933 933
934 error = arch_mmap_check(addr, len, flags);
935 if (error)
936 return error;
937
938 /* Careful about overflows.. */ 934 /* Careful about overflows.. */
939 len = PAGE_ALIGN(len); 935 len = PAGE_ALIGN(len);
940 if (!len || len > TASK_SIZE) 936 if (!len)
941 return -ENOMEM; 937 return -ENOMEM;
942 938
943 /* offset overflow? */ 939 /* offset overflow? */
@@ -948,24 +944,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
948 if (mm->map_count > sysctl_max_map_count) 944 if (mm->map_count > sysctl_max_map_count)
949 return -ENOMEM; 945 return -ENOMEM;
950 946
951 if (flags & MAP_HUGETLB) {
952 struct user_struct *user = NULL;
953 if (file)
954 return -EINVAL;
955
956 /*
957 * VM_NORESERVE is used because the reservations will be
958 * taken when vm_ops->mmap() is called
959 * A dummy user value is used because we are not locking
960 * memory so no accounting is necessary
961 */
962 len = ALIGN(len, huge_page_size(&default_hstate));
963 file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
964 &user, HUGETLB_ANONHUGE_INODE);
965 if (IS_ERR(file))
966 return PTR_ERR(file);
967 }
968
969 /* Obtain the address to map to. we verify (or select) it and ensure 947 /* Obtain the address to map to. we verify (or select) it and ensure
970 * that it represents a valid section of the address space. 948 * that it represents a valid section of the address space.
971 */ 949 */
@@ -1455,6 +1433,14 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
1455 unsigned long (*get_area)(struct file *, unsigned long, 1433 unsigned long (*get_area)(struct file *, unsigned long,
1456 unsigned long, unsigned long, unsigned long); 1434 unsigned long, unsigned long, unsigned long);
1457 1435
1436 unsigned long error = arch_mmap_check(addr, len, flags);
1437 if (error)
1438 return error;
1439
1440 /* Careful about overflows.. */
1441 if (len > TASK_SIZE)
1442 return -ENOMEM;
1443
1458 get_area = current->mm->get_unmapped_area; 1444 get_area = current->mm->get_unmapped_area;
1459 if (file && file->f_op && file->f_op->get_unmapped_area) 1445 if (file && file->f_op && file->f_op->get_unmapped_area)
1460 get_area = file->f_op->get_unmapped_area; 1446 get_area = file->f_op->get_unmapped_area;
@@ -1999,20 +1985,14 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
1999 if (!len) 1985 if (!len)
2000 return addr; 1986 return addr;
2001 1987
2002 if ((addr + len) > TASK_SIZE || (addr + len) < addr)
2003 return -EINVAL;
2004
2005 if (is_hugepage_only_range(mm, addr, len))
2006 return -EINVAL;
2007
2008 error = security_file_mmap(NULL, 0, 0, 0, addr, 1); 1988 error = security_file_mmap(NULL, 0, 0, 0, addr, 1);
2009 if (error) 1989 if (error)
2010 return error; 1990 return error;
2011 1991
2012 flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags; 1992 flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
2013 1993
2014 error = arch_mmap_check(addr, len, flags); 1994 error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
2015 if (error) 1995 if (error & ~PAGE_MASK)
2016 return error; 1996 return error;
2017 1997
2018 /* 1998 /*
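
With arch_mmap_check() and the TASK_SIZE overflow check centralized in get_unmapped_area(), do_brk() drops its open-coded range checks and validates the address by calling get_unmapped_area(NULL, addr, len, 0, MAP_FIXED), relying on the convention that failure is a negative errno, which is never page-aligned. A hedged sketch of that convention; the helper name is illustrative and not part of the patch:

/* Validate a fixed address the way do_brk() now does: round-trip it through
 * get_unmapped_area() and treat any non-page-aligned result as an errno. */
static long example_check_fixed_addr(unsigned long addr, unsigned long len)
{
	unsigned long ret = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);

	if (ret & ~PAGE_MASK)		/* e.g. -ENOMEM or an arch_mmap_check() error */
		return (long)ret;	/* propagate the negative errno */
	return 0;			/* addr is acceptable for a MAP_FIXED mapping */
}
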
diff --git a/mm/mremap.c b/mm/mremap.c
index 97bff2547719..845190898d59 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -261,6 +261,137 @@ static unsigned long move_vma(struct vm_area_struct *vma,
261 return new_addr; 261 return new_addr;
262} 262}
263 263
264static struct vm_area_struct *vma_to_resize(unsigned long addr,
265 unsigned long old_len, unsigned long new_len, unsigned long *p)
266{
267 struct mm_struct *mm = current->mm;
268 struct vm_area_struct *vma = find_vma(mm, addr);
269
270 if (!vma || vma->vm_start > addr)
271 goto Efault;
272
273 if (is_vm_hugetlb_page(vma))
274 goto Einval;
275
276 /* We can't remap across vm area boundaries */
277 if (old_len > vma->vm_end - addr)
278 goto Efault;
279
280 if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)) {
281 if (new_len > old_len)
282 goto Efault;
283 }
284
285 if (vma->vm_flags & VM_LOCKED) {
286 unsigned long locked, lock_limit;
287 locked = mm->locked_vm << PAGE_SHIFT;
288 lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
289 locked += new_len - old_len;
290 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
291 goto Eagain;
292 }
293
294 if (!may_expand_vm(mm, (new_len - old_len) >> PAGE_SHIFT))
295 goto Enomem;
296
297 if (vma->vm_flags & VM_ACCOUNT) {
298 unsigned long charged = (new_len - old_len) >> PAGE_SHIFT;
299 if (security_vm_enough_memory(charged))
300 goto Efault;
301 *p = charged;
302 }
303
304 return vma;
305
306Efault: /* very odd choice for most of the cases, but... */
307 return ERR_PTR(-EFAULT);
308Einval:
309 return ERR_PTR(-EINVAL);
310Enomem:
311 return ERR_PTR(-ENOMEM);
312Eagain:
313 return ERR_PTR(-EAGAIN);
314}
315
316static unsigned long mremap_to(unsigned long addr,
317 unsigned long old_len, unsigned long new_addr,
318 unsigned long new_len)
319{
320 struct mm_struct *mm = current->mm;
321 struct vm_area_struct *vma;
322 unsigned long ret = -EINVAL;
323 unsigned long charged = 0;
324 unsigned long map_flags;
325
326 if (new_addr & ~PAGE_MASK)
327 goto out;
328
329 if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len)
330 goto out;
331
332 /* Check if the location we're moving into overlaps the
333 * old location at all, and fail if it does.
334 */
335 if ((new_addr <= addr) && (new_addr+new_len) > addr)
336 goto out;
337
338 if ((addr <= new_addr) && (addr+old_len) > new_addr)
339 goto out;
340
341 ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1);
342 if (ret)
343 goto out;
344
345 ret = do_munmap(mm, new_addr, new_len);
346 if (ret)
347 goto out;
348
349 if (old_len >= new_len) {
350 ret = do_munmap(mm, addr+new_len, old_len - new_len);
351 if (ret && old_len != new_len)
352 goto out;
353 old_len = new_len;
354 }
355
356 vma = vma_to_resize(addr, old_len, new_len, &charged);
357 if (IS_ERR(vma)) {
358 ret = PTR_ERR(vma);
359 goto out;
360 }
361
362 map_flags = MAP_FIXED;
363 if (vma->vm_flags & VM_MAYSHARE)
364 map_flags |= MAP_SHARED;
365
366 ret = get_unmapped_area(vma->vm_file, new_addr, new_len, vma->vm_pgoff +
367 ((addr - vma->vm_start) >> PAGE_SHIFT),
368 map_flags);
369 if (ret & ~PAGE_MASK)
370 goto out1;
371
372 ret = move_vma(vma, addr, old_len, new_len, new_addr);
373 if (!(ret & ~PAGE_MASK))
374 goto out;
375out1:
376 vm_unacct_memory(charged);
377
378out:
379 return ret;
380}
381
382static int vma_expandable(struct vm_area_struct *vma, unsigned long delta)
383{
384 unsigned long end = vma->vm_end + delta;
385 if (end < vma->vm_end) /* overflow */
386 return 0;
387 if (vma->vm_next && vma->vm_next->vm_start < end) /* intersection */
388 return 0;
389 if (get_unmapped_area(NULL, vma->vm_start, end - vma->vm_start,
390 0, MAP_FIXED) & ~PAGE_MASK)
391 return 0;
392 return 1;
393}
394
264/* 395/*
265 * Expand (or shrink) an existing mapping, potentially moving it at the 396 * Expand (or shrink) an existing mapping, potentially moving it at the
266 * same time (controlled by the MREMAP_MAYMOVE flag and available VM space) 397 * same time (controlled by the MREMAP_MAYMOVE flag and available VM space)
@@ -294,32 +425,10 @@ unsigned long do_mremap(unsigned long addr,
294 if (!new_len) 425 if (!new_len)
295 goto out; 426 goto out;
296 427
297 /* new_addr is only valid if MREMAP_FIXED is specified */
298 if (flags & MREMAP_FIXED) { 428 if (flags & MREMAP_FIXED) {
299 if (new_addr & ~PAGE_MASK) 429 if (flags & MREMAP_MAYMOVE)
300 goto out; 430 ret = mremap_to(addr, old_len, new_addr, new_len);
301 if (!(flags & MREMAP_MAYMOVE)) 431 goto out;
302 goto out;
303
304 if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len)
305 goto out;
306
307 /* Check if the location we're moving into overlaps the
308 * old location at all, and fail if it does.
309 */
310 if ((new_addr <= addr) && (new_addr+new_len) > addr)
311 goto out;
312
313 if ((addr <= new_addr) && (addr+old_len) > new_addr)
314 goto out;
315
316 ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1);
317 if (ret)
318 goto out;
319
320 ret = do_munmap(mm, new_addr, new_len);
321 if (ret)
322 goto out;
323 } 432 }
324 433
325 /* 434 /*
@@ -332,60 +441,23 @@ unsigned long do_mremap(unsigned long addr,
332 if (ret && old_len != new_len) 441 if (ret && old_len != new_len)
333 goto out; 442 goto out;
334 ret = addr; 443 ret = addr;
335 if (!(flags & MREMAP_FIXED) || (new_addr == addr)) 444 goto out;
336 goto out;
337 old_len = new_len;
338 } 445 }
339 446
340 /* 447 /*
341 * Ok, we need to grow.. or relocate. 448 * Ok, we need to grow..
342 */ 449 */
343 ret = -EFAULT; 450 vma = vma_to_resize(addr, old_len, new_len, &charged);
344 vma = find_vma(mm, addr); 451 if (IS_ERR(vma)) {
345 if (!vma || vma->vm_start > addr) 452 ret = PTR_ERR(vma);
346 goto out;
347 if (is_vm_hugetlb_page(vma)) {
348 ret = -EINVAL;
349 goto out;
350 }
351 /* We can't remap across vm area boundaries */
352 if (old_len > vma->vm_end - addr)
353 goto out;
354 if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)) {
355 if (new_len > old_len)
356 goto out;
357 }
358 if (vma->vm_flags & VM_LOCKED) {
359 unsigned long locked, lock_limit;
360 locked = mm->locked_vm << PAGE_SHIFT;
361 lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
362 locked += new_len - old_len;
363 ret = -EAGAIN;
364 if (locked > lock_limit && !capable(CAP_IPC_LOCK))
365 goto out;
366 }
367 if (!may_expand_vm(mm, (new_len - old_len) >> PAGE_SHIFT)) {
368 ret = -ENOMEM;
369 goto out; 453 goto out;
370 } 454 }
371 455
372 if (vma->vm_flags & VM_ACCOUNT) {
373 charged = (new_len - old_len) >> PAGE_SHIFT;
374 if (security_vm_enough_memory(charged))
375 goto out_nc;
376 }
377
378 /* old_len exactly to the end of the area.. 456 /* old_len exactly to the end of the area..
379 * And we're not relocating the area.
380 */ 457 */
381 if (old_len == vma->vm_end - addr && 458 if (old_len == vma->vm_end - addr) {
382 !((flags & MREMAP_FIXED) && (addr != new_addr)) &&
383 (old_len != new_len || !(flags & MREMAP_MAYMOVE))) {
384 unsigned long max_addr = TASK_SIZE;
385 if (vma->vm_next)
386 max_addr = vma->vm_next->vm_start;
387 /* can we just expand the current mapping? */ 459 /* can we just expand the current mapping? */
388 if (max_addr - addr >= new_len) { 460 if (vma_expandable(vma, new_len - old_len)) {
389 int pages = (new_len - old_len) >> PAGE_SHIFT; 461 int pages = (new_len - old_len) >> PAGE_SHIFT;
390 462
391 vma_adjust(vma, vma->vm_start, 463 vma_adjust(vma, vma->vm_start,
@@ -409,28 +481,27 @@ unsigned long do_mremap(unsigned long addr,
409 */ 481 */
410 ret = -ENOMEM; 482 ret = -ENOMEM;
411 if (flags & MREMAP_MAYMOVE) { 483 if (flags & MREMAP_MAYMOVE) {
412 if (!(flags & MREMAP_FIXED)) { 484 unsigned long map_flags = 0;
413 unsigned long map_flags = 0; 485 if (vma->vm_flags & VM_MAYSHARE)
414 if (vma->vm_flags & VM_MAYSHARE) 486 map_flags |= MAP_SHARED;
415 map_flags |= MAP_SHARED; 487
416 488 new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
417 new_addr = get_unmapped_area(vma->vm_file, 0, new_len, 489 vma->vm_pgoff +
418 vma->vm_pgoff, map_flags); 490 ((addr - vma->vm_start) >> PAGE_SHIFT),
419 if (new_addr & ~PAGE_MASK) { 491 map_flags);
420 ret = new_addr; 492 if (new_addr & ~PAGE_MASK) {
421 goto out; 493 ret = new_addr;
422 } 494 goto out;
423
424 ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1);
425 if (ret)
426 goto out;
427 } 495 }
496
497 ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1);
498 if (ret)
499 goto out;
428 ret = move_vma(vma, addr, old_len, new_len, new_addr); 500 ret = move_vma(vma, addr, old_len, new_len, new_addr);
429 } 501 }
430out: 502out:
431 if (ret & ~PAGE_MASK) 503 if (ret & ~PAGE_MASK)
432 vm_unacct_memory(charged); 504 vm_unacct_memory(charged);
433out_nc:
434 return ret; 505 return ret;
435} 506}
436 507
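
The MREMAP_FIXED case is now handled entirely by mremap_to(): it requires MREMAP_MAYMOVE, a page-aligned destination below TASK_SIZE that does not overlap the old range, and it unmaps whatever already sits at the destination before moving. A hedged userspace sketch of a call that exercises this path; the helper name and the caller-supplied addresses are purely illustrative:

#define _GNU_SOURCE
#include <stdio.h>
#include <sys/mman.h>

/* Move an existing mapping to a fixed, non-overlapping destination. */
static void *move_mapping(void *old_addr, size_t old_len,
			  void *new_addr, size_t new_len)
{
	void *p = mremap(old_addr, old_len, new_len,
			 MREMAP_MAYMOVE | MREMAP_FIXED, new_addr);

	if (p == MAP_FAILED)	/* e.g. EINVAL (overlap/alignment), EAGAIN, ENOMEM */
		perror("mremap");
	return p;
}
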
diff --git a/mm/slab.c b/mm/slab.c
index 7dfa481c96ba..a6c9166996a9 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -604,6 +604,26 @@ static struct kmem_cache cache_cache = {
604 604
605#define BAD_ALIEN_MAGIC 0x01020304ul 605#define BAD_ALIEN_MAGIC 0x01020304ul
606 606
607/*
608 * chicken and egg problem: delay the per-cpu array allocation
609 * until the general caches are up.
610 */
611static enum {
612 NONE,
613 PARTIAL_AC,
614 PARTIAL_L3,
615 EARLY,
616 FULL
617} g_cpucache_up;
618
619/*
620 * used by boot code to determine if it can use slab based allocator
621 */
622int slab_is_available(void)
623{
624 return g_cpucache_up >= EARLY;
625}
626
607#ifdef CONFIG_LOCKDEP 627#ifdef CONFIG_LOCKDEP
608 628
609/* 629/*
@@ -620,40 +640,52 @@ static struct kmem_cache cache_cache = {
620static struct lock_class_key on_slab_l3_key; 640static struct lock_class_key on_slab_l3_key;
621static struct lock_class_key on_slab_alc_key; 641static struct lock_class_key on_slab_alc_key;
622 642
623static inline void init_lock_keys(void) 643static void init_node_lock_keys(int q)
624
625{ 644{
626 int q;
627 struct cache_sizes *s = malloc_sizes; 645 struct cache_sizes *s = malloc_sizes;
628 646
629 while (s->cs_size != ULONG_MAX) { 647 if (g_cpucache_up != FULL)
630 for_each_node(q) { 648 return;
631 struct array_cache **alc; 649
632 int r; 650 for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
633 struct kmem_list3 *l3 = s->cs_cachep->nodelists[q]; 651 struct array_cache **alc;
634 if (!l3 || OFF_SLAB(s->cs_cachep)) 652 struct kmem_list3 *l3;
635 continue; 653 int r;
636 lockdep_set_class(&l3->list_lock, &on_slab_l3_key); 654
637 alc = l3->alien; 655 l3 = s->cs_cachep->nodelists[q];
638 /* 656 if (!l3 || OFF_SLAB(s->cs_cachep))
639 * FIXME: This check for BAD_ALIEN_MAGIC 657 return;
640 * should go away when common slab code is taught to 658 lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
641 * work even without alien caches. 659 alc = l3->alien;
642 * Currently, non NUMA code returns BAD_ALIEN_MAGIC 660 /*
643 * for alloc_alien_cache, 661 * FIXME: This check for BAD_ALIEN_MAGIC
644 */ 662 * should go away when common slab code is taught to
645 if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC) 663 * work even without alien caches.
646 continue; 664 * Currently, non NUMA code returns BAD_ALIEN_MAGIC
647 for_each_node(r) { 665 * for alloc_alien_cache,
648 if (alc[r]) 666 */
649 lockdep_set_class(&alc[r]->lock, 667 if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
650 &on_slab_alc_key); 668 return;
651 } 669 for_each_node(r) {
670 if (alc[r])
671 lockdep_set_class(&alc[r]->lock,
672 &on_slab_alc_key);
652 } 673 }
653 s++;
654 } 674 }
655} 675}
676
677static inline void init_lock_keys(void)
678{
679 int node;
680
681 for_each_node(node)
682 init_node_lock_keys(node);
683}
656#else 684#else
685static void init_node_lock_keys(int q)
686{
687}
688
657static inline void init_lock_keys(void) 689static inline void init_lock_keys(void)
658{ 690{
659} 691}
@@ -665,26 +697,6 @@ static inline void init_lock_keys(void)
665static DEFINE_MUTEX(cache_chain_mutex); 697static DEFINE_MUTEX(cache_chain_mutex);
666static struct list_head cache_chain; 698static struct list_head cache_chain;
667 699
668/*
669 * chicken and egg problem: delay the per-cpu array allocation
670 * until the general caches are up.
671 */
672static enum {
673 NONE,
674 PARTIAL_AC,
675 PARTIAL_L3,
676 EARLY,
677 FULL
678} g_cpucache_up;
679
680/*
681 * used by boot code to determine if it can use slab based allocator
682 */
683int slab_is_available(void)
684{
685 return g_cpucache_up >= EARLY;
686}
687
688static DEFINE_PER_CPU(struct delayed_work, reap_work); 700static DEFINE_PER_CPU(struct delayed_work, reap_work);
689 701
690static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep) 702static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
@@ -1254,6 +1266,8 @@ static int __cpuinit cpuup_prepare(long cpu)
1254 kfree(shared); 1266 kfree(shared);
1255 free_alien_cache(alien); 1267 free_alien_cache(alien);
1256 } 1268 }
1269 init_node_lock_keys(node);
1270
1257 return 0; 1271 return 0;
1258bad: 1272bad:
1259 cpuup_canceled(cpu); 1273 cpuup_canceled(cpu);
@@ -3103,13 +3117,19 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
3103 } else { 3117 } else {
3104 STATS_INC_ALLOCMISS(cachep); 3118 STATS_INC_ALLOCMISS(cachep);
3105 objp = cache_alloc_refill(cachep, flags); 3119 objp = cache_alloc_refill(cachep, flags);
3120 /*
3121 * the 'ac' may be updated by cache_alloc_refill(),
3122 * and kmemleak_erase() requires its correct value.
3123 */
3124 ac = cpu_cache_get(cachep);
3106 } 3125 }
3107 /* 3126 /*
3108 * To avoid a false negative, if an object that is in one of the 3127 * To avoid a false negative, if an object that is in one of the
3109 * per-CPU caches is leaked, we need to make sure kmemleak doesn't 3128 * per-CPU caches is leaked, we need to make sure kmemleak doesn't
3110 * treat the array pointers as a reference to the object. 3129 * treat the array pointers as a reference to the object.
3111 */ 3130 */
3112 kmemleak_erase(&ac->entry[ac->avail]); 3131 if (objp)
3132 kmemleak_erase(&ac->entry[ac->avail]);
3113 return objp; 3133 return objp;
3114} 3134}
3115 3135
@@ -3306,7 +3326,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
3306 cache_alloc_debugcheck_before(cachep, flags); 3326 cache_alloc_debugcheck_before(cachep, flags);
3307 local_irq_save(save_flags); 3327 local_irq_save(save_flags);
3308 3328
3309 if (unlikely(nodeid == -1)) 3329 if (nodeid == -1)
3310 nodeid = numa_node_id(); 3330 nodeid = numa_node_id();
3311 3331
3312 if (unlikely(!cachep->nodelists[nodeid])) { 3332 if (unlikely(!cachep->nodelists[nodeid])) {
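
Moving g_cpucache_up and slab_is_available() above the lockdep code lets init_node_lock_keys() bail out while g_cpucache_up != FULL. For context, a hedged sketch of the boot-time pattern slab_is_available() exists to support; the bootmem fallback below is illustrative only and not part of this patch:

/* Early-boot allocation helper (illustrative): use kmalloc() once the
 * general slab caches are up, otherwise fall back to the boot allocator. */
static void * __init example_early_alloc(size_t size)
{
	if (slab_is_available())
		return kmalloc(size, GFP_KERNEL);
	return alloc_bootmem(size);
}
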
diff --git a/mm/slub.c b/mm/slub.c
index 4996fc719552..da0ce55965dc 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1735,7 +1735,7 @@ static __always_inline void *slab_alloc(struct kmem_cache *s,
1735 } 1735 }
1736 local_irq_restore(flags); 1736 local_irq_restore(flags);
1737 1737
1738 if (unlikely((gfpflags & __GFP_ZERO) && object)) 1738 if (unlikely(gfpflags & __GFP_ZERO) && object)
1739 memset(object, 0, objsize); 1739 memset(object, 0, objsize);
1740 1740
1741 kmemcheck_slab_alloc(s, gfpflags, object, c->objsize); 1741 kmemcheck_slab_alloc(s, gfpflags, object, c->objsize);
@@ -4371,12 +4371,28 @@ static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
4371 return len + sprintf(buf + len, "\n"); 4371 return len + sprintf(buf + len, "\n");
4372} 4372}
4373 4373
4374static void clear_stat(struct kmem_cache *s, enum stat_item si)
4375{
4376 int cpu;
4377
4378 for_each_online_cpu(cpu)
4379 get_cpu_slab(s, cpu)->stat[si] = 0;
4380}
4381
4374#define STAT_ATTR(si, text) \ 4382#define STAT_ATTR(si, text) \
4375static ssize_t text##_show(struct kmem_cache *s, char *buf) \ 4383static ssize_t text##_show(struct kmem_cache *s, char *buf) \
4376{ \ 4384{ \
4377 return show_stat(s, buf, si); \ 4385 return show_stat(s, buf, si); \
4378} \ 4386} \
4379SLAB_ATTR_RO(text); \ 4387static ssize_t text##_store(struct kmem_cache *s, \
4388 const char *buf, size_t length) \
4389{ \
4390 if (buf[0] != '0') \
4391 return -EINVAL; \
4392 clear_stat(s, si); \
4393 return length; \
4394} \
4395SLAB_ATTR(text); \
4380 4396
4381STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath); 4397STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
4382STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath); 4398STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
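
Making the STAT_ATTR() files writable allows the per-cpu counters to be cleared between measurements: the generated text##_store() accepts only a string starting with '0', returns -EINVAL for anything else, and zeroes the counter on every online CPU via clear_stat(). A hedged userspace example; the /sys/kernel/slab path and the kmalloc-64 cache name reflect the usual CONFIG_SLUB_STATS layout and are used here only for illustration:

#include <stdio.h>

int main(void)
{
	/* Reset one SLUB statistic so the next read starts from zero. */
	FILE *f = fopen("/sys/kernel/slab/kmalloc-64/alloc_fastpath", "w");

	if (!f) {
		perror("fopen");
		return 1;
	}
	fputc('0', f);		/* anything other than '0' is rejected with EINVAL */
	fclose(f);
	return 0;
}
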
diff --git a/mm/truncate.c b/mm/truncate.c
index 450cebdabfc0..2c147a7e5f2c 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -490,7 +490,7 @@ EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);
490 * Any pages which are found to be mapped into pagetables are unmapped prior to 490 * Any pages which are found to be mapped into pagetables are unmapped prior to
491 * invalidation. 491 * invalidation.
492 * 492 *
493 * Returns -EIO if any pages could not be invalidated. 493 * Returns -EBUSY if any pages could not be invalidated.
494 */ 494 */
495int invalidate_inode_pages2(struct address_space *mapping) 495int invalidate_inode_pages2(struct address_space *mapping)
496{ 496{
diff --git a/mm/util.c b/mm/util.c
index 7c35ad95f927..b377ce430803 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -4,6 +4,10 @@
4#include <linux/module.h> 4#include <linux/module.h>
5#include <linux/err.h> 5#include <linux/err.h>
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/hugetlb.h>
8#include <linux/syscalls.h>
9#include <linux/mman.h>
10#include <linux/file.h>
7#include <asm/uaccess.h> 11#include <asm/uaccess.h>
8 12
9#define CREATE_TRACE_POINTS 13#define CREATE_TRACE_POINTS
@@ -268,6 +272,46 @@ int __attribute__((weak)) get_user_pages_fast(unsigned long start,
268} 272}
269EXPORT_SYMBOL_GPL(get_user_pages_fast); 273EXPORT_SYMBOL_GPL(get_user_pages_fast);
270 274
275SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
276 unsigned long, prot, unsigned long, flags,
277 unsigned long, fd, unsigned long, pgoff)
278{
279 struct file * file = NULL;
280 unsigned long retval = -EBADF;
281
282 if (!(flags & MAP_ANONYMOUS)) {
283 if (unlikely(flags & MAP_HUGETLB))
284 return -EINVAL;
285 file = fget(fd);
286 if (!file)
287 goto out;
288 } else if (flags & MAP_HUGETLB) {
289 struct user_struct *user = NULL;
290 /*
291 * VM_NORESERVE is used because the reservations will be
292 * taken when vm_ops->mmap() is called
293 * A dummy user value is used because we are not locking
294 * memory so no accounting is necessary
295 */
296 len = ALIGN(len, huge_page_size(&default_hstate));
297 file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
298 &user, HUGETLB_ANONHUGE_INODE);
299 if (IS_ERR(file))
300 return PTR_ERR(file);
301 }
302
303 flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
304
305 down_write(&current->mm->mmap_sem);
306 retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
307 up_write(&current->mm->mmap_sem);
308
309 if (file)
310 fput(file);
311out:
312 return retval;
313}
314
271/* Tracepoints definitions. */ 315/* Tracepoints definitions. */
272EXPORT_TRACEPOINT_SYMBOL(kmalloc); 316EXPORT_TRACEPOINT_SYMBOL(kmalloc);
273EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc); 317EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);
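
With the hugetlb branch removed from do_mmap_pgoff() in mm/mmap.c above, anonymous MAP_HUGETLB requests are now prepared here in sys_mmap_pgoff() via hugetlb_file_setup(), while MAP_HUGETLB combined with a real file descriptor is rejected with -EINVAL. A hedged userspace sketch of a mapping that takes this path; the 8 MB length is purely illustrative and assumes huge pages have been reserved, and the MAP_HUGETLB fallback definition mirrors the kernel's asm-generic value:

#include <stdio.h>
#include <sys/mman.h>

#ifndef MAP_HUGETLB
#define MAP_HUGETLB 0x40000	/* from asm-generic/mman.h, for older libc headers */
#endif

int main(void)
{
	size_t len = 8UL << 20;	/* rounded up to the huge page size by the kernel */
	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);

	if (p == MAP_FAILED) {	/* hugetlb setup failed; errno tells why */
		perror("mmap(MAP_HUGETLB)");
		return 1;
	}
	munmap(p, len);
	return 0;
}
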