Diffstat (limited to 'mm')
-rw-r--r--   mm/Kconfig             5
-rw-r--r--   mm/backing-dev.c      11
-rw-r--r--   mm/bootmem.c          24
-rw-r--r--   mm/filemap.c          51
-rw-r--r--   mm/highmem.c          17
-rw-r--r--   mm/kmemleak.c          4
-rw-r--r--   mm/ksm.c               1
-rw-r--r--   mm/memcontrol.c        6
-rw-r--r--   mm/memory-failure.c    2
-rw-r--r--   mm/memory_hotplug.c   24
-rw-r--r--   mm/migrate.c           2
-rw-r--r--   mm/mmap.c             46
-rw-r--r--   mm/mremap.c          241
-rw-r--r--   mm/page-writeback.c   12
-rw-r--r--   mm/page_alloc.c        4
-rw-r--r--   mm/percpu.c          121
-rw-r--r--   mm/slab.c            118
-rw-r--r--   mm/slub.c             18
-rw-r--r--   mm/truncate.c          2
-rw-r--r--   mm/util.c             44
20 files changed, 468 insertions, 285 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index fd3386242cf0..44cf6f0a3a6d 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -128,12 +128,9 @@ config SPARSEMEM_VMEMMAP
 config MEMORY_HOTPLUG
 	bool "Allow for memory hot-add"
 	depends on SPARSEMEM || X86_64_ACPI_NUMA
-	depends on HOTPLUG && !(HIBERNATION && !S390) && ARCH_ENABLE_MEMORY_HOTPLUG
+	depends on HOTPLUG && ARCH_ENABLE_MEMORY_HOTPLUG
 	depends on (IA64 || X86 || PPC_BOOK3S_64 || SUPERH || S390)
 
-comment "Memory hotplug is currently incompatible with Software Suspend"
-	depends on SPARSEMEM && HOTPLUG && HIBERNATION && !S390
-
 config MEMORY_HOTPLUG_SPARSE
 	def_bool y
 	depends on SPARSEMEM && MEMORY_HOTPLUG
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 1065b715ef64..0e8ca0347707 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -604,10 +604,14 @@ static void bdi_wb_shutdown(struct backing_dev_info *bdi)
 
 	/*
 	 * Finally, kill the kernel threads. We don't need to be RCU
-	 * safe anymore, since the bdi is gone from visibility.
+	 * safe anymore, since the bdi is gone from visibility. Force
+	 * unfreeze of the thread before calling kthread_stop(), otherwise
+	 * it would never exet if it is currently stuck in the refrigerator.
 	 */
-	list_for_each_entry(wb, &bdi->wb_list, list)
+	list_for_each_entry(wb, &bdi->wb_list, list) {
+		thaw_process(wb->task);
 		kthread_stop(wb->task);
+	}
 }
 
 /*
@@ -628,6 +632,8 @@ static void bdi_prune_sb(struct backing_dev_info *bdi)
 void bdi_unregister(struct backing_dev_info *bdi)
 {
 	if (bdi->dev) {
+		bdi_prune_sb(bdi);
+
 		if (!bdi_cap_flush_forker(bdi))
 			bdi_wb_shutdown(bdi);
 		bdi_debug_unregister(bdi);
@@ -697,7 +703,6 @@ void bdi_destroy(struct backing_dev_info *bdi)
 		spin_unlock(&inode_lock);
 	}
 
-	bdi_prune_sb(bdi);
 	bdi_unregister(bdi);
 
 	for (i = 0; i < NR_BDI_STAT_ITEMS; i++)
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 555d5d2731c6..d1dc23cc7f10 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -143,6 +143,30 @@ unsigned long __init init_bootmem(unsigned long start, unsigned long pages)
 	return init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages);
 }
 
+/*
+ * free_bootmem_late - free bootmem pages directly to page allocator
+ * @addr: starting address of the range
+ * @size: size of the range in bytes
+ *
+ * This is only useful when the bootmem allocator has already been torn
+ * down, but we are still initializing the system.  Pages are given directly
+ * to the page allocator, no bootmem metadata is updated because it is gone.
+ */
+void __init free_bootmem_late(unsigned long addr, unsigned long size)
+{
+	unsigned long cursor, end;
+
+	kmemleak_free_part(__va(addr), size);
+
+	cursor = PFN_UP(addr);
+	end = PFN_DOWN(addr + size);
+
+	for (; cursor < end; cursor++) {
+		__free_pages_bootmem(pfn_to_page(cursor), 0);
+		totalram_pages++;
+	}
+}
+
 static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 {
 	int aligned;
diff --git a/mm/filemap.c b/mm/filemap.c
index ef169f37156d..8b4d88f9249e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -260,27 +260,27 @@ int filemap_flush(struct address_space *mapping)
 EXPORT_SYMBOL(filemap_flush);
 
 /**
- * wait_on_page_writeback_range - wait for writeback to complete
- * @mapping:	target address_space
- * @start:	beginning page index
- * @end:	ending page index
+ * filemap_fdatawait_range - wait for writeback to complete
+ * @mapping:		address space structure to wait for
+ * @start_byte:		offset in bytes where the range starts
+ * @end_byte:		offset in bytes where the range ends (inclusive)
 *
- * Wait for writeback to complete against pages indexed by start->end
- * inclusive
+ * Walk the list of under-writeback pages of the given address space
+ * in the given range and wait for all of them.
 */
-int wait_on_page_writeback_range(struct address_space *mapping,
-				pgoff_t start, pgoff_t end)
+int filemap_fdatawait_range(struct address_space *mapping, loff_t start_byte,
+			    loff_t end_byte)
 {
+	pgoff_t index = start_byte >> PAGE_CACHE_SHIFT;
+	pgoff_t end = end_byte >> PAGE_CACHE_SHIFT;
 	struct pagevec pvec;
 	int nr_pages;
 	int ret = 0;
-	pgoff_t index;
 
-	if (end < start)
+	if (end_byte < start_byte)
 		return 0;
 
 	pagevec_init(&pvec, 0);
-	index = start;
 	while ((index <= end) &&
 			(nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
 			PAGECACHE_TAG_WRITEBACK,
@@ -310,25 +310,6 @@ int wait_on_page_writeback_range(struct address_space *mapping,
 
 	return ret;
 }
-
-/**
- * filemap_fdatawait_range - wait for all under-writeback pages to complete in a given range
- * @mapping:		address space structure to wait for
- * @start:		offset in bytes where the range starts
- * @end:		offset in bytes where the range ends (inclusive)
- *
- * Walk the list of under-writeback pages of the given address space
- * in the given range and wait for all of them.
- *
- * This is just a simple wrapper so that callers don't have to convert offsets
- * to page indexes themselves
- */
-int filemap_fdatawait_range(struct address_space *mapping, loff_t start,
-			    loff_t end)
-{
-	return wait_on_page_writeback_range(mapping, start >> PAGE_CACHE_SHIFT,
-					    end >> PAGE_CACHE_SHIFT);
-}
 EXPORT_SYMBOL(filemap_fdatawait_range);
 
 /**
@@ -345,8 +326,7 @@ int filemap_fdatawait(struct address_space *mapping)
 	if (i_size == 0)
 		return 0;
 
-	return wait_on_page_writeback_range(mapping, 0,
-				(i_size - 1) >> PAGE_CACHE_SHIFT);
+	return filemap_fdatawait_range(mapping, 0, i_size - 1);
 }
 EXPORT_SYMBOL(filemap_fdatawait);
 
@@ -393,9 +373,8 @@ int filemap_write_and_wait_range(struct address_space *mapping,
 						 WB_SYNC_ALL);
 		/* See comment of filemap_write_and_wait() */
 		if (err != -EIO) {
-			int err2 = wait_on_page_writeback_range(mapping,
-						lstart >> PAGE_CACHE_SHIFT,
-						lend >> PAGE_CACHE_SHIFT);
+			int err2 = filemap_fdatawait_range(mapping,
+						lstart, lend);
 			if (!err)
 				err = err2;
 		}
@@ -1844,7 +1823,7 @@ static size_t __iovec_copy_from_user_inatomic(char *vaddr,
 
 /*
 * Copy as much as we can into the page and return the number of bytes which
- * were sucessfully copied.  If a fault is encountered then return the number of
+ * were successfully copied.  If a fault is encountered then return the number of
 * bytes which were copied.
 */
 size_t iov_iter_copy_from_user_atomic(struct page *page,
diff --git a/mm/highmem.c b/mm/highmem.c
index 25878cc49daa..9c1e627f282e 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -426,16 +426,21 @@ void __init page_address_init(void)
 
 void debug_kmap_atomic(enum km_type type)
 {
-	static unsigned warn_count = 10;
+	static int warn_count = 10;
 
-	if (unlikely(warn_count == 0))
+	if (unlikely(warn_count < 0))
 		return;
 
 	if (unlikely(in_interrupt())) {
-		if (in_irq()) {
+		if (in_nmi()) {
+			if (type != KM_NMI && type != KM_NMI_PTE) {
+				WARN_ON(1);
+				warn_count--;
+			}
+		} else if (in_irq()) {
 			if (type != KM_IRQ0 && type != KM_IRQ1 &&
 			    type != KM_BIO_SRC_IRQ && type != KM_BIO_DST_IRQ &&
-			    type != KM_BOUNCE_READ) {
+			    type != KM_BOUNCE_READ && type != KM_IRQ_PTE) {
 				WARN_ON(1);
 				warn_count--;
 			}
@@ -452,7 +457,9 @@ void debug_kmap_atomic(enum km_type type)
 	}
 
 	if (type == KM_IRQ0 || type == KM_IRQ1 || type == KM_BOUNCE_READ ||
-	    type == KM_BIO_SRC_IRQ || type == KM_BIO_DST_IRQ) {
+	    type == KM_BIO_SRC_IRQ || type == KM_BIO_DST_IRQ ||
+	    type == KM_IRQ_PTE || type == KM_NMI ||
+	    type == KM_NMI_PTE ) {
 		if (!irqs_disabled()) {
 			WARN_ON(1);
 			warn_count--;
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 8bf765c4f58d..13f33b3081ec 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1050,8 +1050,8 @@ static void scan_object(struct kmemleak_object *object)
 	unsigned long flags;
 
 	/*
-	 * Once the object->lock is aquired, the corresponding memory block
-	 * cannot be freed (the same lock is aquired in delete_object).
+	 * Once the object->lock is acquired, the corresponding memory block
+	 * cannot be freed (the same lock is acquired in delete_object).
 	 */
 	spin_lock_irqsave(&object->lock, flags);
 	if (object->flags & OBJECT_NO_SCAN)
diff --git a/mm/ksm.c b/mm/ksm.c
index bef1af4f77e3..5575f8628fef 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1012,6 +1012,7 @@ static struct rmap_item *unstable_tree_search_insert(struct page *page,
 		struct rmap_item *tree_rmap_item;
 		int ret;
 
+		cond_resched();
 		tree_rmap_item = rb_entry(*new, struct rmap_item, node);
 		page2[0] = get_mergeable_page(tree_rmap_item);
 		if (!page2[0])
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f99f5991d6bb..c31a310aa146 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -209,7 +209,7 @@ struct mem_cgroup {
 	int	prev_priority;	/* for recording reclaim priority */
 
 	/*
-	 * While reclaiming in a hiearchy, we cache the last child we
+	 * While reclaiming in a hierarchy, we cache the last child we
 	 * reclaimed from.
 	 */
 	int last_scanned_child;
@@ -1720,7 +1720,7 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
 /*
 * While swap-in, try_charge -> commit or cancel, the page is locked.
 * And when try_charge() successfully returns, one refcnt to memcg without
- * struct page_cgroup is aquired. This refcnt will be cumsumed by
+ * struct page_cgroup is acquired. This refcnt will be consumed by
 * "commit()" or removed by "cancel()"
 */
 int mem_cgroup_try_charge_swapin(struct mm_struct *mm,
@@ -2466,7 +2466,7 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft,
 
 	cgroup_lock();
 	/*
-	 * If parent's use_hiearchy is set, we can't make any modifications
+	 * If parent's use_hierarchy is set, we can't make any modifications
 	 * in the child subtrees. If it is unset, then the change can
 	 * occur, provided the current cgroup has no children.
 	 *
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index dacc64183874..1ac49fef95ab 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -174,7 +174,7 @@ static void kill_procs_ao(struct list_head *to_kill, int doit, int trapno,
 	list_for_each_entry_safe (tk, next, to_kill, nd) {
 		if (doit) {
 			/*
-			 * In case something went wrong with munmaping
+			 * In case something went wrong with munmapping
 			 * make sure the process doesn't catch the
 			 * signal and then access the memory. Just kill it.
 			 * the signal handlers
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 821dee596377..2047465cd27c 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -26,6 +26,7 @@
 #include <linux/migrate.h>
 #include <linux/page-isolation.h>
 #include <linux/pfn.h>
+#include <linux/suspend.h>
 
 #include <asm/tlbflush.h>
 
@@ -447,7 +448,8 @@ int online_pages(unsigned long pfn, unsigned long nr_pages)
 }
 #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
 
-static pg_data_t *hotadd_new_pgdat(int nid, u64 start)
+/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */
+static pg_data_t __ref *hotadd_new_pgdat(int nid, u64 start)
 {
 	struct pglist_data *pgdat;
 	unsigned long zones_size[MAX_NR_ZONES] = {0};
@@ -484,14 +486,18 @@ int __ref add_memory(int nid, u64 start, u64 size)
 	struct resource *res;
 	int ret;
 
+	lock_system_sleep();
+
 	res = register_memory_resource(start, size);
+	ret = -EEXIST;
 	if (!res)
-		return -EEXIST;
+		goto out;
 
 	if (!node_online(nid)) {
 		pgdat = hotadd_new_pgdat(nid, start);
+		ret = -ENOMEM;
 		if (!pgdat)
-			return -ENOMEM;
+			goto out;
 		new_pgdat = 1;
 	}
 
@@ -514,7 +520,8 @@ int __ref add_memory(int nid, u64 start, u64 size)
 		BUG_ON(ret);
 	}
 
-	return ret;
+	goto out;
+
 error:
 	/* rollback pgdat allocation and others */
 	if (new_pgdat)
@@ -522,6 +529,8 @@ error:
 	if (res)
 		release_memory_resource(res);
 
+out:
+	unlock_system_sleep();
 	return ret;
 }
 EXPORT_SYMBOL_GPL(add_memory);
@@ -758,6 +767,8 @@ int offline_pages(unsigned long start_pfn,
 	if (!test_pages_in_a_zone(start_pfn, end_pfn))
 		return -EINVAL;
 
+	lock_system_sleep();
+
 	zone = page_zone(pfn_to_page(start_pfn));
 	node = zone_to_nid(zone);
 	nr_pages = end_pfn - start_pfn;
@@ -765,7 +776,7 @@ int offline_pages(unsigned long start_pfn,
 	/* set above range as isolated */
 	ret = start_isolate_page_range(start_pfn, end_pfn);
 	if (ret)
-		return ret;
+		goto out;
 
 	arg.start_pfn = start_pfn;
 	arg.nr_pages = nr_pages;
@@ -843,6 +854,7 @@ repeat:
 	writeback_set_ratelimit();
 
 	memory_notify(MEM_OFFLINE, &arg);
+	unlock_system_sleep();
 	return 0;
 
 failed_removal:
@@ -852,6 +864,8 @@ failed_removal:
 	/* pushback to free area */
 	undo_isolate_page_range(start_pfn, end_pfn);
 
+out:
+	unlock_system_sleep();
 	return ret;
 }
 
diff --git a/mm/migrate.c b/mm/migrate.c
index 1a4bf4813780..7dbcb22316d2 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -602,7 +602,7 @@ static int unmap_and_move(new_page_t get_new_page, unsigned long private,
 	struct page *newpage = get_new_page(page, private, &result);
 	int rcu_locked = 0;
 	int charge = 0;
-	struct mem_cgroup *mem;
+	struct mem_cgroup *mem = NULL;
 
 	if (!newpage)
 		return -ENOMEM;
diff --git a/mm/mmap.c b/mm/mmap.c
index 73f5e4b64010..ed70a68e882a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -20,7 +20,6 @@
 #include <linux/fs.h>
 #include <linux/personality.h>
 #include <linux/security.h>
-#include <linux/ima.h>
 #include <linux/hugetlb.h>
 #include <linux/profile.h>
 #include <linux/module.h>
@@ -932,13 +931,9 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 	if (!(flags & MAP_FIXED))
 		addr = round_hint_to_min(addr);
 
-	error = arch_mmap_check(addr, len, flags);
-	if (error)
-		return error;
-
 	/* Careful about overflows.. */
 	len = PAGE_ALIGN(len);
-	if (!len || len > TASK_SIZE)
+	if (!len)
 		return -ENOMEM;
 
 	/* offset overflow? */
@@ -949,24 +944,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 	if (mm->map_count > sysctl_max_map_count)
 		return -ENOMEM;
 
-	if (flags & MAP_HUGETLB) {
-		struct user_struct *user = NULL;
-		if (file)
-			return -EINVAL;
-
-		/*
-		 * VM_NORESERVE is used because the reservations will be
-		 * taken when vm_ops->mmap() is called
-		 * A dummy user value is used because we are not locking
-		 * memory so no accounting is necessary
-		 */
-		len = ALIGN(len, huge_page_size(&default_hstate));
-		file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
-						&user, HUGETLB_ANONHUGE_INODE);
-		if (IS_ERR(file))
-			return PTR_ERR(file);
-	}
-
 	/* Obtain the address to map to. we verify (or select) it and ensure
 	 * that it represents a valid section of the address space.
 	 */
@@ -1061,9 +1038,6 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
 	error = security_file_mmap(file, reqprot, prot, flags, addr, 0);
 	if (error)
 		return error;
-	error = ima_file_mmap(file, prot);
-	if (error)
-		return error;
 
 	return mmap_region(file, addr, len, flags, vm_flags, pgoff);
 }
@@ -1459,6 +1433,14 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
 	unsigned long (*get_area)(struct file *, unsigned long,
 				  unsigned long, unsigned long, unsigned long);
 
+	unsigned long error = arch_mmap_check(addr, len, flags);
+	if (error)
+		return error;
+
+	/* Careful about overflows.. */
+	if (len > TASK_SIZE)
+		return -ENOMEM;
+
 	get_area = current->mm->get_unmapped_area;
 	if (file && file->f_op && file->f_op->get_unmapped_area)
 		get_area = file->f_op->get_unmapped_area;
@@ -2003,20 +1985,14 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
 	if (!len)
 		return addr;
 
-	if ((addr + len) > TASK_SIZE || (addr + len) < addr)
-		return -EINVAL;
-
-	if (is_hugepage_only_range(mm, addr, len))
-		return -EINVAL;
-
 	error = security_file_mmap(NULL, 0, 0, 0, addr, 1);
 	if (error)
 		return error;
 
 	flags = VM_DATA_DEFAULT_FLAGS | VM_ACCOUNT | mm->def_flags;
 
-	error = arch_mmap_check(addr, len, flags);
-	if (error)
+	error = get_unmapped_area(NULL, addr, len, 0, MAP_FIXED);
+	if (error & ~PAGE_MASK)
 		return error;
 
 	/*
diff --git a/mm/mremap.c b/mm/mremap.c
index 97bff2547719..845190898d59 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -261,6 +261,137 @@ static unsigned long move_vma(struct vm_area_struct *vma,
 	return new_addr;
 }
 
+static struct vm_area_struct *vma_to_resize(unsigned long addr,
+	unsigned long old_len, unsigned long new_len, unsigned long *p)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma = find_vma(mm, addr);
+
+	if (!vma || vma->vm_start > addr)
+		goto Efault;
+
+	if (is_vm_hugetlb_page(vma))
+		goto Einval;
+
+	/* We can't remap across vm area boundaries */
+	if (old_len > vma->vm_end - addr)
+		goto Efault;
+
+	if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)) {
+		if (new_len > old_len)
+			goto Efault;
+	}
+
+	if (vma->vm_flags & VM_LOCKED) {
+		unsigned long locked, lock_limit;
+		locked = mm->locked_vm << PAGE_SHIFT;
+		lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
+		locked += new_len - old_len;
+		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
+			goto Eagain;
+	}
+
+	if (!may_expand_vm(mm, (new_len - old_len) >> PAGE_SHIFT))
+		goto Enomem;
+
+	if (vma->vm_flags & VM_ACCOUNT) {
+		unsigned long charged = (new_len - old_len) >> PAGE_SHIFT;
+		if (security_vm_enough_memory(charged))
+			goto Efault;
+		*p = charged;
+	}
+
+	return vma;
+
+Efault:	/* very odd choice for most of the cases, but... */
+	return ERR_PTR(-EFAULT);
+Einval:
+	return ERR_PTR(-EINVAL);
+Enomem:
+	return ERR_PTR(-ENOMEM);
+Eagain:
+	return ERR_PTR(-EAGAIN);
+}
+
+static unsigned long mremap_to(unsigned long addr,
+	unsigned long old_len, unsigned long new_addr,
+	unsigned long new_len)
+{
+	struct mm_struct *mm = current->mm;
+	struct vm_area_struct *vma;
+	unsigned long ret = -EINVAL;
+	unsigned long charged = 0;
+	unsigned long map_flags;
+
+	if (new_addr & ~PAGE_MASK)
+		goto out;
+
+	if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len)
+		goto out;
+
+	/* Check if the location we're moving into overlaps the
+	 * old location at all, and fail if it does.
+	 */
+	if ((new_addr <= addr) && (new_addr+new_len) > addr)
+		goto out;
+
+	if ((addr <= new_addr) && (addr+old_len) > new_addr)
+		goto out;
+
+	ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1);
+	if (ret)
+		goto out;
+
+	ret = do_munmap(mm, new_addr, new_len);
+	if (ret)
+		goto out;
+
+	if (old_len >= new_len) {
+		ret = do_munmap(mm, addr+new_len, old_len - new_len);
+		if (ret && old_len != new_len)
+			goto out;
+		old_len = new_len;
+	}
+
+	vma = vma_to_resize(addr, old_len, new_len, &charged);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto out;
+	}
+
+	map_flags = MAP_FIXED;
+	if (vma->vm_flags & VM_MAYSHARE)
+		map_flags |= MAP_SHARED;
+
+	ret = get_unmapped_area(vma->vm_file, new_addr, new_len, vma->vm_pgoff +
+				((addr - vma->vm_start) >> PAGE_SHIFT),
+				map_flags);
+	if (ret & ~PAGE_MASK)
+		goto out1;
+
+	ret = move_vma(vma, addr, old_len, new_len, new_addr);
+	if (!(ret & ~PAGE_MASK))
+		goto out;
+out1:
+	vm_unacct_memory(charged);
+
+out:
+	return ret;
+}
+
+static int vma_expandable(struct vm_area_struct *vma, unsigned long delta)
+{
+	unsigned long end = vma->vm_end + delta;
+	if (end < vma->vm_end) /* overflow */
+		return 0;
+	if (vma->vm_next && vma->vm_next->vm_start < end) /* intersection */
+		return 0;
+	if (get_unmapped_area(NULL, vma->vm_start, end - vma->vm_start,
+			      0, MAP_FIXED) & ~PAGE_MASK)
+		return 0;
+	return 1;
+}
+
 /*
 * Expand (or shrink) an existing mapping, potentially moving it at the
 * same time (controlled by the MREMAP_MAYMOVE flag and available VM space)
@@ -294,32 +425,10 @@ unsigned long do_mremap(unsigned long addr,
 	if (!new_len)
 		goto out;
 
-	/* new_addr is only valid if MREMAP_FIXED is specified */
 	if (flags & MREMAP_FIXED) {
-		if (new_addr & ~PAGE_MASK)
-			goto out;
-		if (!(flags & MREMAP_MAYMOVE))
-			goto out;
-
-		if (new_len > TASK_SIZE || new_addr > TASK_SIZE - new_len)
-			goto out;
-
-		/* Check if the location we're moving into overlaps the
-		 * old location at all, and fail if it does.
-		 */
-		if ((new_addr <= addr) && (new_addr+new_len) > addr)
-			goto out;
-
-		if ((addr <= new_addr) && (addr+old_len) > new_addr)
-			goto out;
-
-		ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1);
-		if (ret)
-			goto out;
-
-		ret = do_munmap(mm, new_addr, new_len);
-		if (ret)
-			goto out;
+		if (flags & MREMAP_MAYMOVE)
+			ret = mremap_to(addr, old_len, new_addr, new_len);
+		goto out;
 	}
 
 	/*
@@ -332,60 +441,23 @@ unsigned long do_mremap(unsigned long addr,
 		if (ret && old_len != new_len)
 			goto out;
 		ret = addr;
-		if (!(flags & MREMAP_FIXED) || (new_addr == addr))
-			goto out;
-		old_len = new_len;
+		goto out;
 	}
 
 	/*
-	 * Ok, we need to grow..  or relocate.
+	 * Ok, we need to grow..
 	 */
-	ret = -EFAULT;
-	vma = find_vma(mm, addr);
-	if (!vma || vma->vm_start > addr)
-		goto out;
-	if (is_vm_hugetlb_page(vma)) {
-		ret = -EINVAL;
-		goto out;
-	}
-	/* We can't remap across vm area boundaries */
-	if (old_len > vma->vm_end - addr)
-		goto out;
-	if (vma->vm_flags & (VM_DONTEXPAND | VM_PFNMAP)) {
-		if (new_len > old_len)
-			goto out;
-	}
-	if (vma->vm_flags & VM_LOCKED) {
-		unsigned long locked, lock_limit;
-		locked = mm->locked_vm << PAGE_SHIFT;
-		lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur;
-		locked += new_len - old_len;
-		ret = -EAGAIN;
-		if (locked > lock_limit && !capable(CAP_IPC_LOCK))
-			goto out;
-	}
-	if (!may_expand_vm(mm, (new_len - old_len) >> PAGE_SHIFT)) {
-		ret = -ENOMEM;
+	vma = vma_to_resize(addr, old_len, new_len, &charged);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
 		goto out;
 	}
 
-	if (vma->vm_flags & VM_ACCOUNT) {
-		charged = (new_len - old_len) >> PAGE_SHIFT;
-		if (security_vm_enough_memory(charged))
-			goto out_nc;
-	}
-
 	/* old_len exactly to the end of the area..
-	 * And we're not relocating the area.
 	 */
-	if (old_len == vma->vm_end - addr &&
-	    !((flags & MREMAP_FIXED) && (addr != new_addr)) &&
-	    (old_len != new_len || !(flags & MREMAP_MAYMOVE))) {
-		unsigned long max_addr = TASK_SIZE;
-		if (vma->vm_next)
-			max_addr = vma->vm_next->vm_start;
+	if (old_len == vma->vm_end - addr) {
 		/* can we just expand the current mapping? */
-		if (max_addr - addr >= new_len) {
+		if (vma_expandable(vma, new_len - old_len)) {
 			int pages = (new_len - old_len) >> PAGE_SHIFT;
 
 			vma_adjust(vma, vma->vm_start,
@@ -409,28 +481,27 @@ unsigned long do_mremap(unsigned long addr,
 	 */
 	ret = -ENOMEM;
 	if (flags & MREMAP_MAYMOVE) {
-		if (!(flags & MREMAP_FIXED)) {
-			unsigned long map_flags = 0;
-			if (vma->vm_flags & VM_MAYSHARE)
-				map_flags |= MAP_SHARED;
-
-			new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
-						vma->vm_pgoff, map_flags);
-			if (new_addr & ~PAGE_MASK) {
-				ret = new_addr;
-				goto out;
-			}
-
-			ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1);
-			if (ret)
-				goto out;
+		unsigned long map_flags = 0;
+		if (vma->vm_flags & VM_MAYSHARE)
+			map_flags |= MAP_SHARED;
+
+		new_addr = get_unmapped_area(vma->vm_file, 0, new_len,
+					vma->vm_pgoff +
+					((addr - vma->vm_start) >> PAGE_SHIFT),
+					map_flags);
+		if (new_addr & ~PAGE_MASK) {
+			ret = new_addr;
+			goto out;
 		}
+
+		ret = security_file_mmap(NULL, 0, 0, 0, new_addr, 1);
+		if (ret)
+			goto out;
 		ret = move_vma(vma, addr, old_len, new_len, new_addr);
 	}
 out:
 	if (ret & ~PAGE_MASK)
 		vm_unacct_memory(charged);
-out_nc:
 	return ret;
 }
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 2c5d79236ead..0b19943ecf8b 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -821,7 +821,6 @@ int write_cache_pages(struct address_space *mapping,
 		      struct writeback_control *wbc, writepage_t writepage,
 		      void *data)
 {
-	struct backing_dev_info *bdi = mapping->backing_dev_info;
 	int ret = 0;
 	int done = 0;
 	struct pagevec pvec;
@@ -834,11 +833,6 @@ int write_cache_pages(struct address_space *mapping,
 	int range_whole = 0;
 	long nr_to_write = wbc->nr_to_write;
 
-	if (wbc->nonblocking && bdi_write_congested(bdi)) {
-		wbc->encountered_congestion = 1;
-		return 0;
-	}
-
 	pagevec_init(&pvec, 0);
 	if (wbc->range_cyclic) {
 		writeback_index = mapping->writeback_index; /* prev offset */
@@ -957,12 +951,6 @@ continue_unlock:
 				break;
 			}
 		}
-
-		if (wbc->nonblocking && bdi_write_congested(bdi)) {
-			wbc->encountered_congestion = 1;
-			done = 1;
-			break;
-		}
 	}
 	pagevec_release(&pvec);
 	cond_resched();
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cdcedf661616..2bc2ac63f41e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1769,7 +1769,7 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
 		 * See also cpuset_zone_allowed() comment in kernel/cpuset.c.
 		 */
 		alloc_flags &= ~ALLOC_CPUSET;
-	} else if (unlikely(rt_task(p)))
+	} else if (unlikely(rt_task(p)) && !in_interrupt())
 		alloc_flags |= ALLOC_HARDER;
 
 	if (likely(!(gfp_mask & __GFP_NOMEMALLOC))) {
@@ -1817,9 +1817,9 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
 	if (NUMA_BUILD && (gfp_mask & GFP_THISNODE) == GFP_THISNODE)
 		goto nopage;
 
+restart:
 	wake_all_kswapd(order, zonelist, high_zoneidx);
 
-restart:
 	/*
 	 * OK, we're below the kswapd watermark and have kicked background
 	 * reclaim. Now things get more complex, so set up alloc_flags according
diff --git a/mm/percpu.c b/mm/percpu.c
index d90797160c2a..5adfc268b408 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -355,62 +355,86 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
 }
 
 /**
- * pcpu_extend_area_map - extend area map for allocation
- * @chunk: target chunk
+ * pcpu_need_to_extend - determine whether chunk area map needs to be extended
+ * @chunk: chunk of interest
 *
- * Extend area map of @chunk so that it can accomodate an allocation.
- * A single allocation can split an area into three areas, so this
- * function makes sure that @chunk->map has at least two extra slots.
+ * Determine whether area map of @chunk needs to be extended to
+ * accomodate a new allocation.
 *
 * CONTEXT:
- * pcpu_alloc_mutex, pcpu_lock.  pcpu_lock is released and reacquired
- * if area map is extended.
+ * pcpu_lock.
 *
 * RETURNS:
- * 0 if noop, 1 if successfully extended, -errno on failure.
+ * New target map allocation length if extension is necessary, 0
+ * otherwise.
 */
-static int pcpu_extend_area_map(struct pcpu_chunk *chunk, unsigned long *flags)
+static int pcpu_need_to_extend(struct pcpu_chunk *chunk)
 {
 	int new_alloc;
-	int *new;
-	size_t size;
 
-	/* has enough? */
 	if (chunk->map_alloc >= chunk->map_used + 2)
 		return 0;
 
-	spin_unlock_irqrestore(&pcpu_lock, *flags);
-
 	new_alloc = PCPU_DFL_MAP_ALLOC;
 	while (new_alloc < chunk->map_used + 2)
 		new_alloc *= 2;
 
-	new = pcpu_mem_alloc(new_alloc * sizeof(new[0]));
-	if (!new) {
-		spin_lock_irqsave(&pcpu_lock, *flags);
+	return new_alloc;
+}
+
+/**
+ * pcpu_extend_area_map - extend area map of a chunk
+ * @chunk: chunk of interest
+ * @new_alloc: new target allocation length of the area map
+ *
+ * Extend area map of @chunk to have @new_alloc entries.
+ *
+ * CONTEXT:
+ * Does GFP_KERNEL allocation.  Grabs and releases pcpu_lock.
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int pcpu_extend_area_map(struct pcpu_chunk *chunk, int new_alloc)
+{
+	int *old = NULL, *new = NULL;
+	size_t old_size = 0, new_size = new_alloc * sizeof(new[0]);
+	unsigned long flags;
+
+	new = pcpu_mem_alloc(new_size);
+	if (!new)
 		return -ENOMEM;
-	}
 
-	/*
-	 * Acquire pcpu_lock and switch to new area map.  Only free
-	 * could have happened inbetween, so map_used couldn't have
-	 * grown.
-	 */
-	spin_lock_irqsave(&pcpu_lock, *flags);
-	BUG_ON(new_alloc < chunk->map_used + 2);
+	/* acquire pcpu_lock and switch to new area map */
+	spin_lock_irqsave(&pcpu_lock, flags);
+
+	if (new_alloc <= chunk->map_alloc)
+		goto out_unlock;
 
-	size = chunk->map_alloc * sizeof(chunk->map[0]);
-	memcpy(new, chunk->map, size);
+	old_size = chunk->map_alloc * sizeof(chunk->map[0]);
+	memcpy(new, chunk->map, old_size);
 
 	/*
 	 * map_alloc < PCPU_DFL_MAP_ALLOC indicates that the chunk is
 	 * one of the first chunks and still using static map.
 	 */
 	if (chunk->map_alloc >= PCPU_DFL_MAP_ALLOC)
-		pcpu_mem_free(chunk->map, size);
+		old = chunk->map;
 
 	chunk->map_alloc = new_alloc;
 	chunk->map = new;
+	new = NULL;
+
+out_unlock:
+	spin_unlock_irqrestore(&pcpu_lock, flags);
+
+	/*
+	 * pcpu_mem_free() might end up calling vfree() which uses
+	 * IRQ-unsafe lock and thus can't be called under pcpu_lock.
+	 */
+	pcpu_mem_free(old, old_size);
+	pcpu_mem_free(new, new_size);
+
 	return 0;
 }
 
@@ -1049,7 +1073,7 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved)
 	static int warn_limit = 10;
 	struct pcpu_chunk *chunk;
 	const char *err;
-	int slot, off;
+	int slot, off, new_alloc;
 	unsigned long flags;
 
 	if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE)) {
@@ -1064,14 +1088,25 @@ static void *pcpu_alloc(size_t size, size_t align, bool reserved)
 	/* serve reserved allocations from the reserved chunk if available */
 	if (reserved && pcpu_reserved_chunk) {
 		chunk = pcpu_reserved_chunk;
-		if (size > chunk->contig_hint ||
-		    pcpu_extend_area_map(chunk, &flags) < 0) {
-			err = "failed to extend area map of reserved chunk";
+
+		if (size > chunk->contig_hint) {
+			err = "alloc from reserved chunk failed";
 			goto fail_unlock;
 		}
+
+		while ((new_alloc = pcpu_need_to_extend(chunk))) {
+			spin_unlock_irqrestore(&pcpu_lock, flags);
+			if (pcpu_extend_area_map(chunk, new_alloc) < 0) {
+				err = "failed to extend area map of reserved chunk";
+				goto fail_unlock_mutex;
+			}
+			spin_lock_irqsave(&pcpu_lock, flags);
+		}
+
 		off = pcpu_alloc_area(chunk, size, align);
 		if (off >= 0)
 			goto area_found;
+
 		err = "alloc from reserved chunk failed";
 		goto fail_unlock;
 	}
@@ -1083,14 +1118,20 @@ restart:
 		if (size > chunk->contig_hint)
 			continue;
 
-		switch (pcpu_extend_area_map(chunk, &flags)) {
-		case 0:
-			break;
-		case 1:
-			goto restart;	/* pcpu_lock dropped, restart */
-		default:
-			err = "failed to extend area map";
-			goto fail_unlock;
+		new_alloc = pcpu_need_to_extend(chunk);
+		if (new_alloc) {
+			spin_unlock_irqrestore(&pcpu_lock, flags);
+			if (pcpu_extend_area_map(chunk,
+						 new_alloc) < 0) {
+				err = "failed to extend area map";
+				goto fail_unlock_mutex;
+			}
+			spin_lock_irqsave(&pcpu_lock, flags);
+			/*
+			 * pcpu_lock has been dropped, need to
+			 * restart cpu_slot list walking.
+			 */
+			goto restart;
 		}
 
 		off = pcpu_alloc_area(chunk, size, align);
diff --git a/mm/slab.c b/mm/slab.c
index 7dfa481c96ba..a6c9166996a9 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -604,6 +604,26 @@ static struct kmem_cache cache_cache = {
 
 #define BAD_ALIEN_MAGIC 0x01020304ul
 
+/*
+ * chicken and egg problem: delay the per-cpu array allocation
+ * until the general caches are up.
+ */
+static enum {
+	NONE,
+	PARTIAL_AC,
+	PARTIAL_L3,
+	EARLY,
+	FULL
+} g_cpucache_up;
+
+/*
+ * used by boot code to determine if it can use slab based allocator
+ */
+int slab_is_available(void)
+{
+	return g_cpucache_up >= EARLY;
+}
+
 #ifdef CONFIG_LOCKDEP
 
 /*
@@ -620,40 +640,52 @@ static struct kmem_cache cache_cache = {
 static struct lock_class_key on_slab_l3_key;
 static struct lock_class_key on_slab_alc_key;
 
-static inline void init_lock_keys(void)
-
+static void init_node_lock_keys(int q)
 {
-	int q;
 	struct cache_sizes *s = malloc_sizes;
 
-	while (s->cs_size != ULONG_MAX) {
-		for_each_node(q) {
-			struct array_cache **alc;
-			int r;
-			struct kmem_list3 *l3 = s->cs_cachep->nodelists[q];
-			if (!l3 || OFF_SLAB(s->cs_cachep))
-				continue;
-			lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
-			alc = l3->alien;
-			/*
-			 * FIXME: This check for BAD_ALIEN_MAGIC
-			 * should go away when common slab code is taught to
-			 * work even without alien caches.
-			 * Currently, non NUMA code returns BAD_ALIEN_MAGIC
-			 * for alloc_alien_cache,
-			 */
-			if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
-				continue;
-			for_each_node(r) {
-				if (alc[r])
-					lockdep_set_class(&alc[r]->lock,
-						&on_slab_alc_key);
-			}
+	if (g_cpucache_up != FULL)
+		return;
+
+	for (s = malloc_sizes; s->cs_size != ULONG_MAX; s++) {
+		struct array_cache **alc;
+		struct kmem_list3 *l3;
+		int r;
+
+		l3 = s->cs_cachep->nodelists[q];
+		if (!l3 || OFF_SLAB(s->cs_cachep))
+			return;
+		lockdep_set_class(&l3->list_lock, &on_slab_l3_key);
+		alc = l3->alien;
+		/*
+		 * FIXME: This check for BAD_ALIEN_MAGIC
+		 * should go away when common slab code is taught to
+		 * work even without alien caches.
+		 * Currently, non NUMA code returns BAD_ALIEN_MAGIC
+		 * for alloc_alien_cache,
+		 */
+		if (!alc || (unsigned long)alc == BAD_ALIEN_MAGIC)
+			return;
+		for_each_node(r) {
+			if (alc[r])
+				lockdep_set_class(&alc[r]->lock,
+						  &on_slab_alc_key);
 		}
-		s++;
 	}
 }
+
+static inline void init_lock_keys(void)
+{
+	int node;
+
+	for_each_node(node)
+		init_node_lock_keys(node);
+}
 #else
+static void init_node_lock_keys(int q)
+{
+}
+
 static inline void init_lock_keys(void)
 {
 }
@@ -665,26 +697,6 @@ static inline void init_lock_keys(void)
 static DEFINE_MUTEX(cache_chain_mutex);
 static struct list_head cache_chain;
 
-/*
- * chicken and egg problem: delay the per-cpu array allocation
- * until the general caches are up.
- */
-static enum {
-	NONE,
-	PARTIAL_AC,
-	PARTIAL_L3,
-	EARLY,
-	FULL
-} g_cpucache_up;
-
-/*
- * used by boot code to determine if it can use slab based allocator
- */
-int slab_is_available(void)
-{
-	return g_cpucache_up >= EARLY;
-}
-
 static DEFINE_PER_CPU(struct delayed_work, reap_work);
 
 static inline struct array_cache *cpu_cache_get(struct kmem_cache *cachep)
@@ -1254,6 +1266,8 @@ static int __cpuinit cpuup_prepare(long cpu)
 		kfree(shared);
 		free_alien_cache(alien);
 	}
+	init_node_lock_keys(node);
+
 	return 0;
 bad:
 	cpuup_canceled(cpu);
@@ -3103,13 +3117,19 @@ static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 	} else {
 		STATS_INC_ALLOCMISS(cachep);
 		objp = cache_alloc_refill(cachep, flags);
+		/*
+		 * the 'ac' may be updated by cache_alloc_refill(),
+		 * and kmemleak_erase() requires its correct value.
+		 */
+		ac = cpu_cache_get(cachep);
 	}
 	/*
 	 * To avoid a false negative, if an object that is in one of the
 	 * per-CPU caches is leaked, we need to make sure kmemleak doesn't
 	 * treat the array pointers as a reference to the object.
 	 */
-	kmemleak_erase(&ac->entry[ac->avail]);
+	if (objp)
+		kmemleak_erase(&ac->entry[ac->avail]);
 	return objp;
 }
 
@@ -3306,7 +3326,7 @@ __cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
 	cache_alloc_debugcheck_before(cachep, flags);
 	local_irq_save(save_flags);
 
-	if (unlikely(nodeid == -1))
+	if (nodeid == -1)
 		nodeid = numa_node_id();
 
 	if (unlikely(!cachep->nodelists[nodeid])) {
diff --git a/mm/slub.c b/mm/slub.c
index 0956396faed1..da0ce55965dc 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -4371,12 +4371,28 @@ static int show_stat(struct kmem_cache *s, char *buf, enum stat_item si)
 	return len + sprintf(buf + len, "\n");
 }
 
+static void clear_stat(struct kmem_cache *s, enum stat_item si)
+{
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		get_cpu_slab(s, cpu)->stat[si] = 0;
+}
+
 #define STAT_ATTR(si, text) 					\
 static ssize_t text##_show(struct kmem_cache *s, char *buf)	\
 {								\
 	return show_stat(s, buf, si);				\
 }								\
-SLAB_ATTR_RO(text);						\
+static ssize_t text##_store(struct kmem_cache *s,		\
+				const char *buf, size_t length)	\
+{								\
+	if (buf[0] != '0')					\
+		return -EINVAL;					\
+	clear_stat(s, si);					\
+	return length;						\
+}								\
+SLAB_ATTR(text);						\
 
 STAT_ATTR(ALLOC_FASTPATH, alloc_fastpath);
 STAT_ATTR(ALLOC_SLOWPATH, alloc_slowpath);
diff --git a/mm/truncate.c b/mm/truncate.c
index 450cebdabfc0..2c147a7e5f2c 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -490,7 +490,7 @@ EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);
 * Any pages which are found to be mapped into pagetables are unmapped prior to
 * invalidation.
 *
- * Returns -EIO if any pages could not be invalidated.
+ * Returns -EBUSY if any pages could not be invalidated.
 */
 int invalidate_inode_pages2(struct address_space *mapping)
 {
diff --git a/mm/util.c b/mm/util.c
index 7c35ad95f927..b377ce430803 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -4,6 +4,10 @@
 #include <linux/module.h>
 #include <linux/err.h>
 #include <linux/sched.h>
+#include <linux/hugetlb.h>
+#include <linux/syscalls.h>
+#include <linux/mman.h>
+#include <linux/file.h>
 #include <asm/uaccess.h>
 
 #define CREATE_TRACE_POINTS
@@ -268,6 +272,46 @@ int __attribute__((weak)) get_user_pages_fast(unsigned long start,
 }
 EXPORT_SYMBOL_GPL(get_user_pages_fast);
 
+SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
+		unsigned long, prot, unsigned long, flags,
+		unsigned long, fd, unsigned long, pgoff)
+{
+	struct file * file = NULL;
+	unsigned long retval = -EBADF;
+
+	if (!(flags & MAP_ANONYMOUS)) {
+		if (unlikely(flags & MAP_HUGETLB))
+			return -EINVAL;
+		file = fget(fd);
+		if (!file)
+			goto out;
+	} else if (flags & MAP_HUGETLB) {
+		struct user_struct *user = NULL;
+		/*
+		 * VM_NORESERVE is used because the reservations will be
+		 * taken when vm_ops->mmap() is called
+		 * A dummy user value is used because we are not locking
+		 * memory so no accounting is necessary
+		 */
+		len = ALIGN(len, huge_page_size(&default_hstate));
+		file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
+						&user, HUGETLB_ANONHUGE_INODE);
+		if (IS_ERR(file))
+			return PTR_ERR(file);
+	}
+
+	flags &= ~(MAP_EXECUTABLE | MAP_DENYWRITE);
+
+	down_write(&current->mm->mmap_sem);
+	retval = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+	up_write(&current->mm->mmap_sem);
+
+	if (file)
+		fput(file);
+out:
+	return retval;
+}
+
 /* Tracepoints definitions. */
 EXPORT_TRACEPOINT_SYMBOL(kmalloc);
 EXPORT_TRACEPOINT_SYMBOL(kmem_cache_alloc);