author     Benjamin Herrenschmidt <benh@kernel.crashing.org>  2010-05-06 21:29:25 -0400
committer  Benjamin Herrenschmidt <benh@kernel.crashing.org>  2010-05-06 21:29:25 -0400
commit     1ed31d6db90d51010545921e59d369d2f92b7ac2 (patch)
tree       358a0b346bc8135cd5e53700eb44308b1a7c8c5b /mm
parent     ceba1abcb00b0ef0b1efcd715285f6e05523edef (diff)
parent     722154e4cacf015161efe60009ae9be23d492296 (diff)
Merge commit 'origin/master' into next
Diffstat (limited to 'mm')
-rw-r--r--	mm/backing-dev.c	 37
-rw-r--r--	mm/bootmem.c		 17
-rw-r--r--	mm/hugetlb.c		  5
-rw-r--r--	mm/ksm.c		 12
-rw-r--r--	mm/memcontrol.c		 22
-rw-r--r--	mm/memory.c		  3
-rw-r--r--	mm/mmap.c		113
-rw-r--r--	mm/pagewalk.c		 47
-rw-r--r--	mm/readahead.c		  2
-rw-r--r--	mm/rmap.c		 33
-rw-r--r--	mm/slab.c		 13
-rw-r--r--	mm/slub.c		  5
-rw-r--r--	mm/util.c		 21
-rw-r--r--	mm/vmscan.c		 23
14 files changed, 244 insertions(+), 109 deletions(-)
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 0e8ca0347707..707d0dc6da0f 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -11,6 +11,8 @@
 #include <linux/writeback.h>
 #include <linux/device.h>
 
+static atomic_long_t bdi_seq = ATOMIC_LONG_INIT(0);
+
 void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page)
 {
 }
@@ -25,6 +27,11 @@ struct backing_dev_info default_backing_dev_info = {
 };
 EXPORT_SYMBOL_GPL(default_backing_dev_info);
 
+struct backing_dev_info noop_backing_dev_info = {
+	.name		= "noop",
+};
+EXPORT_SYMBOL_GPL(noop_backing_dev_info);
+
 static struct class *bdi_class;
 
 /*
@@ -227,6 +234,9 @@ static struct device_attribute bdi_dev_attrs[] = {
 static __init int bdi_class_init(void)
 {
 	bdi_class = class_create(THIS_MODULE, "bdi");
+	if (IS_ERR(bdi_class))
+		return PTR_ERR(bdi_class);
+
 	bdi_class->dev_attrs = bdi_dev_attrs;
 	bdi_debug_init();
 	return 0;
@@ -712,6 +722,33 @@ void bdi_destroy(struct backing_dev_info *bdi)
 }
 EXPORT_SYMBOL(bdi_destroy);
 
+/*
+ * For use from filesystems to quickly init and register a bdi associated
+ * with dirty writeback
+ */
+int bdi_setup_and_register(struct backing_dev_info *bdi, char *name,
+			   unsigned int cap)
+{
+	char tmp[32];
+	int err;
+
+	bdi->name = name;
+	bdi->capabilities = cap;
+	err = bdi_init(bdi);
+	if (err)
+		return err;
+
+	sprintf(tmp, "%.28s%s", name, "-%d");
+	err = bdi_register(bdi, NULL, tmp, atomic_long_inc_return(&bdi_seq));
+	if (err) {
+		bdi_destroy(bdi);
+		return err;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(bdi_setup_and_register);
+
 static wait_queue_head_t congestion_wqh[2] = {
 	__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]),
 	__WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1])
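
The new helper rolls bdi_init() plus a uniquely-named bdi_register() into one
call, so virtual filesystems no longer open-code the sequence. A filesystem
would use it roughly like this (a minimal sketch; the "examplefs" names are
hypothetical, only bdi_setup_and_register() and bdi_destroy() come from this
patch):

	static int examplefs_fill_super(struct super_block *sb)
	{
		struct examplefs_sb_info *sbi = sb->s_fs_info;
		int err;

		/* Registers the bdi as "examplefs-<N>", N taken from bdi_seq. */
		err = bdi_setup_and_register(&sbi->bdi, "examplefs",
					     BDI_CAP_MAP_COPY);
		if (err)
			return err;

		sb->s_bdi = &sbi->bdi;
		return 0;
	}

	/* ...with a matching bdi_destroy(&sbi->bdi) on the kill_sb path. */
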
diff --git a/mm/bootmem.c b/mm/bootmem.c
index eff224220571..58c66cc5056a 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -304,9 +304,22 @@ unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
 unsigned long __init free_all_bootmem(void)
 {
 #ifdef CONFIG_NO_BOOTMEM
-	return free_all_memory_core_early(NODE_DATA(0)->node_id);
+	/*
+	 * We need to use MAX_NUMNODES instead of NODE_DATA(0)->node_id
+	 * because in some cases node 0 has no RAM installed and the low
+	 * RAM ends up on node 1 instead. Passing MAX_NUMNODES makes sure
+	 * all ranges in early_node_map[] are used, not just the node-0
+	 * ones.
+	 */
+	return free_all_memory_core_early(MAX_NUMNODES);
 #else
-	return free_all_bootmem_core(NODE_DATA(0)->bdata);
+	unsigned long total_pages = 0;
+	bootmem_data_t *bdata;
+
+	list_for_each_entry(bdata, &bdata_list, list)
+		total_pages += free_all_bootmem_core(bdata);
+
+	return total_pages;
 #endif
 }
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 6034dc9e9796..ffbdfc86aedf 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -546,6 +546,7 @@ static void free_huge_page(struct page *page)
 
 	mapping = (struct address_space *) page_private(page);
 	set_page_private(page, 0);
+	page->mapping = NULL;
 	BUG_ON(page_count(page));
 	INIT_LIST_HEAD(&page->lru);
 
@@ -2447,8 +2448,10 @@ retry:
 		spin_lock(&inode->i_lock);
 		inode->i_blocks += blocks_per_huge_page(h);
 		spin_unlock(&inode->i_lock);
-	} else
+	} else {
 		lock_page(page);
+		page->mapping = HUGETLB_POISON;
+	}
 	}
 
 	/*
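
Setting page->mapping to HUGETLB_POISON rather than leaving the field stale
makes any later dereference of a huge page's ->mapping fault at a distinctive
address; this hunk comes from the fix for an infinite loop in get_futex_key()
on huge-page-backed futexes, where a stale ->mapping was misread. A sketch of
the defensive pattern on the reader side (hypothetical code, not from the
patch; HUGETLB_POISON itself is defined in <linux/poison.h>):

	#include <linux/poison.h>

	static int example_inspect_mapping(struct page *page)
	{
		struct address_space *mapping = page->mapping;

		/* Huge pages poison ->mapping; treat that as "no mapping". */
		if (!mapping || mapping == HUGETLB_POISON)
			return -EFAULT;

		/* ...only here is it safe to use the address_space... */
		return 0;
	}
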
diff --git a/mm/ksm.c b/mm/ksm.c
index 8cdfc2a1e8bf..956880f2ff49 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -365,7 +365,7 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr)
 	do {
 		cond_resched();
 		page = follow_page(vma, addr, FOLL_GET);
-		if (!page)
+		if (IS_ERR_OR_NULL(page))
 			break;
 		if (PageKsm(page))
 			ret = handle_mm_fault(vma->vm_mm, vma, addr,
@@ -447,7 +447,7 @@ static struct page *get_mergeable_page(struct rmap_item *rmap_item)
 		goto out;
 
 	page = follow_page(vma, addr, FOLL_GET);
-	if (!page)
+	if (IS_ERR_OR_NULL(page))
 		goto out;
 	if (PageAnon(page)) {
 		flush_anon_page(vma, page, addr);
@@ -1086,7 +1086,7 @@ struct rmap_item *unstable_tree_search_insert(struct rmap_item *rmap_item,
 		cond_resched();
 		tree_rmap_item = rb_entry(*new, struct rmap_item, node);
 		tree_page = get_mergeable_page(tree_rmap_item);
-		if (!tree_page)
+		if (IS_ERR_OR_NULL(tree_page))
 			return NULL;
 
 		/*
@@ -1294,7 +1294,7 @@ next_mm:
 			if (ksm_test_exit(mm))
 				break;
 			*page = follow_page(vma, ksm_scan.address, FOLL_GET);
-			if (*page && PageAnon(*page)) {
+			if (!IS_ERR_OR_NULL(*page) && PageAnon(*page)) {
 				flush_anon_page(vma, *page, ksm_scan.address);
 				flush_dcache_page(*page);
 				rmap_item = get_next_rmap_item(slot,
@@ -1308,7 +1308,7 @@ next_mm:
 				up_read(&mm->mmap_sem);
 				return rmap_item;
 			}
-			if (*page)
+			if (!IS_ERR_OR_NULL(*page))
 				put_page(*page);
 			ksm_scan.address += PAGE_SIZE;
 			cond_resched();
@@ -1367,7 +1367,7 @@ next_mm:
 static void ksm_do_scan(unsigned int scan_npages)
 {
 	struct rmap_item *rmap_item;
-	struct page *page;
+	struct page *uninitialized_var(page);
 
 	while (scan_npages--) {
 		cond_resched();
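
follow_page() can return ERR_PTR(-EFAULT)-style values as well as NULL, so a
bare NULL test no longer covers every failure. IS_ERR_OR_NULL() from
<linux/err.h> folds both cases into one check; its definition is roughly
(a sketch of the existing helper, modulo exact formatting):

	#include <linux/err.h>

	static inline long IS_ERR_OR_NULL(const void *ptr)
	{
		return !ptr || IS_ERR_VALUE((unsigned long)ptr);
	}

So IS_ERR_OR_NULL(page) rejects NULL and error pointers in a single test,
which is what every follow_page() caller in ksm.c needs here.
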
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9ed760dc7448..6c755de385f7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1359,16 +1359,19 @@ void mem_cgroup_update_file_mapped(struct page *page, int val)
 
 	lock_page_cgroup(pc);
 	mem = pc->mem_cgroup;
-	if (!mem)
-		goto done;
-
-	if (!PageCgroupUsed(pc))
+	if (!mem || !PageCgroupUsed(pc))
 		goto done;
 
 	/*
 	 * Preemption is already disabled. We can use __this_cpu_xxx
 	 */
-	__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], val);
+	if (val > 0) {
+		__this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
+		SetPageCgroupFileMapped(pc);
+	} else {
+		__this_cpu_dec(mem->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
+		ClearPageCgroupFileMapped(pc);
+	}
 
 done:
 	unlock_page_cgroup(pc);
@@ -1801,16 +1804,13 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
 static void __mem_cgroup_move_account(struct page_cgroup *pc,
 	struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
 {
-	struct page *page;
-
 	VM_BUG_ON(from == to);
 	VM_BUG_ON(PageLRU(pc->page));
 	VM_BUG_ON(!PageCgroupLocked(pc));
 	VM_BUG_ON(!PageCgroupUsed(pc));
 	VM_BUG_ON(pc->mem_cgroup != from);
 
-	page = pc->page;
-	if (page_mapped(page) && !PageAnon(page)) {
+	if (PageCgroupFileMapped(pc)) {
 		/* Update mapped_file data for mem_cgroup */
 		preempt_disable();
 		__this_cpu_dec(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
@@ -2429,11 +2429,11 @@ int mem_cgroup_prepare_migration(struct page *page, struct mem_cgroup **ptr)
 	}
 	unlock_page_cgroup(pc);
 
+	*ptr = mem;
 	if (mem) {
-		ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false);
+		ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, ptr, false);
 		css_put(&mem->css);
 	}
-	*ptr = mem;
 	return ret;
 }
 
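
Tracking "file mapped" in a dedicated page_cgroup flag means the move path no
longer re-derives the state from the struct page (which can race with
unmapping). The new Set/Clear/PageCgroupFileMapped() accessors presumably
follow the existing PCG flag pattern in <linux/page_cgroup.h>; a sketch of how
such accessors are generated there (modeled on the existing flags, details may
differ):

	enum {
		/* ...existing PCG_* flags... */
		PCG_FILE_MAPPED, /* page is accounted as file-mapped */
	};

	#define TESTPCGFLAG(uname, lname)				\
	static inline int PageCgroup##uname(struct page_cgroup *pc)	\
		{ return test_bit(PCG_##lname, &pc->flags); }

	#define SETPCGFLAG(uname, lname)				\
	static inline void SetPageCgroup##uname(struct page_cgroup *pc)\
		{ set_bit(PCG_##lname, &pc->flags); }

	#define CLEARPCGFLAG(uname, lname)				\
	static inline void ClearPageCgroup##uname(struct page_cgroup *pc)\
		{ clear_bit(PCG_##lname, &pc->flags); }

	TESTPCGFLAG(FileMapped, FILE_MAPPED)
	SETPCGFLAG(FileMapped, FILE_MAPPED)
	CLEARPCGFLAG(FileMapped, FILE_MAPPED)
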
diff --git a/mm/memory.c b/mm/memory.c
index 1d2ea39260e5..833952d8b74d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -125,13 +125,12 @@ core_initcall(init_zero_pfn);
 
 #if defined(SPLIT_RSS_COUNTING)
 
-void __sync_task_rss_stat(struct task_struct *task, struct mm_struct *mm)
+static void __sync_task_rss_stat(struct task_struct *task, struct mm_struct *mm)
 {
 	int i;
 
 	for (i = 0; i < NR_MM_COUNTERS; i++) {
 		if (task->rss_stat.count[i]) {
-			BUG_ON(!mm);
 			add_mm_counter(mm, i, task->rss_stat.count[i]);
 			task->rss_stat.count[i] = 0;
 		}
diff --git a/mm/mmap.c b/mm/mmap.c
index 75557c639ad4..456ec6f27889 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -507,11 +507,12 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	struct address_space *mapping = NULL;
 	struct prio_tree_root *root = NULL;
 	struct file *file = vma->vm_file;
-	struct anon_vma *anon_vma = NULL;
 	long adjust_next = 0;
 	int remove_next = 0;
 
 	if (next && !insert) {
+		struct vm_area_struct *exporter = NULL;
+
 		if (end >= next->vm_end) {
 			/*
 			 * vma expands, overlapping all the next, and
@@ -519,7 +520,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
 			 */
 again:			remove_next = 1 + (end > next->vm_end);
 			end = next->vm_end;
-			anon_vma = next->anon_vma;
+			exporter = next;
 			importer = vma;
 		} else if (end > next->vm_start) {
 			/*
@@ -527,7 +528,7 @@ again:		remove_next = 1 + (end > next->vm_end);
 			 * mprotect case 5 shifting the boundary up.
 			 */
 			adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
-			anon_vma = next->anon_vma;
+			exporter = next;
 			importer = vma;
 		} else if (end < vma->vm_end) {
 			/*
@@ -536,28 +537,19 @@ again:		remove_next = 1 + (end > next->vm_end);
 			 * mprotect case 4 shifting the boundary down.
 			 */
 			adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
-			anon_vma = next->anon_vma;
+			exporter = vma;
 			importer = next;
 		}
-	}
 
-	/*
-	 * When changing only vma->vm_end, we don't really need anon_vma lock.
-	 */
-	if (vma->anon_vma && (insert || importer || start != vma->vm_start))
-		anon_vma = vma->anon_vma;
-	if (anon_vma) {
 		/*
 		 * Easily overlooked: when mprotect shifts the boundary,
 		 * make sure the expanding vma has anon_vma set if the
 		 * shrinking vma had, to cover any anon pages imported.
 		 */
-		if (importer && !importer->anon_vma) {
-			/* Block reverse map lookups until things are set up. */
-			if (anon_vma_clone(importer, vma)) {
+		if (exporter && exporter->anon_vma && !importer->anon_vma) {
+			if (anon_vma_clone(importer, exporter))
 				return -ENOMEM;
-			}
-			importer->anon_vma = anon_vma;
+			importer->anon_vma = exporter->anon_vma;
 		}
 	}
 
@@ -825,6 +817,61 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 }
 
 /*
+ * Rough compatibility check to quickly see if it's even worth looking
+ * at sharing an anon_vma.
+ *
+ * They need to have the same vm_file, and the flags can only differ
+ * in things that mprotect may change.
+ *
+ * NOTE! The fact that we share an anon_vma doesn't _have_ to mean that
+ * we can merge the two vma's. For example, we refuse to merge a vma if
+ * there is a vm_ops->close() function, because that indicates that the
+ * driver is doing some kind of reference counting. But that doesn't
+ * really matter for the anon_vma sharing case.
+ */
+static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
+{
+	return a->vm_end == b->vm_start &&
+		mpol_equal(vma_policy(a), vma_policy(b)) &&
+		a->vm_file == b->vm_file &&
+		!((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC)) &&
+		b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
+}
+
+/*
+ * Do some basic sanity checking to see if we can re-use the anon_vma
+ * from 'old'. The 'a'/'b' vma's are in VM order - one of them will be
+ * the same as 'old', the other will be the new one that is trying
+ * to share the anon_vma.
+ *
+ * NOTE! This runs with mm_sem held for reading, so it is possible that
+ * the anon_vma of 'old' is concurrently in the process of being set up
+ * by another page fault trying to merge _that_. But that's ok: if it
+ * is being set up, that automatically means that it will be a singleton
+ * acceptable for merging, so we can do all of this optimistically. But
+ * we do that ACCESS_ONCE() to make sure that we never re-load the pointer.
+ *
+ * IOW: that the "list_is_singular()" test on the anon_vma_chain only
+ * matters for the 'stable anon_vma' case (ie the thing we want to avoid
+ * is to return an anon_vma that is "complex" due to having gone through
+ * a fork).
+ *
+ * We also make sure that the two vma's are compatible (adjacent,
+ * and with the same memory policies). That's all stable, even with just
+ * a read lock on the mm_sem.
+ */
+static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
+{
+	if (anon_vma_compatible(a, b)) {
+		struct anon_vma *anon_vma = ACCESS_ONCE(old->anon_vma);
+
+		if (anon_vma && list_is_singular(&old->anon_vma_chain))
+			return anon_vma;
+	}
+	return NULL;
+}
+
+/*
  * find_mergeable_anon_vma is used by anon_vma_prepare, to check
  * neighbouring vmas for a suitable anon_vma, before it goes off
  * to allocate a new anon_vma. It checks because a repetitive
@@ -834,28 +881,16 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
  */
 struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
 {
+	struct anon_vma *anon_vma;
 	struct vm_area_struct *near;
-	unsigned long vm_flags;
 
 	near = vma->vm_next;
 	if (!near)
 		goto try_prev;
 
-	/*
-	 * Since only mprotect tries to remerge vmas, match flags
-	 * which might be mprotected into each other later on.
-	 * Neither mlock nor madvise tries to remerge at present,
-	 * so leave their flags as obstructing a merge.
-	 */
-	vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
-	vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
-
-	if (near->anon_vma && vma->vm_end == near->vm_start &&
-			mpol_equal(vma_policy(vma), vma_policy(near)) &&
-			can_vma_merge_before(near, vm_flags,
-				NULL, vma->vm_file, vma->vm_pgoff +
-				((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)))
-		return near->anon_vma;
+	anon_vma = reusable_anon_vma(near, vma, near);
+	if (anon_vma)
+		return anon_vma;
 try_prev:
 	/*
 	 * It is potentially slow to have to call find_vma_prev here.
@@ -868,14 +903,9 @@ try_prev:
 	if (!near)
 		goto none;
 
-	vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
-	vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
-
-	if (near->anon_vma && near->vm_end == vma->vm_start &&
-			mpol_equal(vma_policy(near), vma_policy(vma)) &&
-			can_vma_merge_after(near, vm_flags,
-				NULL, vma->vm_file, vma->vm_pgoff))
-		return near->anon_vma;
+	anon_vma = reusable_anon_vma(near, near, vma);
+	if (anon_vma)
+		return anon_vma;
 none:
 	/*
 	 * There's no absolute need to look only at touching neighbours:
@@ -1947,7 +1977,8 @@ static int __split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 	return 0;
 
 	/* Clean everything up if vma_adjust failed. */
-	new->vm_ops->close(new);
+	if (new->vm_ops && new->vm_ops->close)
+		new->vm_ops->close(new);
 	if (new->vm_file) {
 		if (vma->vm_flags & VM_EXECUTABLE)
 			removed_exe_file_vma(mm);
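
The consumer of reusable_anon_vma() is anon_vma_prepare(), which now only
allocates a fresh anon_vma when no compatible neighbour exists. Roughly
(a sketch simplified from mm/rmap.c, error paths elided):

	struct anon_vma *anon_vma = find_mergeable_anon_vma(vma);
	struct anon_vma *allocated = NULL;

	if (!anon_vma) {
		anon_vma = anon_vma_alloc();	/* no reusable neighbour */
		if (unlikely(!anon_vma))
			return -ENOMEM;
		allocated = anon_vma;
	}
	/*
	 * The vma is then linked to anon_vma under anon_vma->lock and
	 * mm->page_table_lock; if another thread raced and set
	 * vma->anon_vma first, 'allocated' is freed again (see the
	 * rmap.c hunk below).
	 */
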
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 7b47a57b6646..8b1a2ce21ee5 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -80,6 +80,37 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
 	return err;
 }
 
+#ifdef CONFIG_HUGETLB_PAGE
+static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
+				       unsigned long end)
+{
+	unsigned long boundary = (addr & huge_page_mask(h)) + huge_page_size(h);
+	return boundary < end ? boundary : end;
+}
+
+static int walk_hugetlb_range(struct vm_area_struct *vma,
+			      unsigned long addr, unsigned long end,
+			      struct mm_walk *walk)
+{
+	struct hstate *h = hstate_vma(vma);
+	unsigned long next;
+	unsigned long hmask = huge_page_mask(h);
+	pte_t *pte;
+	int err = 0;
+
+	do {
+		next = hugetlb_entry_end(h, addr, end);
+		pte = huge_pte_offset(walk->mm, addr & hmask);
+		if (pte && walk->hugetlb_entry)
+			err = walk->hugetlb_entry(pte, hmask, addr, next, walk);
+		if (err)
+			return err;
+	} while (addr = next, addr != end);
+
+	return 0;
+}
+#endif
+
 /**
  * walk_page_range - walk a memory map's page tables with a callback
  * @mm: memory map to walk
@@ -128,20 +159,16 @@ int walk_page_range(unsigned long addr, unsigned long end,
 		vma = find_vma(walk->mm, addr);
 #ifdef CONFIG_HUGETLB_PAGE
 		if (vma && is_vm_hugetlb_page(vma)) {
-			pte_t *pte;
-			struct hstate *hs;
-
 			if (vma->vm_end < next)
 				next = vma->vm_end;
-			hs = hstate_vma(vma);
-			pte = huge_pte_offset(walk->mm,
-					      addr & huge_page_mask(hs));
-			if (pte && !huge_pte_none(huge_ptep_get(pte))
-			    && walk->hugetlb_entry)
-				err = walk->hugetlb_entry(pte, addr,
-							  next, walk);
+			/*
+			 * Hugepage is very tightly coupled with vma, so
+			 * walk through hugetlb entries within a given vma.
+			 */
+			err = walk_hugetlb_range(vma, addr, next, walk);
 			if (err)
 				break;
+			pgd = pgd_offset(walk->mm, next);
 			continue;
 		}
 #endif
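
Note that the ->hugetlb_entry callback gains the huge-page mask and is now
invoked once per huge PTE slot, empty ones included; the old huge_pte_none()
filter moves into the callers. A walker using the new signature might look
roughly like this (a hypothetical example, not code from the patch):

	static int count_hugetlb_entry(pte_t *pte, unsigned long hmask,
				       unsigned long addr, unsigned long end,
				       struct mm_walk *walk)
	{
		unsigned long *count = walk->private;

		/* The none-check is the callback's job now. */
		if (!huge_pte_none(huge_ptep_get(pte)))
			(*count)++;
		return 0;
	}

	struct mm_walk hugetlb_walk = {
		.hugetlb_entry	= count_hugetlb_entry,
		.mm		= mm,
		.private	= &count,
	};
	err = walk_page_range(start, end, &hugetlb_walk);
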
diff --git a/mm/readahead.c b/mm/readahead.c
index 999b54bb462f..dfa9a1a03a11 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -503,7 +503,7 @@ void page_cache_sync_readahead(struct address_space *mapping,
 		return;
 
 	/* be dumb */
-	if (filp->f_mode & FMODE_RANDOM) {
+	if (filp && (filp->f_mode & FMODE_RANDOM)) {
 		force_page_cache_readahead(mapping, filp, offset, req_size);
 		return;
 	}
diff --git a/mm/rmap.c b/mm/rmap.c
index eaa7a09eb72e..07fc94758799 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -133,8 +133,8 @@ int anon_vma_prepare(struct vm_area_struct *vma)
 			goto out_enomem_free_avc;
 		allocated = anon_vma;
 	}
-	spin_lock(&anon_vma->lock);
 
+	spin_lock(&anon_vma->lock);
 	/* page_table_lock to protect against threads */
 	spin_lock(&mm->page_table_lock);
 	if (likely(!vma->anon_vma)) {
@@ -144,14 +144,15 @@ int anon_vma_prepare(struct vm_area_struct *vma)
 		list_add(&avc->same_vma, &vma->anon_vma_chain);
 		list_add(&avc->same_anon_vma, &anon_vma->head);
 		allocated = NULL;
+		avc = NULL;
 	}
 	spin_unlock(&mm->page_table_lock);
-
 	spin_unlock(&anon_vma->lock);
-	if (unlikely(allocated)) {
+
+	if (unlikely(allocated))
 		anon_vma_free(allocated);
+	if (unlikely(avc))
 		anon_vma_chain_free(avc);
-	}
 	}
 	return 0;
 
@@ -182,7 +183,7 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 {
 	struct anon_vma_chain *avc, *pavc;
 
-	list_for_each_entry(pavc, &src->anon_vma_chain, same_vma) {
+	list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
 		avc = anon_vma_chain_alloc();
 		if (!avc)
 			goto enomem_failure;
@@ -730,13 +731,29 @@ void page_move_anon_rmap(struct page *page,
  * @page:	the page to add the mapping to
  * @vma:	the vm area in which the mapping is added
  * @address:	the user virtual address mapped
+ * @exclusive:	the page is exclusively owned by the current process
  */
 static void __page_set_anon_rmap(struct page *page,
-	struct vm_area_struct *vma, unsigned long address)
+	struct vm_area_struct *vma, unsigned long address, int exclusive)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
 
 	BUG_ON(!anon_vma);
+
+	/*
+	 * If the page isn't exclusively mapped into this vma,
+	 * we must use the _oldest_ possible anon_vma for the
+	 * page mapping!
+	 *
+	 * So take the last AVC chain entry in the vma, which is
+	 * the deepest ancestor, and use the anon_vma from that.
+	 */
+	if (!exclusive) {
+		struct anon_vma_chain *avc;
+		avc = list_entry(vma->anon_vma_chain.prev, struct anon_vma_chain, same_vma);
+		anon_vma = avc->anon_vma;
+	}
+
 	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
 	page->mapping = (struct address_space *) anon_vma;
 	page->index = linear_page_index(vma, address);
@@ -791,7 +808,7 @@ void page_add_anon_rmap(struct page *page,
 	VM_BUG_ON(!PageLocked(page));
 	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
 	if (first)
-		__page_set_anon_rmap(page, vma, address);
+		__page_set_anon_rmap(page, vma, address, 0);
 	else
 		__page_check_anon_rmap(page, vma, address);
 }
@@ -813,7 +830,7 @@ void page_add_new_anon_rmap(struct page *page,
 	SetPageSwapBacked(page);
 	atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
 	__inc_zone_page_state(page, NR_ANON_PAGES);
-	__page_set_anon_rmap(page, vma, address);
+	__page_set_anon_rmap(page, vma, address, 1);
 	if (page_evictable(page, vma))
 		lru_cache_add_lru(page, LRU_ACTIVE_ANON);
 	else
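
For reference, page->mapping stores the anon_vma pointer with the low
PAGE_MAPPING_ANON bit set, which is why choosing the right anon_vma here
matters for every later rmap walk. A simplified sketch of the decode side
(cf. page_anon_vma(); the real helper also filters out KSM pages):

	static struct anon_vma *example_page_anon_vma(struct page *page)
	{
		unsigned long mapping = (unsigned long)page->mapping;

		if (!(mapping & PAGE_MAPPING_ANON))
			return NULL;	/* file-backed page, or no mapping */
		return (struct anon_vma *)(mapping - PAGE_MAPPING_ANON);
	}
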
diff --git a/mm/slab.c b/mm/slab.c
index a9f325b28bed..bac0f4fcc216 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3602,21 +3602,10 @@ EXPORT_SYMBOL(kmem_cache_alloc_notrace);
  */
 int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr)
 {
-	unsigned long addr = (unsigned long)ptr;
-	unsigned long min_addr = PAGE_OFFSET;
-	unsigned long align_mask = BYTES_PER_WORD - 1;
 	unsigned long size = cachep->buffer_size;
 	struct page *page;
 
-	if (unlikely(addr < min_addr))
-		goto out;
-	if (unlikely(addr > (unsigned long)high_memory - size))
-		goto out;
-	if (unlikely(addr & align_mask))
-		goto out;
-	if (unlikely(!kern_addr_valid(addr)))
-		goto out;
-	if (unlikely(!kern_addr_valid(addr + size - 1)))
+	if (unlikely(!kern_ptr_validate(ptr, size)))
 		goto out;
 	page = virt_to_page(ptr);
 	if (unlikely(!PageSlab(page)))
diff --git a/mm/slub.c b/mm/slub.c
index b364844a1068..d2a54fe71ea2 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2153,7 +2153,7 @@ static int init_kmem_cache_nodes(struct kmem_cache *s, gfp_t gfpflags)
 	int local_node;
 
 	if (slab_state >= UP && (s < kmalloc_caches ||
-			s > kmalloc_caches + KMALLOC_CACHES))
+			s >= kmalloc_caches + KMALLOC_CACHES))
 		local_node = page_to_nid(virt_to_page(s));
 	else
 		local_node = 0;
@@ -2386,6 +2386,9 @@ int kmem_ptr_validate(struct kmem_cache *s, const void *object)
 {
 	struct page *page;
 
+	if (!kern_ptr_validate(object, s->size))
+		return 0;
+
 	page = get_object_page(object);
 
 	if (!page || s != page->slab)
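
The kmalloc_caches fix is a classic half-open-range off-by-one: the static
array occupies [kmalloc_caches, kmalloc_caches + KMALLOC_CACHES), so "outside
the array" must be tested with >=, not >. Equivalently (a hypothetical helper,
just to spell the invariant out):

	/* True iff 's' is one of the statically allocated kmalloc caches. */
	static inline int is_static_kmalloc_cache(struct kmem_cache *s)
	{
		return s >= kmalloc_caches &&
		       s <  kmalloc_caches + KMALLOC_CACHES; /* '<', not '<=' */
	}

With the old '>' test, a dynamically allocated cache sitting exactly at
kmalloc_caches + KMALLOC_CACHES would have been mistaken for a static one and
skipped the page_to_nid() lookup.
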
diff --git a/mm/util.c b/mm/util.c
index 834db7be240f..f5712e8964be 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -186,6 +186,27 @@ void kzfree(const void *p)
 }
 EXPORT_SYMBOL(kzfree);
 
+int kern_ptr_validate(const void *ptr, unsigned long size)
+{
+	unsigned long addr = (unsigned long)ptr;
+	unsigned long min_addr = PAGE_OFFSET;
+	unsigned long align_mask = sizeof(void *) - 1;
+
+	if (unlikely(addr < min_addr))
+		goto out;
+	if (unlikely(addr > (unsigned long)high_memory - size))
+		goto out;
+	if (unlikely(addr & align_mask))
+		goto out;
+	if (unlikely(!kern_addr_valid(addr)))
+		goto out;
+	if (unlikely(!kern_addr_valid(addr + size - 1)))
+		goto out;
+	return 1;
+out:
+	return 0;
+}
+
 /*
  * strndup_user - duplicate an existing string from user space
  * @s: The string to duplicate
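
This moves the pointer sanity checks that slab previously open-coded into a
shared helper, so slub can use them too (its old kmem_ptr_validate() skipped
them entirely). A caller-side sketch mirroring the slab/slub usage above
(a hypothetical wrapper, not from the patch):

	/*
	 * Reject pointers that are unmapped, misaligned, below PAGE_OFFSET,
	 * or whose last byte lies past high_memory, before ever touching
	 * the struct page behind them.
	 */
	static bool object_looks_valid(const void *obj, unsigned long size)
	{
		if (!kern_ptr_validate(obj, size))
			return false;
		return PageSlab(virt_to_page(obj));
	}
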
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e0e5f15bb726..3ff3311447f5 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1535,13 +1535,6 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 	unsigned long ap, fp;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 
-	/* If we have no swap space, do not bother scanning anon pages. */
-	if (!sc->may_swap || (nr_swap_pages <= 0)) {
-		percent[0] = 0;
-		percent[1] = 100;
-		return;
-	}
-
 	anon  = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
 		zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
 	file  = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
@@ -1639,20 +1632,22 @@ static void shrink_zone(int priority, struct zone *zone,
 	unsigned long nr_reclaimed = sc->nr_reclaimed;
 	unsigned long nr_to_reclaim = sc->nr_to_reclaim;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+	int noswap = 0;
 
-	get_scan_ratio(zone, sc, percent);
+	/* If we have no swap space, do not bother scanning anon pages. */
+	if (!sc->may_swap || (nr_swap_pages <= 0)) {
+		noswap = 1;
+		percent[0] = 0;
+		percent[1] = 100;
+	} else
+		get_scan_ratio(zone, sc, percent);
 
 	for_each_evictable_lru(l) {
 		int file = is_file_lru(l);
 		unsigned long scan;
 
-		if (percent[file] == 0) {
-			nr[l] = 0;
-			continue;
-		}
-
 		scan = zone_nr_lru_pages(zone, sc, l);
-		if (priority) {
+		if (priority || noswap) {
 			scan >>= priority;
 			scan = (scan * percent[file]) / 100;
 		}
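
The arithmetic makes the old percent[file] == 0 shortcut redundant: with
noswap set, the scaling branch is forced even at priority 0, so for an anon
LRU holding N pages

	scan = N;
	if (priority || noswap) {		  /* now taken at priority 0 too */
		scan >>= priority;		  /* priority 0: no shift */
		scan = (scan * percent[0]) / 100; /* percent[0] == 0  ->  0 */
	}

yields nr[l] = 0 for both anon lists, while the file lists keep
scan = N * percent[1] / 100 = N. Same behaviour as before, one branch fewer in
the per-LRU loop.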