author     Jiri Kosina <jkosina@suse.cz>   2010-04-22 20:08:44 -0400
committer  Jiri Kosina <jkosina@suse.cz>   2010-04-22 20:08:44 -0400
commit     6c9468e9eb1252eaefd94ce7f06e1be9b0b641b1
tree       797676a336b050bfa1ef879377c07e541b9075d6 /mm
parent     4cb3ca7cd7e2cae8d1daf5345ec99a1e8502cf3f
parent     c81eddb0e3728661d1585fbc564449c94165cc36
Merge branch 'master' into for-next
Diffstat (limited to 'mm')
-rw-r--r--  mm/Makefile            6
-rw-r--r--  mm/backing-dev.c       3
-rw-r--r--  mm/bootmem.c          31
-rw-r--r--  mm/bounce.c            1
-rw-r--r--  mm/failslab.c          1
-rw-r--r--  mm/filemap.c           2
-rw-r--r--  mm/filemap_xip.c       1
-rw-r--r--  mm/hugetlb.c           2
-rw-r--r--  mm/kmemleak.c          1
-rw-r--r--  mm/ksm.c               2
-rw-r--r--  mm/memcontrol.c       68
-rw-r--r--  mm/memory-failure.c    1
-rw-r--r--  mm/memory.c            3
-rw-r--r--  mm/mempolicy.c        53
-rw-r--r--  mm/migrate.c           1
-rw-r--r--  mm/mincore.c           2
-rw-r--r--  mm/mmap.c            110
-rw-r--r--  mm/mmu_context.c       1
-rw-r--r--  mm/mmu_notifier.c      1
-rw-r--r--  mm/mprotect.c          1
-rw-r--r--  mm/mremap.c            1
-rw-r--r--  mm/nommu.c            13
-rw-r--r--  mm/oom_kill.c          1
-rw-r--r--  mm/page_cgroup.c      20
-rw-r--r--  mm/page_io.c           1
-rw-r--r--  mm/pagewalk.c         47
-rw-r--r--  mm/percpu.c           26
-rw-r--r--  mm/percpu_up.c        30
-rw-r--r--  mm/quicklist.c         1
-rw-r--r--  mm/readahead.c         3
-rw-r--r--  mm/rmap.c             25
-rw-r--r--  mm/slab.c             13
-rw-r--r--  mm/slub.c              3
-rw-r--r--  mm/sparse-vmemmap.c    1
-rw-r--r--  mm/sparse.c            1
-rw-r--r--  mm/swap.c              1
-rw-r--r--  mm/swap_state.c        1
-rw-r--r--  mm/truncate.c          1
-rw-r--r--  mm/util.c             21
-rw-r--r--  mm/vmscan.c           25
-rw-r--r--  mm/vmstat.c            1
41 files changed, 357 insertions, 170 deletions
diff --git a/mm/Makefile b/mm/Makefile
index 7a68d2ab5560..6c2a73a54a43 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -33,7 +33,11 @@ obj-$(CONFIG_FAILSLAB) += failslab.o
 obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
 obj-$(CONFIG_FS_XIP) += filemap_xip.o
 obj-$(CONFIG_MIGRATION) += migrate.o
-obj-$(CONFIG_SMP) += percpu.o
+ifdef CONFIG_SMP
+obj-y += percpu.o
+else
+obj-y += percpu_up.o
+endif
 obj-$(CONFIG_QUICKLIST) += quicklist.o
 obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o
 obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index 0e8ca0347707..f13e067e1467 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -227,6 +227,9 @@ static struct device_attribute bdi_dev_attrs[] = {
 static __init int bdi_class_init(void)
 {
 	bdi_class = class_create(THIS_MODULE, "bdi");
+	if (IS_ERR(bdi_class))
+		return PTR_ERR(bdi_class);
+
 	bdi_class->dev_attrs = bdi_dev_attrs;
 	bdi_debug_init();
 	return 0;
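
The hunk above adopts the standard class_create() error convention: failures come back ERR_PTR-encoded, never NULL. A minimal sketch of the same idiom ("example" is an illustrative class name, not from this merge):

#include <linux/device.h>
#include <linux/err.h>

static int __init example_class_init(void)
{
	struct class *cls = class_create(THIS_MODULE, "example");

	if (IS_ERR(cls))
		return PTR_ERR(cls);	/* e.g. -ENOMEM */
	return 0;
}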
diff --git a/mm/bootmem.c b/mm/bootmem.c
index d7c791ef0036..58c66cc5056a 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -10,6 +10,7 @@
  */
 #include <linux/init.h>
 #include <linux/pfn.h>
+#include <linux/slab.h>
 #include <linux/bootmem.h>
 #include <linux/module.h>
 #include <linux/kmemleak.h>
@@ -180,19 +181,12 @@ static void __init __free_pages_memory(unsigned long start, unsigned long end)
 	end_aligned = end & ~(BITS_PER_LONG - 1);
 
 	if (end_aligned <= start_aligned) {
-#if 1
-		printk(KERN_DEBUG "  %lx - %lx\n", start, end);
-#endif
 		for (i = start; i < end; i++)
 			__free_pages_bootmem(pfn_to_page(i), 0);
 
 		return;
 	}
 
-#if 1
-	printk(KERN_DEBUG "  %lx %lx - %lx %lx\n",
-		start, start_aligned, end_aligned, end);
-#endif
 	for (i = start; i < start_aligned; i++)
 		__free_pages_bootmem(pfn_to_page(i), 0);
 
@@ -310,9 +304,22 @@ unsigned long __init free_all_bootmem_node(pg_data_t *pgdat)
 unsigned long __init free_all_bootmem(void)
 {
 #ifdef CONFIG_NO_BOOTMEM
-	return free_all_memory_core_early(NODE_DATA(0)->node_id);
+	/*
+	 * We need to use MAX_NUMNODES instead of NODE_DATA(0)->node_id
+	 * because in some cases, like when Node 0 doesn't have RAM
+	 * installed, the low RAM will be on Node 1.
+	 * Using MAX_NUMNODES makes sure all ranges in early_node_map[]
+	 * are used, instead of only the Node 0 related ones.
+	 */
+	return free_all_memory_core_early(MAX_NUMNODES);
 #else
-	return free_all_bootmem_core(NODE_DATA(0)->bdata);
+	unsigned long total_pages = 0;
+	bootmem_data_t *bdata;
+
+	list_for_each_entry(bdata, &bdata_list, list)
+		total_pages += free_all_bootmem_core(bdata);
+
+	return total_pages;
 #endif
 }
 
@@ -428,9 +435,6 @@ void __init free_bootmem_node(pg_data_t *pgdat, unsigned long physaddr,
 {
 #ifdef CONFIG_NO_BOOTMEM
 	free_early(physaddr, physaddr + size);
-#if 0
-	printk(KERN_DEBUG "free %lx %lx\n", physaddr, size);
-#endif
 #else
 	unsigned long start, end;
 
@@ -456,9 +460,6 @@ void __init free_bootmem(unsigned long addr, unsigned long size)
 {
 #ifdef CONFIG_NO_BOOTMEM
 	free_early(addr, addr + size);
-#if 0
-	printk(KERN_DEBUG "free %lx %lx\n", addr, size);
-#endif
 #else
 	unsigned long start, end;
 
diff --git a/mm/bounce.c b/mm/bounce.c
index a2b76a588e34..13b6dad1eed2 100644
--- a/mm/bounce.c
+++ b/mm/bounce.c
@@ -6,6 +6,7 @@
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/swap.h>
+#include <linux/gfp.h>
 #include <linux/bio.h>
 #include <linux/pagemap.h>
 #include <linux/mempool.h>
diff --git a/mm/failslab.c b/mm/failslab.c
index bb41f98dd8b7..c5f88f240ddc 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -1,5 +1,4 @@
 #include <linux/fault-inject.h>
-#include <linux/gfp.h>
 #include <linux/slab.h>
 
 static struct {
diff --git a/mm/filemap.c b/mm/filemap.c
index 045b31c37653..140ebda9640f 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -10,13 +10,13 @@
  * the NFS filesystem used to do this differently, for example)
  */
 #include <linux/module.h>
-#include <linux/slab.h>
 #include <linux/compiler.h>
 #include <linux/fs.h>
 #include <linux/uaccess.h>
 #include <linux/aio.h>
 #include <linux/capability.h>
 #include <linux/kernel_stat.h>
+#include <linux/gfp.h>
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/mman.h>
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index 78b94f0b6d5d..83364df74a33 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -17,6 +17,7 @@
 #include <linux/sched.h>
 #include <linux/seqlock.h>
 #include <linux/mutex.h>
+#include <linux/gfp.h>
 #include <asm/tlbflush.h>
 #include <asm/io.h>
 
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 3a5aeb37c110..6034dc9e9796 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2,7 +2,6 @@
  * Generic hugetlb support.
  * (C) William Irwin, April 2004
  */
-#include <linux/gfp.h>
 #include <linux/list.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -18,6 +17,7 @@
 #include <linux/mutex.h>
 #include <linux/bootmem.h>
 #include <linux/sysfs.h>
+#include <linux/slab.h>
 
 #include <asm/page.h>
 #include <asm/pgtable.h>
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 5b069e4f5e48..2c0d032ac898 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -72,7 +72,6 @@
 #include <linux/module.h>
 #include <linux/kthread.h>
 #include <linux/prio_tree.h>
-#include <linux/gfp.h>
 #include <linux/fs.h>
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
diff --git a/mm/ksm.c b/mm/ksm.c
index a93f1b7f508c..8cdfc2a1e8bf 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -751,7 +751,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
 	 * page
 	 */
 	if (page_mapcount(page) + 1 + swapped != page_count(page)) {
-		set_pte_at_notify(mm, addr, ptep, entry);
+		set_pte_at(mm, addr, ptep, entry);
 		goto out_unlock;
 	}
 	entry = pte_wrprotect(entry);
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6e8533e2861b..f0ff5a7af318 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1359,16 +1359,19 @@ void mem_cgroup_update_file_mapped(struct page *page, int val)
 
 	lock_page_cgroup(pc);
 	mem = pc->mem_cgroup;
-	if (!mem)
-		goto done;
-
-	if (!PageCgroupUsed(pc))
+	if (!mem || !PageCgroupUsed(pc))
 		goto done;
 
 	/*
 	 * Preemption is already disabled. We can use __this_cpu_xxx
 	 */
-	__this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], val);
+	if (val > 0) {
+		__this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
+		SetPageCgroupFileMapped(pc);
+	} else {
+		__this_cpu_dec(mem->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
+		ClearPageCgroupFileMapped(pc);
+	}
 
 done:
 	unlock_page_cgroup(pc);
@@ -1801,16 +1804,13 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem,
 static void __mem_cgroup_move_account(struct page_cgroup *pc,
 	struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge)
 {
-	struct page *page;
-
 	VM_BUG_ON(from == to);
 	VM_BUG_ON(PageLRU(pc->page));
 	VM_BUG_ON(!PageCgroupLocked(pc));
 	VM_BUG_ON(!PageCgroupUsed(pc));
 	VM_BUG_ON(pc->mem_cgroup != from);
 
-	page = pc->page;
-	if (page_mapped(page) && !PageAnon(page)) {
+	if (PageCgroupFileMapped(pc)) {
 		/* Update mapped_file data for mem_cgroup */
 		preempt_disable();
 		__this_cpu_dec(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
@@ -3691,8 +3691,10 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
 	else
 		mem = vmalloc(size);
 
-	if (mem)
-		memset(mem, 0, size);
+	if (!mem)
+		return NULL;
+
+	memset(mem, 0, size);
 	mem->stat = alloc_percpu(struct mem_cgroup_stat_cpu);
 	if (!mem->stat) {
 		if (size < PAGE_SIZE)
@@ -3946,28 +3948,6 @@ one_by_one:
 	}
 	return ret;
 }
-#else	/* !CONFIG_MMU */
-static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
-				struct cgroup *cgroup,
-				struct task_struct *p,
-				bool threadgroup)
-{
-	return 0;
-}
-static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
-				struct cgroup *cgroup,
-				struct task_struct *p,
-				bool threadgroup)
-{
-}
-static void mem_cgroup_move_task(struct cgroup_subsys *ss,
-				struct cgroup *cont,
-				struct cgroup *old_cont,
-				struct task_struct *p,
-				bool threadgroup)
-{
-}
-#endif
 
 /**
  * is_target_pte_for_mc - check a pte whether it is valid for move charge
@@ -4330,6 +4310,28 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
 	}
 	mem_cgroup_clear_mc();
 }
+#else	/* !CONFIG_MMU */
+static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
+				struct cgroup *cgroup,
+				struct task_struct *p,
+				bool threadgroup)
+{
+	return 0;
+}
+static void mem_cgroup_cancel_attach(struct cgroup_subsys *ss,
+				struct cgroup *cgroup,
+				struct task_struct *p,
+				bool threadgroup)
+{
+}
+static void mem_cgroup_move_task(struct cgroup_subsys *ss,
+				struct cgroup *cont,
+				struct cgroup *old_cont,
+				struct task_struct *p,
+				bool threadgroup)
+{
+}
+#endif
 
 struct cgroup_subsys mem_cgroup_subsys = {
 	.name = "memory",
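
SetPageCgroupFileMapped()/ClearPageCgroupFileMapped() used in the first hunk are generated bit helpers for a new PCG_FILE_MAPPED flag in include/linux/page_cgroup.h; a hedged sketch of that header's generator pattern (the exact macro bodies are assumed from the surrounding PCG_* style, not shown in this merge):

#define SETPCGFLAG(uname, lname)					\
static inline void SetPageCgroup##uname(struct page_cgroup *pc)	\
	{ set_bit(PCG_##lname, &pc->flags); }

#define CLEARPCGFLAG(uname, lname)					\
static inline void ClearPageCgroup##uname(struct page_cgroup *pc)	\
	{ clear_bit(PCG_##lname, &pc->flags); }

/* assumed instantiations for the flag used above */
SETPCGFLAG(FileMapped, FILE_MAPPED)
CLEARPCGFLAG(FileMapped, FILE_MAPPED)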
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index d1f335162976..620b0b461593 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -44,6 +44,7 @@
 #include <linux/migrate.h>
 #include <linux/page-isolation.h>
 #include <linux/suspend.h>
+#include <linux/slab.h>
 #include "internal.h"
 
 int sysctl_memory_failure_early_kill __read_mostly = 0;
diff --git a/mm/memory.c b/mm/memory.c
index 5b7f2002e54b..833952d8b74d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -56,6 +56,7 @@
 #include <linux/kallsyms.h>
 #include <linux/swapops.h>
 #include <linux/elf.h>
+#include <linux/gfp.h>
 
 #include <asm/io.h>
 #include <asm/pgalloc.h>
@@ -124,7 +125,7 @@ core_initcall(init_zero_pfn);
 
 #if defined(SPLIT_RSS_COUNTING)
 
-void __sync_task_rss_stat(struct task_struct *task, struct mm_struct *mm)
+static void __sync_task_rss_stat(struct task_struct *task, struct mm_struct *mm)
 {
 	int i;
 
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index bda230e52acd..08f40a2f3fe0 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -73,7 +73,6 @@
 #include <linux/sched.h>
 #include <linux/nodemask.h>
 #include <linux/cpuset.h>
-#include <linux/gfp.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/module.h>
@@ -806,9 +805,13 @@ static long do_get_mempolicy(int *policy, nodemask_t *nmask,
 
 	err = 0;
 	if (nmask) {
-		task_lock(current);
-		get_policy_nodemask(pol, nmask);
-		task_unlock(current);
+		if (mpol_store_user_nodemask(pol)) {
+			*nmask = pol->w.user_nodemask;
+		} else {
+			task_lock(current);
+			get_policy_nodemask(pol, nmask);
+			task_unlock(current);
+		}
 	}
 
  out:
@@ -1756,10 +1759,12 @@ struct mempolicy *__mpol_dup(struct mempolicy *old)
 
 	if (!new)
 		return ERR_PTR(-ENOMEM);
+	rcu_read_lock();
 	if (current_cpuset_is_being_rebound()) {
 		nodemask_t mems = cpuset_mems_allowed(current);
 		mpol_rebind_policy(old, &mems);
 	}
+	rcu_read_unlock();
 	*new = *old;
 	atomic_set(&new->refcnt, 1);
 	return new;
@@ -2193,8 +2198,8 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
 			char *rest = nodelist;
 			while (isdigit(*rest))
 				rest++;
-			if (!*rest)
-				err = 0;
+			if (*rest)
+				goto out;
 		}
 		break;
 	case MPOL_INTERLEAVE:
@@ -2203,7 +2208,6 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
 		 */
 		if (!nodelist)
 			nodes = node_states[N_HIGH_MEMORY];
-		err = 0;
 		break;
 	case MPOL_LOCAL:
 		/*
@@ -2213,11 +2217,19 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
 			goto out;
 		mode = MPOL_PREFERRED;
 		break;
-
-	/*
-	 * case MPOL_BIND:    mpol_new() enforces non-empty nodemask.
-	 * case MPOL_DEFAULT: mpol_new() enforces empty nodemask, ignores flags.
-	 */
+	case MPOL_DEFAULT:
+		/*
+		 * Insist on an empty nodelist
+		 */
+		if (!nodelist)
+			err = 0;
+		goto out;
+	case MPOL_BIND:
+		/*
+		 * Insist on a nodelist
+		 */
+		if (!nodelist)
+			goto out;
 	}
 
 	mode_flags = 0;
2222 2234
2223 mode_flags = 0; 2235 mode_flags = 0;
@@ -2231,13 +2243,14 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
 		else if (!strcmp(flags, "relative"))
 			mode_flags |= MPOL_F_RELATIVE_NODES;
 		else
-			err = 1;
+			goto out;
 	}
 
 	new = mpol_new(mode, mode_flags, &nodes);
 	if (IS_ERR(new))
-		err = 1;
-	else {
+		goto out;
+
+	{
 		int ret;
 		NODEMASK_SCRATCH(scratch);
 		if (scratch) {
@@ -2248,13 +2261,15 @@ int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context)
 				ret = -ENOMEM;
 			NODEMASK_SCRATCH_FREE(scratch);
 			if (ret) {
-				err = 1;
 				mpol_put(new);
-			} else if (no_context) {
-				/* save for contextualization */
-				new->w.user_nodemask = nodes;
+				goto out;
 			}
 		}
+	err = 0;
+	if (no_context) {
+		/* save for contextualization */
+		new->w.user_nodemask = nodes;
+	}
 
 out:
 	/* Restore string for error message */
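
mpol_parse_str() parses tmpfs mount options of the form mode[=flags][:nodelist]; with the rewritten error paths above, a nodelist is now mandatory for bind and forbidden for default. A hedged userspace illustration (mount point and nodelist are arbitrary, not from this merge):

#include <sys/mount.h>

int mount_bound_tmpfs(void)
{
	/* "bind" insists on a nodelist; "default" insists on none */
	return mount("tmpfs", "/mnt", "tmpfs", 0, "mpol=bind:0-3");
}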
diff --git a/mm/migrate.c b/mm/migrate.c
index 88000b89fc9a..d3f3f7f81075 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -32,6 +32,7 @@
 #include <linux/security.h>
 #include <linux/memcontrol.h>
 #include <linux/syscalls.h>
+#include <linux/gfp.h>
 
 #include "internal.h"
 
diff --git a/mm/mincore.c b/mm/mincore.c
index 7a3436ef39eb..f77433c20279 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -7,8 +7,8 @@
 /*
  * The mincore() system call.
  */
-#include <linux/slab.h>
 #include <linux/pagemap.h>
+#include <linux/gfp.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
 #include <linux/syscalls.h>
diff --git a/mm/mmap.c b/mm/mmap.c
index 75557c639ad4..f90ea92f755a 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -507,11 +507,12 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
 	struct address_space *mapping = NULL;
 	struct prio_tree_root *root = NULL;
 	struct file *file = vma->vm_file;
-	struct anon_vma *anon_vma = NULL;
 	long adjust_next = 0;
 	int remove_next = 0;
 
 	if (next && !insert) {
+		struct vm_area_struct *exporter = NULL;
+
 		if (end >= next->vm_end) {
 			/*
 			 * vma expands, overlapping all the next, and
@@ -519,7 +520,7 @@ int vma_adjust(struct vm_area_struct *vma, unsigned long start,
 			 */
 again:			remove_next = 1 + (end > next->vm_end);
 			end = next->vm_end;
-			anon_vma = next->anon_vma;
+			exporter = next;
 			importer = vma;
 		} else if (end > next->vm_start) {
 			/*
@@ -527,7 +528,7 @@ again: remove_next = 1 + (end > next->vm_end);
 			 * mprotect case 5 shifting the boundary up.
 			 */
 			adjust_next = (end - next->vm_start) >> PAGE_SHIFT;
-			anon_vma = next->anon_vma;
+			exporter = next;
 			importer = vma;
 		} else if (end < vma->vm_end) {
 			/*
@@ -536,28 +537,19 @@ again: remove_next = 1 + (end > next->vm_end);
 			 * mprotect case 4 shifting the boundary down.
 			 */
 			adjust_next = - ((vma->vm_end - end) >> PAGE_SHIFT);
-			anon_vma = next->anon_vma;
+			exporter = vma;
 			importer = next;
 		}
-	}
 
-	/*
-	 * When changing only vma->vm_end, we don't really need anon_vma lock.
-	 */
-	if (vma->anon_vma && (insert || importer || start != vma->vm_start))
-		anon_vma = vma->anon_vma;
-	if (anon_vma) {
 		/*
 		 * Easily overlooked: when mprotect shifts the boundary,
 		 * make sure the expanding vma has anon_vma set if the
 		 * shrinking vma had, to cover any anon pages imported.
 		 */
-		if (importer && !importer->anon_vma) {
-			/* Block reverse map lookups until things are set up. */
-			if (anon_vma_clone(importer, vma)) {
+		if (exporter && exporter->anon_vma && !importer->anon_vma) {
+			if (anon_vma_clone(importer, exporter))
 				return -ENOMEM;
-			}
-			importer->anon_vma = anon_vma;
+			importer->anon_vma = exporter->anon_vma;
 		}
 	}
 
@@ -825,6 +817,61 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
 }
 
 /*
+ * Rough compatibility check to quickly see if it's even worth looking
+ * at sharing an anon_vma.
+ *
+ * They need to have the same vm_file, and the flags can only differ
+ * in things that mprotect may change.
+ *
+ * NOTE! The fact that we share an anon_vma doesn't _have_ to mean that
+ * we can merge the two vma's. For example, we refuse to merge a vma if
+ * there is a vm_ops->close() function, because that indicates that the
+ * driver is doing some kind of reference counting. But that doesn't
+ * really matter for the anon_vma sharing case.
+ */
+static int anon_vma_compatible(struct vm_area_struct *a, struct vm_area_struct *b)
+{
+	return a->vm_end == b->vm_start &&
+		mpol_equal(vma_policy(a), vma_policy(b)) &&
+		a->vm_file == b->vm_file &&
+		!((a->vm_flags ^ b->vm_flags) & ~(VM_READ|VM_WRITE|VM_EXEC)) &&
+		b->vm_pgoff == a->vm_pgoff + ((b->vm_start - a->vm_start) >> PAGE_SHIFT);
+}
+
+/*
+ * Do some basic sanity checking to see if we can re-use the anon_vma
+ * from 'old'. The 'a'/'b' vma's are in VM order - one of them will be
+ * the same as 'old', the other will be the new one that is trying
+ * to share the anon_vma.
+ *
+ * NOTE! This runs with mm_sem held for reading, so it is possible that
+ * the anon_vma of 'old' is concurrently in the process of being set up
+ * by another page fault trying to merge _that_. But that's ok: if it
+ * is being set up, that automatically means that it will be a singleton
+ * acceptable for merging, so we can do all of this optimistically. But
+ * we do that ACCESS_ONCE() to make sure that we never re-load the pointer.
+ *
+ * IOW: that the "list_is_singular()" test on the anon_vma_chain only
+ * matters for the 'stable anon_vma' case (ie the thing we want to avoid
+ * is to return an anon_vma that is "complex" due to having gone through
+ * a fork).
+ *
+ * We also make sure that the two vma's are compatible (adjacent,
+ * and with the same memory policies). That's all stable, even with just
+ * a read lock on the mm_sem.
+ */
+static struct anon_vma *reusable_anon_vma(struct vm_area_struct *old, struct vm_area_struct *a, struct vm_area_struct *b)
+{
+	if (anon_vma_compatible(a, b)) {
+		struct anon_vma *anon_vma = ACCESS_ONCE(old->anon_vma);
+
+		if (anon_vma && list_is_singular(&old->anon_vma_chain))
+			return anon_vma;
+	}
+	return NULL;
+}
+
+/*
@@ -834,28 +881,16 @@ struct vm_area_struct *vma_merge(struct mm_struct *mm,
  * find_mergeable_anon_vma is used by anon_vma_prepare, to check
  * neighbouring vmas for a suitable anon_vma, before it goes off
  * to allocate a new anon_vma.  It checks because a repetitive
  */
 struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
 {
+	struct anon_vma *anon_vma;
 	struct vm_area_struct *near;
-	unsigned long vm_flags;
 
 	near = vma->vm_next;
 	if (!near)
 		goto try_prev;
 
-	/*
-	 * Since only mprotect tries to remerge vmas, match flags
-	 * which might be mprotected into each other later on.
-	 * Neither mlock nor madvise tries to remerge at present,
-	 * so leave their flags as obstructing a merge.
-	 */
-	vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
-	vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
-
-	if (near->anon_vma && vma->vm_end == near->vm_start &&
-			mpol_equal(vma_policy(vma), vma_policy(near)) &&
-			can_vma_merge_before(near, vm_flags,
-				NULL, vma->vm_file, vma->vm_pgoff +
-				((vma->vm_end - vma->vm_start) >> PAGE_SHIFT)))
-		return near->anon_vma;
+	anon_vma = reusable_anon_vma(near, vma, near);
+	if (anon_vma)
+		return anon_vma;
 try_prev:
 	/*
 	 * It is potentially slow to have to call find_vma_prev here.
@@ -868,14 +903,9 @@ try_prev:
 	if (!near)
 		goto none;
 
-	vm_flags = vma->vm_flags & ~(VM_READ|VM_WRITE|VM_EXEC);
-	vm_flags |= near->vm_flags & (VM_READ|VM_WRITE|VM_EXEC);
-
-	if (near->anon_vma && near->vm_end == vma->vm_start &&
-			mpol_equal(vma_policy(near), vma_policy(vma)) &&
-			can_vma_merge_after(near, vm_flags,
-				NULL, vma->vm_file, vma->vm_pgoff))
-		return near->anon_vma;
+	anon_vma = reusable_anon_vma(near, near, vma);
+	if (anon_vma)
+		return anon_vma;
 none:
 	/*
 	 * There's no absolute need to look only at touching neighbours:
diff --git a/mm/mmu_context.c b/mm/mmu_context.c
index 0777654147c9..9e82e937000e 100644
--- a/mm/mmu_context.c
+++ b/mm/mmu_context.c
@@ -53,6 +53,7 @@ void unuse_mm(struct mm_struct *mm)
 	struct task_struct *tsk = current;
 
 	task_lock(tsk);
+	sync_mm_rss(tsk, mm);
 	tsk->mm = NULL;
 	/* active_mm is still 'mm' */
 	enter_lazy_tlb(mm, tsk);
diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c
index 7e33f2cb3c77..438951d366f2 100644
--- a/mm/mmu_notifier.c
+++ b/mm/mmu_notifier.c
@@ -16,6 +16,7 @@
 #include <linux/err.h>
 #include <linux/rcupdate.h>
 #include <linux/sched.h>
+#include <linux/slab.h>
 
 /*
  * This function can't run concurrently against mmu_notifier_register
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 8bc969d8112d..2d1bf7cf8851 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -10,7 +10,6 @@
 
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
-#include <linux/slab.h>
 #include <linux/shm.h>
 #include <linux/mman.h>
 #include <linux/fs.h>
diff --git a/mm/mremap.c b/mm/mremap.c
index e9c75efce609..cde56ee51ef7 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -9,7 +9,6 @@
 
 #include <linux/mm.h>
 #include <linux/hugetlb.h>
-#include <linux/slab.h>
 #include <linux/shm.h>
 #include <linux/ksm.h>
 #include <linux/mman.h>
diff --git a/mm/nommu.c b/mm/nommu.c
index 605ace8982a8..63fa17d121f0 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -146,7 +146,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		     (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
 
 	for (i = 0; i < nr_pages; i++) {
-		vma = find_extend_vma(mm, start);
+		vma = find_vma(mm, start);
 		if (!vma)
 			goto finish_or_fault;
 
@@ -162,7 +162,7 @@ int __get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
 		}
 		if (vmas)
 			vmas[i] = vma;
-		start += PAGE_SIZE;
+		start = (start + PAGE_SIZE) & PAGE_MASK;
 	}
 
 	return i;
@@ -764,7 +764,7 @@ EXPORT_SYMBOL(find_vma);
  */
 struct vm_area_struct *find_extend_vma(struct mm_struct *mm, unsigned long addr)
 {
-	return find_vma(mm, addr & PAGE_MASK);
+	return find_vma(mm, addr);
 }
 
 /*
@@ -1040,10 +1040,9 @@ static int do_mmap_shared_file(struct vm_area_struct *vma)
 	if (ret != -ENOSYS)
 		return ret;
 
-	/* getting an ENOSYS error indicates that direct mmap isn't
-	 * possible (as opposed to tried but failed) so we'll fall
-	 * through to making a private copy of the data and mapping
-	 * that if we can */
+	/* getting -ENOSYS indicates that direct mmap isn't possible (as
+	 * opposed to tried but failed) so we can only give a suitable error as
+	 * it's not possible to make a private copy if MAP_SHARED was given */
 	return -ENODEV;
 }
 
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 9b223af6a147..b68e802a7a7d 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -18,6 +18,7 @@
 #include <linux/oom.h>
 #include <linux/mm.h>
 #include <linux/err.h>
+#include <linux/gfp.h>
 #include <linux/sched.h>
 #include <linux/swap.h>
 #include <linux/timex.h>
diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 3dd88539a0e6..6c0081441a32 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
@@ -284,6 +284,7 @@ static DEFINE_MUTEX(swap_cgroup_mutex);
 struct swap_cgroup_ctrl {
 	struct page **map;
 	unsigned long length;
+	spinlock_t	lock;
 };
 
 struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
@@ -353,16 +354,22 @@ unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
 	struct swap_cgroup_ctrl *ctrl;
 	struct page *mappage;
 	struct swap_cgroup *sc;
+	unsigned long flags;
+	unsigned short retval;
 
 	ctrl = &swap_cgroup_ctrl[type];
 
 	mappage = ctrl->map[idx];
 	sc = page_address(mappage);
 	sc += pos;
-	if (cmpxchg(&sc->id, old, new) == old)
-		return old;
+	spin_lock_irqsave(&ctrl->lock, flags);
+	retval = sc->id;
+	if (retval == old)
+		sc->id = new;
 	else
-		return 0;
+		retval = 0;
+	spin_unlock_irqrestore(&ctrl->lock, flags);
+	return retval;
 }
 
 /**
@@ -383,13 +390,17 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
 	struct page *mappage;
 	struct swap_cgroup *sc;
 	unsigned short old;
+	unsigned long flags;
 
 	ctrl = &swap_cgroup_ctrl[type];
 
 	mappage = ctrl->map[idx];
 	sc = page_address(mappage);
 	sc += pos;
-	old = xchg(&sc->id, id);
+	spin_lock_irqsave(&ctrl->lock, flags);
+	old = sc->id;
+	sc->id = id;
+	spin_unlock_irqrestore(&ctrl->lock, flags);
 
 	return old;
 }
@@ -441,6 +452,7 @@ int swap_cgroup_swapon(int type, unsigned long max_pages)
 	mutex_lock(&swap_cgroup_mutex);
 	ctrl->length = length;
 	ctrl->map = array;
+	spin_lock_init(&ctrl->lock);
 	if (swap_cgroup_prepare(type)) {
 		/* memory shortage */
 		ctrl->map = NULL;
diff --git a/mm/page_io.c b/mm/page_io.c
index a19af956ee1b..31a3b962230a 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -12,6 +12,7 @@
 
 #include <linux/mm.h>
 #include <linux/kernel_stat.h>
+#include <linux/gfp.h>
 #include <linux/pagemap.h>
 #include <linux/swap.h>
 #include <linux/bio.h>
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 7b47a57b6646..8b1a2ce21ee5 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -80,6 +80,37 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end,
 	return err;
 }
 
+#ifdef CONFIG_HUGETLB_PAGE
+static unsigned long hugetlb_entry_end(struct hstate *h, unsigned long addr,
+				       unsigned long end)
+{
+	unsigned long boundary = (addr & huge_page_mask(h)) + huge_page_size(h);
+	return boundary < end ? boundary : end;
+}
+
+static int walk_hugetlb_range(struct vm_area_struct *vma,
+			      unsigned long addr, unsigned long end,
+			      struct mm_walk *walk)
+{
+	struct hstate *h = hstate_vma(vma);
+	unsigned long next;
+	unsigned long hmask = huge_page_mask(h);
+	pte_t *pte;
+	int err = 0;
+
+	do {
+		next = hugetlb_entry_end(h, addr, end);
+		pte = huge_pte_offset(walk->mm, addr & hmask);
+		if (pte && walk->hugetlb_entry)
+			err = walk->hugetlb_entry(pte, hmask, addr, next, walk);
+		if (err)
+			return err;
+	} while (addr = next, addr != end);
+
+	return 0;
+}
+#endif
+
 /**
  * walk_page_range - walk a memory map's page tables with a callback
  * @mm: memory map to walk
@@ -128,20 +159,16 @@ int walk_page_range(unsigned long addr, unsigned long end,
 		vma = find_vma(walk->mm, addr);
 #ifdef CONFIG_HUGETLB_PAGE
 		if (vma && is_vm_hugetlb_page(vma)) {
-			pte_t *pte;
-			struct hstate *hs;
-
 			if (vma->vm_end < next)
 				next = vma->vm_end;
-			hs = hstate_vma(vma);
-			pte = huge_pte_offset(walk->mm,
-					      addr & huge_page_mask(hs));
-			if (pte && !huge_pte_none(huge_ptep_get(pte))
-			    && walk->hugetlb_entry)
-				err = walk->hugetlb_entry(pte, addr,
-							  next, walk);
+			/*
+			 * Hugepage is very tightly coupled with vma, so
+			 * walk through hugetlb entries within a given vma.
+			 */
+			err = walk_hugetlb_range(vma, addr, next, walk);
 			if (err)
 				break;
+			pgd = pgd_offset(walk->mm, next);
 			continue;
 		}
 #endif
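
The new hugetlb_entry callback above now receives the huge page mask, so a client no longer has to look up the hstate itself. A minimal sketch of a pagewalk client under that assumption (my_hugetlb_entry and walk_example are illustrative names, not from this merge):

#include <linux/mm.h>

static int my_hugetlb_entry(pte_t *pte, unsigned long hmask,
			    unsigned long addr, unsigned long next,
			    struct mm_walk *walk)
{
	/* called once per huge page covering [addr, next) */
	return 0;
}

static void walk_example(struct mm_struct *mm,
			 unsigned long start, unsigned long end)
{
	struct mm_walk walk = {
		.hugetlb_entry	= my_hugetlb_entry,
		.mm		= mm,
	};

	/* caller must hold mm->mmap_sem */
	walk_page_range(start, end, &walk);
}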
diff --git a/mm/percpu.c b/mm/percpu.c
index 768419d44ad7..6e09741ddc62 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1304,6 +1304,32 @@ void free_percpu(void __percpu *ptr)
 EXPORT_SYMBOL_GPL(free_percpu);
 
 /**
+ * is_kernel_percpu_address - test whether address is from static percpu area
+ * @addr: address to test
+ *
+ * Test whether @addr belongs to in-kernel static percpu area.  Module
+ * static percpu areas are not considered.  For those, use
+ * is_module_percpu_address().
+ *
+ * RETURNS:
+ * %true if @addr is from in-kernel static percpu area, %false otherwise.
+ */
+bool is_kernel_percpu_address(unsigned long addr)
+{
+	const size_t static_size = __per_cpu_end - __per_cpu_start;
+	void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr);
+	unsigned int cpu;
+
+	for_each_possible_cpu(cpu) {
+		void *start = per_cpu_ptr(base, cpu);
+
+		if ((void *)addr >= start && (void *)addr < start + static_size)
+			return true;
+	}
+	return false;
+}
+
+/**
  * per_cpu_ptr_to_phys - convert translated percpu address to physical address
  * @addr: the address to be converted to physical address
  *
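
is_kernel_percpu_address() lets a caller recognize static per-CPU storage even though it lies outside the kernel image proper (presumably for users such as lockdep's static-object check). A hedged sketch of that kind of caller (object_is_static is an illustrative name, not from this merge):

static int object_is_static(void *obj)
{
	unsigned long addr = (unsigned long)obj;

	/* static per-CPU variables live outside _stext.._end */
	if (is_kernel_percpu_address(addr))
		return 1;

	/* ... otherwise fall back to kernel-image bounds checks ... */
	return 0;
}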
diff --git a/mm/percpu_up.c b/mm/percpu_up.c
new file mode 100644
index 000000000000..c4351c7f57d2
--- /dev/null
+++ b/mm/percpu_up.c
@@ -0,0 +1,30 @@
+/*
+ * mm/percpu_up.c - dummy percpu memory allocator implementation for UP
+ */
+
+#include <linux/module.h>
+#include <linux/percpu.h>
+#include <linux/slab.h>
+
+void __percpu *__alloc_percpu(size_t size, size_t align)
+{
+	/*
+	 * Can't easily make larger alignment work with kmalloc.  WARN
+	 * on it.  Larger alignment should only be used for module
+	 * percpu sections on SMP for which this path isn't used.
+	 */
+	WARN_ON_ONCE(align > SMP_CACHE_BYTES);
+	return kzalloc(size, GFP_KERNEL);
+}
+EXPORT_SYMBOL_GPL(__alloc_percpu);
+
+void free_percpu(void __percpu *p)
+{
+	kfree(p);
+}
+EXPORT_SYMBOL_GPL(free_percpu);
+
+phys_addr_t per_cpu_ptr_to_phys(void *addr)
+{
+	return __pa(addr);
+}
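
With the UP stub above, dynamic per-CPU allocation degenerates to a single kzalloc'd object while the API stays identical on UP and SMP. A minimal usage sketch (hit_counter and hit_counter_demo are illustrative names, not from this merge):

#include <linux/percpu.h>

struct hit_counter {
	unsigned long hits;
};

static int hit_counter_demo(void)
{
	struct hit_counter __percpu *hc = alloc_percpu(struct hit_counter);

	if (!hc)
		return -ENOMEM;

	/* get_cpu() pins us to a CPU while we touch our slot */
	per_cpu_ptr(hc, get_cpu())->hits++;
	put_cpu();

	free_percpu(hc);
	return 0;
}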
diff --git a/mm/quicklist.c b/mm/quicklist.c
index 6633965bb27b..2876349339a7 100644
--- a/mm/quicklist.c
+++ b/mm/quicklist.c
@@ -14,6 +14,7 @@
  */
 #include <linux/kernel.h>
 
+#include <linux/gfp.h>
 #include <linux/mm.h>
 #include <linux/mmzone.h>
 #include <linux/module.h>
diff --git a/mm/readahead.c b/mm/readahead.c
index 337b20e946f6..dfa9a1a03a11 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -9,6 +9,7 @@
 
 #include <linux/kernel.h>
 #include <linux/fs.h>
+#include <linux/gfp.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/blkdev.h>
@@ -502,7 +503,7 @@ void page_cache_sync_readahead(struct address_space *mapping,
 		return;
 
 	/* be dumb */
-	if (filp->f_mode & FMODE_RANDOM) {
+	if (filp && (filp->f_mode & FMODE_RANDOM)) {
 		force_page_cache_readahead(mapping, filp, offset, req_size);
 		return;
 	}
diff --git a/mm/rmap.c b/mm/rmap.c
index fcd593c9c997..526704e8215d 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -182,7 +182,7 @@ int anon_vma_clone(struct vm_area_struct *dst, struct vm_area_struct *src)
 {
 	struct anon_vma_chain *avc, *pavc;
 
-	list_for_each_entry(pavc, &src->anon_vma_chain, same_vma) {
+	list_for_each_entry_reverse(pavc, &src->anon_vma_chain, same_vma) {
 		avc = anon_vma_chain_alloc();
 		if (!avc)
 			goto enomem_failure;
@@ -232,6 +232,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
  out_error_free_anon_vma:
 	anon_vma_free(anon_vma);
  out_error:
+	unlink_anon_vmas(vma);
 	return -ENOMEM;
 }
 
@@ -729,13 +730,29 @@ void page_move_anon_rmap(struct page *page,
  * @page: the page to add the mapping to
  * @vma: the vm area in which the mapping is added
  * @address: the user virtual address mapped
+ * @exclusive: the page is exclusively owned by the current process
  */
 static void __page_set_anon_rmap(struct page *page,
-	struct vm_area_struct *vma, unsigned long address)
+	struct vm_area_struct *vma, unsigned long address, int exclusive)
 {
 	struct anon_vma *anon_vma = vma->anon_vma;
 
 	BUG_ON(!anon_vma);
+
+	/*
+	 * If the page isn't exclusively mapped into this vma,
+	 * we must use the _oldest_ possible anon_vma for the
+	 * page mapping!
+	 *
+	 * So take the last AVC chain entry in the vma, which is
+	 * the deepest ancestor, and use the anon_vma from that.
+	 */
+	if (!exclusive) {
+		struct anon_vma_chain *avc;
+		avc = list_entry(vma->anon_vma_chain.prev, struct anon_vma_chain, same_vma);
+		anon_vma = avc->anon_vma;
+	}
+
 	anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON;
 	page->mapping = (struct address_space *) anon_vma;
 	page->index = linear_page_index(vma, address);
@@ -790,7 +807,7 @@ void page_add_anon_rmap(struct page *page,
 	VM_BUG_ON(!PageLocked(page));
 	VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end);
 	if (first)
-		__page_set_anon_rmap(page, vma, address);
+		__page_set_anon_rmap(page, vma, address, 0);
 	else
 		__page_check_anon_rmap(page, vma, address);
 }
@@ -812,7 +829,7 @@ void page_add_new_anon_rmap(struct page *page,
 	SetPageSwapBacked(page);
 	atomic_set(&page->_mapcount, 0); /* increment count (starts at -1) */
 	__inc_zone_page_state(page, NR_ANON_PAGES);
-	__page_set_anon_rmap(page, vma, address);
+	__page_set_anon_rmap(page, vma, address, 1);
 	if (page_evictable(page, vma))
 		lru_cache_add_lru(page, LRU_ACTIVE_ANON);
 	else
diff --git a/mm/slab.c b/mm/slab.c
index a9f325b28bed..bac0f4fcc216 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -3602,21 +3602,10 @@ EXPORT_SYMBOL(kmem_cache_alloc_notrace);
  */
 int kmem_ptr_validate(struct kmem_cache *cachep, const void *ptr)
 {
-	unsigned long addr = (unsigned long)ptr;
-	unsigned long min_addr = PAGE_OFFSET;
-	unsigned long align_mask = BYTES_PER_WORD - 1;
 	unsigned long size = cachep->buffer_size;
 	struct page *page;
 
-	if (unlikely(addr < min_addr))
-		goto out;
-	if (unlikely(addr > (unsigned long)high_memory - size))
-		goto out;
-	if (unlikely(addr & align_mask))
-		goto out;
-	if (unlikely(!kern_addr_valid(addr)))
-		goto out;
-	if (unlikely(!kern_addr_valid(addr + size - 1)))
+	if (unlikely(!kern_ptr_validate(ptr, size)))
 		goto out;
 	page = virt_to_page(ptr);
 	if (unlikely(!PageSlab(page)))
diff --git a/mm/slub.c b/mm/slub.c
index b364844a1068..7d6c8b1ccf63 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -2386,6 +2386,9 @@ int kmem_ptr_validate(struct kmem_cache *s, const void *object)
 {
 	struct page *page;
 
+	if (!kern_ptr_validate(object, s->size))
+		return 0;
+
 	page = get_object_page(object);
 
 	if (!page || s != page->slab)
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 392b9bb5bc01..aa33fd67fa41 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -22,6 +22,7 @@
 #include <linux/bootmem.h>
 #include <linux/highmem.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <linux/vmalloc.h>
 #include <linux/sched.h>
diff --git a/mm/sparse.c b/mm/sparse.c
index 22896d589133..dc0cc4d43ff3 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -2,6 +2,7 @@
  * sparse memory mappings.
  */
 #include <linux/mm.h>
+#include <linux/slab.h>
 #include <linux/mmzone.h>
 #include <linux/bootmem.h>
 #include <linux/highmem.h>
diff --git a/mm/swap.c b/mm/swap.c
index 9036b89813ac..7cd60bf0a972 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -30,6 +30,7 @@
 #include <linux/notifier.h>
 #include <linux/backing-dev.h>
 #include <linux/memcontrol.h>
+#include <linux/gfp.h>
 
 #include "internal.h"
 
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 6d1daeb1cb4a..e10f5833167f 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -8,6 +8,7 @@
  */
 #include <linux/module.h>
 #include <linux/mm.h>
+#include <linux/gfp.h>
 #include <linux/kernel_stat.h>
 #include <linux/swap.h>
 #include <linux/swapops.h>
diff --git a/mm/truncate.c b/mm/truncate.c
index e87e37244829..f42675a3615d 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -9,6 +9,7 @@
 
 #include <linux/kernel.h>
 #include <linux/backing-dev.h>
+#include <linux/gfp.h>
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/module.h>
diff --git a/mm/util.c b/mm/util.c
index 834db7be240f..f5712e8964be 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -186,6 +186,27 @@ void kzfree(const void *p)
 }
 EXPORT_SYMBOL(kzfree);
 
+int kern_ptr_validate(const void *ptr, unsigned long size)
+{
+	unsigned long addr = (unsigned long)ptr;
+	unsigned long min_addr = PAGE_OFFSET;
+	unsigned long align_mask = sizeof(void *) - 1;
+
+	if (unlikely(addr < min_addr))
+		goto out;
+	if (unlikely(addr > (unsigned long)high_memory - size))
+		goto out;
+	if (unlikely(addr & align_mask))
+		goto out;
+	if (unlikely(!kern_addr_valid(addr)))
+		goto out;
+	if (unlikely(!kern_addr_valid(addr + size - 1)))
+		goto out;
+	return 1;
+out:
+	return 0;
+}
+
 /*
  * strndup_user - duplicate an existing string from user space
  * @s: The string to duplicate
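
kern_ptr_validate() centralizes the address sanity checks that mm/slab.c used to open-code, so the slab and slub kmem_ptr_validate() hunks above can share one helper. A hedged sketch of the calling pattern (object_looks_valid is an illustrative name, not from this merge):

static int object_looks_valid(const void *obj, unsigned long size)
{
	/* rejects out-of-range, misaligned, or unmapped kernel pointers */
	if (!kern_ptr_validate(obj, size))
		return 0;

	/* allocator-specific checks (e.g. PageSlab()) would follow */
	return 1;
}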
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 79c809895fba..3ff3311447f5 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -13,7 +13,7 @@
 
 #include <linux/mm.h>
 #include <linux/module.h>
-#include <linux/slab.h>
+#include <linux/gfp.h>
 #include <linux/kernel_stat.h>
 #include <linux/swap.h>
 #include <linux/pagemap.h>
@@ -1535,13 +1535,6 @@ static void get_scan_ratio(struct zone *zone, struct scan_control *sc,
 	unsigned long ap, fp;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
 
-	/* If we have no swap space, do not bother scanning anon pages. */
-	if (!sc->may_swap || (nr_swap_pages <= 0)) {
-		percent[0] = 0;
-		percent[1] = 100;
-		return;
-	}
-
 	anon  = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_ANON) +
 		zone_nr_lru_pages(zone, sc, LRU_INACTIVE_ANON);
 	file  = zone_nr_lru_pages(zone, sc, LRU_ACTIVE_FILE) +
@@ -1639,20 +1632,22 @@ static void shrink_zone(int priority, struct zone *zone,
 	unsigned long nr_reclaimed = sc->nr_reclaimed;
 	unsigned long nr_to_reclaim = sc->nr_to_reclaim;
 	struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(zone, sc);
+	int noswap = 0;
 
-	get_scan_ratio(zone, sc, percent);
+	/* If we have no swap space, do not bother scanning anon pages. */
+	if (!sc->may_swap || (nr_swap_pages <= 0)) {
+		noswap = 1;
+		percent[0] = 0;
+		percent[1] = 100;
+	} else
+		get_scan_ratio(zone, sc, percent);
 
 	for_each_evictable_lru(l) {
 		int file = is_file_lru(l);
 		unsigned long scan;
 
-		if (percent[file] == 0) {
-			nr[l] = 0;
-			continue;
-		}
-
 		scan = zone_nr_lru_pages(zone, sc, l);
-		if (priority) {
+		if (priority || noswap) {
 			scan >>= priority;
 			scan = (scan * percent[file]) / 100;
 		}
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 7f760cbc73f3..fa12ea3051fb 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -12,6 +12,7 @@
 #include <linux/mm.h>
 #include <linux/err.h>
 #include <linux/module.h>
+#include <linux/slab.h>
 #include <linux/cpu.h>
 #include <linux/vmstat.h>
 #include <linux/sched.h>