about summary refs log tree commit diff stats
path: root/mm
diff options
context:
space:
mode:
author J. Bruce Fields <bfields@redhat.com> 2010-12-17 13:29:07 -0500
committer J. Bruce Fields <bfields@redhat.com> 2010-12-17 13:29:07 -0500
commit ec66ee3797e5848356cf593c6ec7aabf30a00cf1 (patch)
tree 7ed5c84cc914644ffa1cd1b6a2b45db53fc224e8 /mm
parent 1205065764f2eda3216ebe213143f69891ee3460 (diff)
parent b0c3844d8af6b9f3f18f31e1b0502fbefa2166be (diff)
Merge commit 'v2.6.37-rc6' into for-2.6.38
Diffstat (limited to 'mm')
-rw-r--r-- mm/filemap.c 5
-rw-r--r-- mm/hugetlb.c 3
-rw-r--r-- mm/ksm.c 7
-rw-r--r-- mm/memcontrol.c 66
-rw-r--r-- mm/memory-failure.c 8
-rw-r--r-- mm/memory_hotplug.c 31
-rw-r--r-- mm/mempolicy.c 3
-rw-r--r-- mm/mmap.c 16
-rw-r--r-- mm/nommu.c 1
-rw-r--r-- mm/page_alloc.c 33
-rw-r--r-- mm/pagewalk.c 5
-rw-r--r-- mm/slub.c 4
-rw-r--r-- mm/truncate.c 4
-rw-r--r-- mm/vmalloc.c 28
-rw-r--r-- mm/vmscan.c 7
-rw-r--r-- mm/vmstat.c 4
16 files changed, 155 insertions, 70 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index ea89840fc65f..6b9aee20f242 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -143,13 +143,18 @@ void __remove_from_page_cache(struct page *page)
143void remove_from_page_cache(struct page *page) 143void remove_from_page_cache(struct page *page)
144{ 144{
145 struct address_space *mapping = page->mapping; 145 struct address_space *mapping = page->mapping;
146 void (*freepage)(struct page *);
146 147
147 BUG_ON(!PageLocked(page)); 148 BUG_ON(!PageLocked(page));
148 149
150 freepage = mapping->a_ops->freepage;
149 spin_lock_irq(&mapping->tree_lock); 151 spin_lock_irq(&mapping->tree_lock);
150 __remove_from_page_cache(page); 152 __remove_from_page_cache(page);
151 spin_unlock_irq(&mapping->tree_lock); 153 spin_unlock_irq(&mapping->tree_lock);
152 mem_cgroup_uncharge_cache_page(page); 154 mem_cgroup_uncharge_cache_page(page);
155
156 if (freepage)
157 freepage(page);
153} 158}
154EXPORT_SYMBOL(remove_from_page_cache); 159EXPORT_SYMBOL(remove_from_page_cache);
155 160
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index c4a3558589ab..85855240933d 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -2738,7 +2738,8 @@ out_page_table_lock:
2738 unlock_page(pagecache_page); 2738 unlock_page(pagecache_page);
2739 put_page(pagecache_page); 2739 put_page(pagecache_page);
2740 } 2740 }
2741 unlock_page(page); 2741 if (page != pagecache_page)
2742 unlock_page(page);
2742 2743
2743out_mutex: 2744out_mutex:
2744 mutex_unlock(&hugetlb_instantiation_mutex); 2745 mutex_unlock(&hugetlb_instantiation_mutex);
diff --git a/mm/ksm.c b/mm/ksm.c
index 65ab5c7067d9..43bc893470b4 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1724,8 +1724,13 @@ static int ksm_memory_callback(struct notifier_block *self,
1724 /* 1724 /*
1725 * Keep it very simple for now: just lock out ksmd and 1725 * Keep it very simple for now: just lock out ksmd and
1726 * MADV_UNMERGEABLE while any memory is going offline. 1726 * MADV_UNMERGEABLE while any memory is going offline.
1727 * mutex_lock_nested() is necessary because lockdep was alarmed
1728 * that here we take ksm_thread_mutex inside notifier chain
1729 * mutex, and later take notifier chain mutex inside
1730 * ksm_thread_mutex to unlock it. But that's safe because both
1731 * are inside mem_hotplug_mutex.
1727 */ 1732 */
1728 mutex_lock(&ksm_thread_mutex); 1733 mutex_lock_nested(&ksm_thread_mutex, SINGLE_DEPTH_NESTING);
1729 break; 1734 break;
1730 1735
1731 case MEM_OFFLINE: 1736 case MEM_OFFLINE:
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2efa8ea07ff7..7a22b4129211 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -61,7 +61,14 @@ struct mem_cgroup *root_mem_cgroup __read_mostly;
61#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP 61#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
62/* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */ 62/* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */
63int do_swap_account __read_mostly; 63int do_swap_account __read_mostly;
64static int really_do_swap_account __initdata = 1; /* for remember boot option*/ 64
65/* for remember boot option*/
66#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP_ENABLED
67static int really_do_swap_account __initdata = 1;
68#else
69static int really_do_swap_account __initdata = 0;
70#endif
71
65#else 72#else
66#define do_swap_account (0) 73#define do_swap_account (0)
67#endif 74#endif
@@ -278,13 +285,14 @@ enum move_type {
278 285
279/* "mc" and its members are protected by cgroup_mutex */ 286/* "mc" and its members are protected by cgroup_mutex */
280static struct move_charge_struct { 287static struct move_charge_struct {
281 spinlock_t lock; /* for from, to, moving_task */ 288 spinlock_t lock; /* for from, to */
282 struct mem_cgroup *from; 289 struct mem_cgroup *from;
283 struct mem_cgroup *to; 290 struct mem_cgroup *to;
284 unsigned long precharge; 291 unsigned long precharge;
285 unsigned long moved_charge; 292 unsigned long moved_charge;
286 unsigned long moved_swap; 293 unsigned long moved_swap;
287 struct task_struct *moving_task; /* a task moving charges */ 294 struct task_struct *moving_task; /* a task moving charges */
295 struct mm_struct *mm;
288 wait_queue_head_t waitq; /* a waitq for other context */ 296 wait_queue_head_t waitq; /* a waitq for other context */
289} mc = { 297} mc = {
290 .lock = __SPIN_LOCK_UNLOCKED(mc.lock), 298 .lock = __SPIN_LOCK_UNLOCKED(mc.lock),
@@ -2152,7 +2160,7 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc,
2152{ 2160{
2153 VM_BUG_ON(from == to); 2161 VM_BUG_ON(from == to);
2154 VM_BUG_ON(PageLRU(pc->page)); 2162 VM_BUG_ON(PageLRU(pc->page));
2155 VM_BUG_ON(!PageCgroupLocked(pc)); 2163 VM_BUG_ON(!page_is_cgroup_locked(pc));
2156 VM_BUG_ON(!PageCgroupUsed(pc)); 2164 VM_BUG_ON(!PageCgroupUsed(pc));
2157 VM_BUG_ON(pc->mem_cgroup != from); 2165 VM_BUG_ON(pc->mem_cgroup != from);
2158 2166
@@ -4631,7 +4639,7 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
4631 unsigned long precharge; 4639 unsigned long precharge;
4632 struct vm_area_struct *vma; 4640 struct vm_area_struct *vma;
4633 4641
4634 down_read(&mm->mmap_sem); 4642 /* We've already held the mmap_sem */
4635 for (vma = mm->mmap; vma; vma = vma->vm_next) { 4643 for (vma = mm->mmap; vma; vma = vma->vm_next) {
4636 struct mm_walk mem_cgroup_count_precharge_walk = { 4644 struct mm_walk mem_cgroup_count_precharge_walk = {
4637 .pmd_entry = mem_cgroup_count_precharge_pte_range, 4645 .pmd_entry = mem_cgroup_count_precharge_pte_range,
@@ -4643,7 +4651,6 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
4643 walk_page_range(vma->vm_start, vma->vm_end, 4651 walk_page_range(vma->vm_start, vma->vm_end,
4644 &mem_cgroup_count_precharge_walk); 4652 &mem_cgroup_count_precharge_walk);
4645 } 4653 }
4646 up_read(&mm->mmap_sem);
4647 4654
4648 precharge = mc.precharge; 4655 precharge = mc.precharge;
4649 mc.precharge = 0; 4656 mc.precharge = 0;
@@ -4694,11 +4701,16 @@ static void mem_cgroup_clear_mc(void)
4694 4701
4695 mc.moved_swap = 0; 4702 mc.moved_swap = 0;
4696 } 4703 }
4704 if (mc.mm) {
4705 up_read(&mc.mm->mmap_sem);
4706 mmput(mc.mm);
4707 }
4697 spin_lock(&mc.lock); 4708 spin_lock(&mc.lock);
4698 mc.from = NULL; 4709 mc.from = NULL;
4699 mc.to = NULL; 4710 mc.to = NULL;
4700 mc.moving_task = NULL;
4701 spin_unlock(&mc.lock); 4711 spin_unlock(&mc.lock);
4712 mc.moving_task = NULL;
4713 mc.mm = NULL;
4702 mem_cgroup_end_move(from); 4714 mem_cgroup_end_move(from);
4703 memcg_oom_recover(from); 4715 memcg_oom_recover(from);
4704 memcg_oom_recover(to); 4716 memcg_oom_recover(to);
@@ -4724,12 +4736,21 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
4724 return 0; 4736 return 0;
4725 /* We move charges only when we move a owner of the mm */ 4737 /* We move charges only when we move a owner of the mm */
4726 if (mm->owner == p) { 4738 if (mm->owner == p) {
4739 /*
4740 * We do all the move charge works under one mmap_sem to
4741 * avoid deadlock with down_write(&mmap_sem)
4742 * -> try_charge() -> if (mc.moving_task) -> sleep.
4743 */
4744 down_read(&mm->mmap_sem);
4745
4727 VM_BUG_ON(mc.from); 4746 VM_BUG_ON(mc.from);
4728 VM_BUG_ON(mc.to); 4747 VM_BUG_ON(mc.to);
4729 VM_BUG_ON(mc.precharge); 4748 VM_BUG_ON(mc.precharge);
4730 VM_BUG_ON(mc.moved_charge); 4749 VM_BUG_ON(mc.moved_charge);
4731 VM_BUG_ON(mc.moved_swap); 4750 VM_BUG_ON(mc.moved_swap);
4732 VM_BUG_ON(mc.moving_task); 4751 VM_BUG_ON(mc.moving_task);
4752 VM_BUG_ON(mc.mm);
4753
4733 mem_cgroup_start_move(from); 4754 mem_cgroup_start_move(from);
4734 spin_lock(&mc.lock); 4755 spin_lock(&mc.lock);
4735 mc.from = from; 4756 mc.from = from;
@@ -4737,14 +4758,16 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
4737 mc.precharge = 0; 4758 mc.precharge = 0;
4738 mc.moved_charge = 0; 4759 mc.moved_charge = 0;
4739 mc.moved_swap = 0; 4760 mc.moved_swap = 0;
4740 mc.moving_task = current;
4741 spin_unlock(&mc.lock); 4761 spin_unlock(&mc.lock);
4762 mc.moving_task = current;
4763 mc.mm = mm;
4742 4764
4743 ret = mem_cgroup_precharge_mc(mm); 4765 ret = mem_cgroup_precharge_mc(mm);
4744 if (ret) 4766 if (ret)
4745 mem_cgroup_clear_mc(); 4767 mem_cgroup_clear_mc();
4746 } 4768 /* We call up_read() and mmput() in clear_mc(). */
4747 mmput(mm); 4769 } else
4770 mmput(mm);
4748 } 4771 }
4749 return ret; 4772 return ret;
4750} 4773}
@@ -4832,7 +4855,7 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
4832 struct vm_area_struct *vma; 4855 struct vm_area_struct *vma;
4833 4856
4834 lru_add_drain_all(); 4857 lru_add_drain_all();
4835 down_read(&mm->mmap_sem); 4858 /* We've already held the mmap_sem */
4836 for (vma = mm->mmap; vma; vma = vma->vm_next) { 4859 for (vma = mm->mmap; vma; vma = vma->vm_next) {
4837 int ret; 4860 int ret;
4838 struct mm_walk mem_cgroup_move_charge_walk = { 4861 struct mm_walk mem_cgroup_move_charge_walk = {
@@ -4851,7 +4874,6 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
4851 */ 4874 */
4852 break; 4875 break;
4853 } 4876 }
4854 up_read(&mm->mmap_sem);
4855} 4877}
4856 4878
4857static void mem_cgroup_move_task(struct cgroup_subsys *ss, 4879static void mem_cgroup_move_task(struct cgroup_subsys *ss,
@@ -4860,17 +4882,11 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
4860 struct task_struct *p, 4882 struct task_struct *p,
4861 bool threadgroup) 4883 bool threadgroup)
4862{ 4884{
4863 struct mm_struct *mm; 4885 if (!mc.mm)
4864
4865 if (!mc.to)
4866 /* no need to move charge */ 4886 /* no need to move charge */
4867 return; 4887 return;
4868 4888
4869 mm = get_task_mm(p); 4889 mem_cgroup_move_charge(mc.mm);
4870 if (mm) {
4871 mem_cgroup_move_charge(mm);
4872 mmput(mm);
4873 }
4874 mem_cgroup_clear_mc(); 4890 mem_cgroup_clear_mc();
4875} 4891}
4876#else /* !CONFIG_MMU */ 4892#else /* !CONFIG_MMU */
@@ -4911,10 +4927,20 @@ struct cgroup_subsys mem_cgroup_subsys = {
4911}; 4927};
4912 4928
4913#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP 4929#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
4930static int __init enable_swap_account(char *s)
4931{
4932 /* consider enabled if no parameter or 1 is given */
4933 if (!s || !strcmp(s, "1"))
4934 really_do_swap_account = 1;
4935 else if (!strcmp(s, "0"))
4936 really_do_swap_account = 0;
4937 return 1;
4938}
4939__setup("swapaccount", enable_swap_account);
4914 4940
4915static int __init disable_swap_account(char *s) 4941static int __init disable_swap_account(char *s)
4916{ 4942{
4917 really_do_swap_account = 0; 4943 enable_swap_account("0");
4918 return 1; 4944 return 1;
4919} 4945}
4920__setup("noswapaccount", disable_swap_account); 4946__setup("noswapaccount", disable_swap_account);
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 124324134ff6..46ab2c044b0e 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -51,6 +51,7 @@
51#include <linux/slab.h> 51#include <linux/slab.h>
52#include <linux/swapops.h> 52#include <linux/swapops.h>
53#include <linux/hugetlb.h> 53#include <linux/hugetlb.h>
54#include <linux/memory_hotplug.h>
54#include "internal.h" 55#include "internal.h"
55 56
56int sysctl_memory_failure_early_kill __read_mostly = 0; 57int sysctl_memory_failure_early_kill __read_mostly = 0;
@@ -1230,11 +1231,10 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
1230 return 1; 1231 return 1;
1231 1232
1232 /* 1233 /*
1233 * The lock_system_sleep prevents a race with memory hotplug, 1234 * The lock_memory_hotplug prevents a race with memory hotplug.
1234 * because the isolation assumes there's only a single user.
1235 * This is a big hammer, a better would be nicer. 1235 * This is a big hammer, a better would be nicer.
1236 */ 1236 */
1237 lock_system_sleep(); 1237 lock_memory_hotplug();
1238 1238
1239 /* 1239 /*
1240 * Isolate the page, so that it doesn't get reallocated if it 1240 * Isolate the page, so that it doesn't get reallocated if it
@@ -1264,7 +1264,7 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
1264 ret = 1; 1264 ret = 1;
1265 } 1265 }
1266 unset_migratetype_isolate(p); 1266 unset_migratetype_isolate(p);
1267 unlock_system_sleep(); 1267 unlock_memory_hotplug();
1268 return ret; 1268 return ret;
1269} 1269}
1270 1270
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 9260314a221e..2c6523af5473 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -34,6 +34,23 @@
34 34
35#include "internal.h" 35#include "internal.h"
36 36
37DEFINE_MUTEX(mem_hotplug_mutex);
38
39void lock_memory_hotplug(void)
40{
41 mutex_lock(&mem_hotplug_mutex);
42
43 /* for exclusive hibernation if CONFIG_HIBERNATION=y */
44 lock_system_sleep();
45}
46
47void unlock_memory_hotplug(void)
48{
49 unlock_system_sleep();
50 mutex_unlock(&mem_hotplug_mutex);
51}
52
53
37/* add this memory to iomem resource */ 54/* add this memory to iomem resource */
38static struct resource *register_memory_resource(u64 start, u64 size) 55static struct resource *register_memory_resource(u64 start, u64 size)
39{ 56{
@@ -493,7 +510,7 @@ int mem_online_node(int nid)
493 pg_data_t *pgdat; 510 pg_data_t *pgdat;
494 int ret; 511 int ret;
495 512
496 lock_system_sleep(); 513 lock_memory_hotplug();
497 pgdat = hotadd_new_pgdat(nid, 0); 514 pgdat = hotadd_new_pgdat(nid, 0);
498 if (pgdat) { 515 if (pgdat) {
499 ret = -ENOMEM; 516 ret = -ENOMEM;
@@ -504,7 +521,7 @@ int mem_online_node(int nid)
504 BUG_ON(ret); 521 BUG_ON(ret);
505 522
506out: 523out:
507 unlock_system_sleep(); 524 unlock_memory_hotplug();
508 return ret; 525 return ret;
509} 526}
510 527
@@ -516,7 +533,7 @@ int __ref add_memory(int nid, u64 start, u64 size)
516 struct resource *res; 533 struct resource *res;
517 int ret; 534 int ret;
518 535
519 lock_system_sleep(); 536 lock_memory_hotplug();
520 537
521 res = register_memory_resource(start, size); 538 res = register_memory_resource(start, size);
522 ret = -EEXIST; 539 ret = -EEXIST;
@@ -563,7 +580,7 @@ error:
563 release_memory_resource(res); 580 release_memory_resource(res);
564 581
565out: 582out:
566 unlock_system_sleep(); 583 unlock_memory_hotplug();
567 return ret; 584 return ret;
568} 585}
569EXPORT_SYMBOL_GPL(add_memory); 586EXPORT_SYMBOL_GPL(add_memory);
@@ -791,7 +808,7 @@ static int offline_pages(unsigned long start_pfn,
791 if (!test_pages_in_a_zone(start_pfn, end_pfn)) 808 if (!test_pages_in_a_zone(start_pfn, end_pfn))
792 return -EINVAL; 809 return -EINVAL;
793 810
794 lock_system_sleep(); 811 lock_memory_hotplug();
795 812
796 zone = page_zone(pfn_to_page(start_pfn)); 813 zone = page_zone(pfn_to_page(start_pfn));
797 node = zone_to_nid(zone); 814 node = zone_to_nid(zone);
@@ -880,7 +897,7 @@ repeat:
880 writeback_set_ratelimit(); 897 writeback_set_ratelimit();
881 898
882 memory_notify(MEM_OFFLINE, &arg); 899 memory_notify(MEM_OFFLINE, &arg);
883 unlock_system_sleep(); 900 unlock_memory_hotplug();
884 return 0; 901 return 0;
885 902
886failed_removal: 903failed_removal:
@@ -891,7 +908,7 @@ failed_removal:
891 undo_isolate_page_range(start_pfn, end_pfn); 908 undo_isolate_page_range(start_pfn, end_pfn);
892 909
893out: 910out:
894 unlock_system_sleep(); 911 unlock_memory_hotplug();
895 return ret; 912 return ret;
896} 913}
897 914
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 4a57f135b76e..11ff260fb282 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1307,15 +1307,18 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode,
1307 goto out; 1307 goto out;
1308 1308
1309 /* Find the mm_struct */ 1309 /* Find the mm_struct */
1310 rcu_read_lock();
1310 read_lock(&tasklist_lock); 1311 read_lock(&tasklist_lock);
1311 task = pid ? find_task_by_vpid(pid) : current; 1312 task = pid ? find_task_by_vpid(pid) : current;
1312 if (!task) { 1313 if (!task) {
1313 read_unlock(&tasklist_lock); 1314 read_unlock(&tasklist_lock);
1315 rcu_read_unlock();
1314 err = -ESRCH; 1316 err = -ESRCH;
1315 goto out; 1317 goto out;
1316 } 1318 }
1317 mm = get_task_mm(task); 1319 mm = get_task_mm(task);
1318 read_unlock(&tasklist_lock); 1320 read_unlock(&tasklist_lock);
1321 rcu_read_unlock();
1319 1322
1320 err = -EINVAL; 1323 err = -EINVAL;
1321 if (!mm) 1324 if (!mm)
diff --git a/mm/mmap.c b/mm/mmap.c
index b179abb1474a..50a4aa0255a0 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2462,6 +2462,7 @@ int install_special_mapping(struct mm_struct *mm,
2462 unsigned long addr, unsigned long len, 2462 unsigned long addr, unsigned long len,
2463 unsigned long vm_flags, struct page **pages) 2463 unsigned long vm_flags, struct page **pages)
2464{ 2464{
2465 int ret;
2465 struct vm_area_struct *vma; 2466 struct vm_area_struct *vma;
2466 2467
2467 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); 2468 vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL);
@@ -2479,16 +2480,23 @@ int install_special_mapping(struct mm_struct *mm,
2479 vma->vm_ops = &special_mapping_vmops; 2480 vma->vm_ops = &special_mapping_vmops;
2480 vma->vm_private_data = pages; 2481 vma->vm_private_data = pages;
2481 2482
2482 if (unlikely(insert_vm_struct(mm, vma))) { 2483 ret = security_file_mmap(NULL, 0, 0, 0, vma->vm_start, 1);
2483 kmem_cache_free(vm_area_cachep, vma); 2484 if (ret)
2484 return -ENOMEM; 2485 goto out;
2485 } 2486
2487 ret = insert_vm_struct(mm, vma);
2488 if (ret)
2489 goto out;
2486 2490
2487 mm->total_vm += len >> PAGE_SHIFT; 2491 mm->total_vm += len >> PAGE_SHIFT;
2488 2492
2489 perf_event_mmap(vma); 2493 perf_event_mmap(vma);
2490 2494
2491 return 0; 2495 return 0;
2496
2497out:
2498 kmem_cache_free(vm_area_cachep, vma);
2499 return ret;
2492} 2500}
2493 2501
2494static DEFINE_MUTEX(mm_all_locks_mutex); 2502static DEFINE_MUTEX(mm_all_locks_mutex);
diff --git a/mm/nommu.c b/mm/nommu.c
index 3613517c7592..27a9ac588516 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -1717,6 +1717,7 @@ void exit_mmap(struct mm_struct *mm)
1717 mm->mmap = vma->vm_next; 1717 mm->mmap = vma->vm_next;
1718 delete_vma_from_mm(vma); 1718 delete_vma_from_mm(vma);
1719 delete_vma(mm, vma); 1719 delete_vma(mm, vma);
1720 cond_resched();
1720 } 1721 }
1721 1722
1722 kleave(""); 1723 kleave("");
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 07a654486f75..ff7e15872398 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -104,19 +104,24 @@ gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK;
104 * only be modified with pm_mutex held, unless the suspend/hibernate code is 104 * only be modified with pm_mutex held, unless the suspend/hibernate code is
105 * guaranteed not to run in parallel with that modification). 105 * guaranteed not to run in parallel with that modification).
106 */ 106 */
107void set_gfp_allowed_mask(gfp_t mask) 107
108static gfp_t saved_gfp_mask;
109
110void pm_restore_gfp_mask(void)
108{ 111{
109 WARN_ON(!mutex_is_locked(&pm_mutex)); 112 WARN_ON(!mutex_is_locked(&pm_mutex));
110 gfp_allowed_mask = mask; 113 if (saved_gfp_mask) {
114 gfp_allowed_mask = saved_gfp_mask;
115 saved_gfp_mask = 0;
116 }
111} 117}
112 118
113gfp_t clear_gfp_allowed_mask(gfp_t mask) 119void pm_restrict_gfp_mask(void)
114{ 120{
115 gfp_t ret = gfp_allowed_mask;
116
117 WARN_ON(!mutex_is_locked(&pm_mutex)); 121 WARN_ON(!mutex_is_locked(&pm_mutex));
118 gfp_allowed_mask &= ~mask; 122 WARN_ON(saved_gfp_mask);
119 return ret; 123 saved_gfp_mask = gfp_allowed_mask;
124 gfp_allowed_mask &= ~GFP_IOFS;
120} 125}
121#endif /* CONFIG_PM_SLEEP */ 126#endif /* CONFIG_PM_SLEEP */
122 127
@@ -3008,14 +3013,6 @@ static __init_refok int __build_all_zonelists(void *data)
3008 build_zonelist_cache(pgdat); 3013 build_zonelist_cache(pgdat);
3009 } 3014 }
3010 3015
3011#ifdef CONFIG_MEMORY_HOTPLUG
3012 /* Setup real pagesets for the new zone */
3013 if (data) {
3014 struct zone *zone = data;
3015 setup_zone_pageset(zone);
3016 }
3017#endif
3018
3019 /* 3016 /*
3020 * Initialize the boot_pagesets that are going to be used 3017 * Initialize the boot_pagesets that are going to be used
3021 * for bootstrapping processors. The real pagesets for 3018 * for bootstrapping processors. The real pagesets for
@@ -3064,7 +3061,11 @@ void build_all_zonelists(void *data)
3064 } else { 3061 } else {
3065 /* we have to stop all cpus to guarantee there is no user 3062 /* we have to stop all cpus to guarantee there is no user
3066 of zonelist */ 3063 of zonelist */
3067 stop_machine(__build_all_zonelists, data, NULL); 3064#ifdef CONFIG_MEMORY_HOTPLUG
3065 if (data)
3066 setup_zone_pageset((struct zone *)data);
3067#endif
3068 stop_machine(__build_all_zonelists, NULL, NULL);
3068 /* cpuset refresh routine should be here */ 3069 /* cpuset refresh routine should be here */
3069 } 3070 }
3070 vm_total_pages = nr_free_pagecache_pages(); 3071 vm_total_pages = nr_free_pagecache_pages();
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 8b1a2ce21ee5..38cc58b8b2b0 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -139,7 +139,6 @@ int walk_page_range(unsigned long addr, unsigned long end,
139 pgd_t *pgd; 139 pgd_t *pgd;
140 unsigned long next; 140 unsigned long next;
141 int err = 0; 141 int err = 0;
142 struct vm_area_struct *vma;
143 142
144 if (addr >= end) 143 if (addr >= end)
145 return err; 144 return err;
@@ -149,15 +148,17 @@ int walk_page_range(unsigned long addr, unsigned long end,
149 148
150 pgd = pgd_offset(walk->mm, addr); 149 pgd = pgd_offset(walk->mm, addr);
151 do { 150 do {
151 struct vm_area_struct *uninitialized_var(vma);
152
152 next = pgd_addr_end(addr, end); 153 next = pgd_addr_end(addr, end);
153 154
155#ifdef CONFIG_HUGETLB_PAGE
154 /* 156 /*
155 * handle hugetlb vma individually because pagetable walk for 157 * handle hugetlb vma individually because pagetable walk for
156 * the hugetlb page is dependent on the architecture and 158 * the hugetlb page is dependent on the architecture and
157 * we can't handled it in the same manner as non-huge pages. 159 * we can't handled it in the same manner as non-huge pages.
158 */ 160 */
159 vma = find_vma(walk->mm, addr); 161 vma = find_vma(walk->mm, addr);
160#ifdef CONFIG_HUGETLB_PAGE
161 if (vma && is_vm_hugetlb_page(vma)) { 162 if (vma && is_vm_hugetlb_page(vma)) {
162 if (vma->vm_end < next) 163 if (vma->vm_end < next)
163 next = vma->vm_end; 164 next = vma->vm_end;
diff --git a/mm/slub.c b/mm/slub.c
index 981fb730aa04..bec0e355fbad 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3401,13 +3401,13 @@ static int validate_slab(struct kmem_cache *s, struct page *page,
3401 3401
3402 for_each_free_object(p, s, page->freelist) { 3402 for_each_free_object(p, s, page->freelist) {
3403 set_bit(slab_index(p, s, addr), map); 3403 set_bit(slab_index(p, s, addr), map);
3404 if (!check_object(s, page, p, 0)) 3404 if (!check_object(s, page, p, SLUB_RED_INACTIVE))
3405 return 0; 3405 return 0;
3406 } 3406 }
3407 3407
3408 for_each_object(p, s, addr, page->objects) 3408 for_each_object(p, s, addr, page->objects)
3409 if (!test_bit(slab_index(p, s, addr), map)) 3409 if (!test_bit(slab_index(p, s, addr), map))
3410 if (!check_object(s, page, p, 1)) 3410 if (!check_object(s, page, p, SLUB_RED_ACTIVE))
3411 return 0; 3411 return 0;
3412 return 1; 3412 return 1;
3413} 3413}
diff --git a/mm/truncate.c b/mm/truncate.c
index ba887bff48c5..3c2d5ddfa0d4 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -390,6 +390,10 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
390 __remove_from_page_cache(page); 390 __remove_from_page_cache(page);
391 spin_unlock_irq(&mapping->tree_lock); 391 spin_unlock_irq(&mapping->tree_lock);
392 mem_cgroup_uncharge_cache_page(page); 392 mem_cgroup_uncharge_cache_page(page);
393
394 if (mapping->a_ops->freepage)
395 mapping->a_ops->freepage(page);
396
393 page_cache_release(page); /* pagecache ref */ 397 page_cache_release(page); /* pagecache ref */
394 return 1; 398 return 1;
395failed: 399failed:
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index a3d66b3dc5cb..eb5cc7d00c5a 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -31,8 +31,6 @@
31#include <asm/tlbflush.h> 31#include <asm/tlbflush.h>
32#include <asm/shmparam.h> 32#include <asm/shmparam.h>
33 33
34bool vmap_lazy_unmap __read_mostly = true;
35
36/*** Page table manipulation functions ***/ 34/*** Page table manipulation functions ***/
37 35
38static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end) 36static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end)
@@ -503,9 +501,6 @@ static unsigned long lazy_max_pages(void)
503{ 501{
504 unsigned int log; 502 unsigned int log;
505 503
506 if (!vmap_lazy_unmap)
507 return 0;
508
509 log = fls(num_online_cpus()); 504 log = fls(num_online_cpus());
510 505
511 return log * (32UL * 1024 * 1024 / PAGE_SIZE); 506 return log * (32UL * 1024 * 1024 / PAGE_SIZE);
@@ -566,7 +561,6 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end,
566 if (va->va_end > *end) 561 if (va->va_end > *end)
567 *end = va->va_end; 562 *end = va->va_end;
568 nr += (va->va_end - va->va_start) >> PAGE_SHIFT; 563 nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
569 unmap_vmap_area(va);
570 list_add_tail(&va->purge_list, &valist); 564 list_add_tail(&va->purge_list, &valist);
571 va->flags |= VM_LAZY_FREEING; 565 va->flags |= VM_LAZY_FREEING;
572 va->flags &= ~VM_LAZY_FREE; 566 va->flags &= ~VM_LAZY_FREE;
@@ -611,10 +605,11 @@ static void purge_vmap_area_lazy(void)
611} 605}
612 606
613/* 607/*
614 * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been 608 * Free a vmap area, caller ensuring that the area has been unmapped
615 * called for the correct range previously. 609 * and flush_cache_vunmap had been called for the correct range
610 * previously.
616 */ 611 */
617static void free_unmap_vmap_area_noflush(struct vmap_area *va) 612static void free_vmap_area_noflush(struct vmap_area *va)
618{ 613{
619 va->flags |= VM_LAZY_FREE; 614 va->flags |= VM_LAZY_FREE;
620 atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr); 615 atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr);
@@ -623,6 +618,16 @@ static void free_unmap_vmap_area_noflush(struct vmap_area *va)
623} 618}
624 619
625/* 620/*
621 * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been
622 * called for the correct range previously.
623 */
624static void free_unmap_vmap_area_noflush(struct vmap_area *va)
625{
626 unmap_vmap_area(va);
627 free_vmap_area_noflush(va);
628}
629
630/*
626 * Free and unmap a vmap area 631 * Free and unmap a vmap area
627 */ 632 */
628static void free_unmap_vmap_area(struct vmap_area *va) 633static void free_unmap_vmap_area(struct vmap_area *va)
@@ -798,7 +803,7 @@ static void free_vmap_block(struct vmap_block *vb)
798 spin_unlock(&vmap_block_tree_lock); 803 spin_unlock(&vmap_block_tree_lock);
799 BUG_ON(tmp != vb); 804 BUG_ON(tmp != vb);
800 805
801 free_unmap_vmap_area_noflush(vb->va); 806 free_vmap_area_noflush(vb->va);
802 call_rcu(&vb->rcu_head, rcu_free_vb); 807 call_rcu(&vb->rcu_head, rcu_free_vb);
803} 808}
804 809
@@ -936,6 +941,8 @@ static void vb_free(const void *addr, unsigned long size)
936 rcu_read_unlock(); 941 rcu_read_unlock();
937 BUG_ON(!vb); 942 BUG_ON(!vb);
938 943
944 vunmap_page_range((unsigned long)addr, (unsigned long)addr + size);
945
939 spin_lock(&vb->lock); 946 spin_lock(&vb->lock);
940 BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order)); 947 BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order));
941 948
@@ -988,7 +995,6 @@ void vm_unmap_aliases(void)
988 995
989 s = vb->va->va_start + (i << PAGE_SHIFT); 996 s = vb->va->va_start + (i << PAGE_SHIFT);
990 e = vb->va->va_start + (j << PAGE_SHIFT); 997 e = vb->va->va_start + (j << PAGE_SHIFT);
991 vunmap_page_range(s, e);
992 flush = 1; 998 flush = 1;
993 999
994 if (s < start) 1000 if (s < start)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d31d7ce52c0e..9ca587c69274 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -494,9 +494,16 @@ static int __remove_mapping(struct address_space *mapping, struct page *page)
494 spin_unlock_irq(&mapping->tree_lock); 494 spin_unlock_irq(&mapping->tree_lock);
495 swapcache_free(swap, page); 495 swapcache_free(swap, page);
496 } else { 496 } else {
497 void (*freepage)(struct page *);
498
499 freepage = mapping->a_ops->freepage;
500
497 __remove_from_page_cache(page); 501 __remove_from_page_cache(page);
498 spin_unlock_irq(&mapping->tree_lock); 502 spin_unlock_irq(&mapping->tree_lock);
499 mem_cgroup_uncharge_cache_page(page); 503 mem_cgroup_uncharge_cache_page(page);
504
505 if (freepage != NULL)
506 freepage(page);
500 } 507 }
501 508
502 return 1; 509 return 1;
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 42eac4d33216..8f62f17ee1c7 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -750,8 +750,6 @@ static const char * const vmstat_text[] = {
750 "nr_shmem", 750 "nr_shmem",
751 "nr_dirtied", 751 "nr_dirtied",
752 "nr_written", 752 "nr_written",
753 "nr_dirty_threshold",
754 "nr_dirty_background_threshold",
755 753
756#ifdef CONFIG_NUMA 754#ifdef CONFIG_NUMA
757 "numa_hit", 755 "numa_hit",
@@ -761,6 +759,8 @@ static const char * const vmstat_text[] = {
761 "numa_local", 759 "numa_local",
762 "numa_other", 760 "numa_other",
763#endif 761#endif
762 "nr_dirty_threshold",
763 "nr_dirty_background_threshold",
764 764
765#ifdef CONFIG_VM_EVENT_COUNTERS 765#ifdef CONFIG_VM_EVENT_COUNTERS
766 "pgpgin", 766 "pgpgin",