diff options
author | J. Bruce Fields <bfields@redhat.com> | 2010-12-17 13:29:07 -0500 |
---|---|---|
committer | J. Bruce Fields <bfields@redhat.com> | 2010-12-17 13:29:07 -0500 |
commit | ec66ee3797e5848356cf593c6ec7aabf30a00cf1 (patch) | |
tree | 7ed5c84cc914644ffa1cd1b6a2b45db53fc224e8 /mm | |
parent | 1205065764f2eda3216ebe213143f69891ee3460 (diff) | |
parent | b0c3844d8af6b9f3f18f31e1b0502fbefa2166be (diff) |
Merge commit 'v2.6.37-rc6' into for-2.6.38
Diffstat (limited to 'mm')
-rw-r--r-- | mm/filemap.c | 5 | ||||
-rw-r--r-- | mm/hugetlb.c | 3 | ||||
-rw-r--r-- | mm/ksm.c | 7 | ||||
-rw-r--r-- | mm/memcontrol.c | 66 | ||||
-rw-r--r-- | mm/memory-failure.c | 8 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 31 | ||||
-rw-r--r-- | mm/mempolicy.c | 3 | ||||
-rw-r--r-- | mm/mmap.c | 16 | ||||
-rw-r--r-- | mm/nommu.c | 1 | ||||
-rw-r--r-- | mm/page_alloc.c | 33 | ||||
-rw-r--r-- | mm/pagewalk.c | 5 | ||||
-rw-r--r-- | mm/slub.c | 4 | ||||
-rw-r--r-- | mm/truncate.c | 4 | ||||
-rw-r--r-- | mm/vmalloc.c | 28 | ||||
-rw-r--r-- | mm/vmscan.c | 7 | ||||
-rw-r--r-- | mm/vmstat.c | 4 |
16 files changed, 155 insertions, 70 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index ea89840fc65f..6b9aee20f242 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -143,13 +143,18 @@ void __remove_from_page_cache(struct page *page) | |||
143 | void remove_from_page_cache(struct page *page) | 143 | void remove_from_page_cache(struct page *page) |
144 | { | 144 | { |
145 | struct address_space *mapping = page->mapping; | 145 | struct address_space *mapping = page->mapping; |
146 | void (*freepage)(struct page *); | ||
146 | 147 | ||
147 | BUG_ON(!PageLocked(page)); | 148 | BUG_ON(!PageLocked(page)); |
148 | 149 | ||
150 | freepage = mapping->a_ops->freepage; | ||
149 | spin_lock_irq(&mapping->tree_lock); | 151 | spin_lock_irq(&mapping->tree_lock); |
150 | __remove_from_page_cache(page); | 152 | __remove_from_page_cache(page); |
151 | spin_unlock_irq(&mapping->tree_lock); | 153 | spin_unlock_irq(&mapping->tree_lock); |
152 | mem_cgroup_uncharge_cache_page(page); | 154 | mem_cgroup_uncharge_cache_page(page); |
155 | |||
156 | if (freepage) | ||
157 | freepage(page); | ||
153 | } | 158 | } |
154 | EXPORT_SYMBOL(remove_from_page_cache); | 159 | EXPORT_SYMBOL(remove_from_page_cache); |
155 | 160 | ||
diff --git a/mm/hugetlb.c b/mm/hugetlb.c index c4a3558589ab..85855240933d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c | |||
@@ -2738,7 +2738,8 @@ out_page_table_lock: | |||
2738 | unlock_page(pagecache_page); | 2738 | unlock_page(pagecache_page); |
2739 | put_page(pagecache_page); | 2739 | put_page(pagecache_page); |
2740 | } | 2740 | } |
2741 | unlock_page(page); | 2741 | if (page != pagecache_page) |
2742 | unlock_page(page); | ||
2742 | 2743 | ||
2743 | out_mutex: | 2744 | out_mutex: |
2744 | mutex_unlock(&hugetlb_instantiation_mutex); | 2745 | mutex_unlock(&hugetlb_instantiation_mutex); |
@@ -1724,8 +1724,13 @@ static int ksm_memory_callback(struct notifier_block *self, | |||
1724 | /* | 1724 | /* |
1725 | * Keep it very simple for now: just lock out ksmd and | 1725 | * Keep it very simple for now: just lock out ksmd and |
1726 | * MADV_UNMERGEABLE while any memory is going offline. | 1726 | * MADV_UNMERGEABLE while any memory is going offline. |
1727 | * mutex_lock_nested() is necessary because lockdep was alarmed | ||
1728 | * that here we take ksm_thread_mutex inside notifier chain | ||
1729 | * mutex, and later take notifier chain mutex inside | ||
1730 | * ksm_thread_mutex to unlock it. But that's safe because both | ||
1731 | * are inside mem_hotplug_mutex. | ||
1727 | */ | 1732 | */ |
1728 | mutex_lock(&ksm_thread_mutex); | 1733 | mutex_lock_nested(&ksm_thread_mutex, SINGLE_DEPTH_NESTING); |
1729 | break; | 1734 | break; |
1730 | 1735 | ||
1731 | case MEM_OFFLINE: | 1736 | case MEM_OFFLINE: |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2efa8ea07ff7..7a22b4129211 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -61,7 +61,14 @@ struct mem_cgroup *root_mem_cgroup __read_mostly; | |||
61 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP | 61 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP |
62 | /* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */ | 62 | /* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */ |
63 | int do_swap_account __read_mostly; | 63 | int do_swap_account __read_mostly; |
64 | static int really_do_swap_account __initdata = 1; /* for remember boot option*/ | 64 | |
65 | /* for remember boot option*/ | ||
66 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP_ENABLED | ||
67 | static int really_do_swap_account __initdata = 1; | ||
68 | #else | ||
69 | static int really_do_swap_account __initdata = 0; | ||
70 | #endif | ||
71 | |||
65 | #else | 72 | #else |
66 | #define do_swap_account (0) | 73 | #define do_swap_account (0) |
67 | #endif | 74 | #endif |
@@ -278,13 +285,14 @@ enum move_type { | |||
278 | 285 | ||
279 | /* "mc" and its members are protected by cgroup_mutex */ | 286 | /* "mc" and its members are protected by cgroup_mutex */ |
280 | static struct move_charge_struct { | 287 | static struct move_charge_struct { |
281 | spinlock_t lock; /* for from, to, moving_task */ | 288 | spinlock_t lock; /* for from, to */ |
282 | struct mem_cgroup *from; | 289 | struct mem_cgroup *from; |
283 | struct mem_cgroup *to; | 290 | struct mem_cgroup *to; |
284 | unsigned long precharge; | 291 | unsigned long precharge; |
285 | unsigned long moved_charge; | 292 | unsigned long moved_charge; |
286 | unsigned long moved_swap; | 293 | unsigned long moved_swap; |
287 | struct task_struct *moving_task; /* a task moving charges */ | 294 | struct task_struct *moving_task; /* a task moving charges */ |
295 | struct mm_struct *mm; | ||
288 | wait_queue_head_t waitq; /* a waitq for other context */ | 296 | wait_queue_head_t waitq; /* a waitq for other context */ |
289 | } mc = { | 297 | } mc = { |
290 | .lock = __SPIN_LOCK_UNLOCKED(mc.lock), | 298 | .lock = __SPIN_LOCK_UNLOCKED(mc.lock), |
@@ -2152,7 +2160,7 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc, | |||
2152 | { | 2160 | { |
2153 | VM_BUG_ON(from == to); | 2161 | VM_BUG_ON(from == to); |
2154 | VM_BUG_ON(PageLRU(pc->page)); | 2162 | VM_BUG_ON(PageLRU(pc->page)); |
2155 | VM_BUG_ON(!PageCgroupLocked(pc)); | 2163 | VM_BUG_ON(!page_is_cgroup_locked(pc)); |
2156 | VM_BUG_ON(!PageCgroupUsed(pc)); | 2164 | VM_BUG_ON(!PageCgroupUsed(pc)); |
2157 | VM_BUG_ON(pc->mem_cgroup != from); | 2165 | VM_BUG_ON(pc->mem_cgroup != from); |
2158 | 2166 | ||
@@ -4631,7 +4639,7 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm) | |||
4631 | unsigned long precharge; | 4639 | unsigned long precharge; |
4632 | struct vm_area_struct *vma; | 4640 | struct vm_area_struct *vma; |
4633 | 4641 | ||
4634 | down_read(&mm->mmap_sem); | 4642 | /* We've already held the mmap_sem */ |
4635 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | 4643 | for (vma = mm->mmap; vma; vma = vma->vm_next) { |
4636 | struct mm_walk mem_cgroup_count_precharge_walk = { | 4644 | struct mm_walk mem_cgroup_count_precharge_walk = { |
4637 | .pmd_entry = mem_cgroup_count_precharge_pte_range, | 4645 | .pmd_entry = mem_cgroup_count_precharge_pte_range, |
@@ -4643,7 +4651,6 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm) | |||
4643 | walk_page_range(vma->vm_start, vma->vm_end, | 4651 | walk_page_range(vma->vm_start, vma->vm_end, |
4644 | &mem_cgroup_count_precharge_walk); | 4652 | &mem_cgroup_count_precharge_walk); |
4645 | } | 4653 | } |
4646 | up_read(&mm->mmap_sem); | ||
4647 | 4654 | ||
4648 | precharge = mc.precharge; | 4655 | precharge = mc.precharge; |
4649 | mc.precharge = 0; | 4656 | mc.precharge = 0; |
@@ -4694,11 +4701,16 @@ static void mem_cgroup_clear_mc(void) | |||
4694 | 4701 | ||
4695 | mc.moved_swap = 0; | 4702 | mc.moved_swap = 0; |
4696 | } | 4703 | } |
4704 | if (mc.mm) { | ||
4705 | up_read(&mc.mm->mmap_sem); | ||
4706 | mmput(mc.mm); | ||
4707 | } | ||
4697 | spin_lock(&mc.lock); | 4708 | spin_lock(&mc.lock); |
4698 | mc.from = NULL; | 4709 | mc.from = NULL; |
4699 | mc.to = NULL; | 4710 | mc.to = NULL; |
4700 | mc.moving_task = NULL; | ||
4701 | spin_unlock(&mc.lock); | 4711 | spin_unlock(&mc.lock); |
4712 | mc.moving_task = NULL; | ||
4713 | mc.mm = NULL; | ||
4702 | mem_cgroup_end_move(from); | 4714 | mem_cgroup_end_move(from); |
4703 | memcg_oom_recover(from); | 4715 | memcg_oom_recover(from); |
4704 | memcg_oom_recover(to); | 4716 | memcg_oom_recover(to); |
@@ -4724,12 +4736,21 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss, | |||
4724 | return 0; | 4736 | return 0; |
4725 | /* We move charges only when we move a owner of the mm */ | 4737 | /* We move charges only when we move a owner of the mm */ |
4726 | if (mm->owner == p) { | 4738 | if (mm->owner == p) { |
4739 | /* | ||
4740 | * We do all the move charge works under one mmap_sem to | ||
4741 | * avoid deadlock with down_write(&mmap_sem) | ||
4742 | * -> try_charge() -> if (mc.moving_task) -> sleep. | ||
4743 | */ | ||
4744 | down_read(&mm->mmap_sem); | ||
4745 | |||
4727 | VM_BUG_ON(mc.from); | 4746 | VM_BUG_ON(mc.from); |
4728 | VM_BUG_ON(mc.to); | 4747 | VM_BUG_ON(mc.to); |
4729 | VM_BUG_ON(mc.precharge); | 4748 | VM_BUG_ON(mc.precharge); |
4730 | VM_BUG_ON(mc.moved_charge); | 4749 | VM_BUG_ON(mc.moved_charge); |
4731 | VM_BUG_ON(mc.moved_swap); | 4750 | VM_BUG_ON(mc.moved_swap); |
4732 | VM_BUG_ON(mc.moving_task); | 4751 | VM_BUG_ON(mc.moving_task); |
4752 | VM_BUG_ON(mc.mm); | ||
4753 | |||
4733 | mem_cgroup_start_move(from); | 4754 | mem_cgroup_start_move(from); |
4734 | spin_lock(&mc.lock); | 4755 | spin_lock(&mc.lock); |
4735 | mc.from = from; | 4756 | mc.from = from; |
@@ -4737,14 +4758,16 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss, | |||
4737 | mc.precharge = 0; | 4758 | mc.precharge = 0; |
4738 | mc.moved_charge = 0; | 4759 | mc.moved_charge = 0; |
4739 | mc.moved_swap = 0; | 4760 | mc.moved_swap = 0; |
4740 | mc.moving_task = current; | ||
4741 | spin_unlock(&mc.lock); | 4761 | spin_unlock(&mc.lock); |
4762 | mc.moving_task = current; | ||
4763 | mc.mm = mm; | ||
4742 | 4764 | ||
4743 | ret = mem_cgroup_precharge_mc(mm); | 4765 | ret = mem_cgroup_precharge_mc(mm); |
4744 | if (ret) | 4766 | if (ret) |
4745 | mem_cgroup_clear_mc(); | 4767 | mem_cgroup_clear_mc(); |
4746 | } | 4768 | /* We call up_read() and mmput() in clear_mc(). */ |
4747 | mmput(mm); | 4769 | } else |
4770 | mmput(mm); | ||
4748 | } | 4771 | } |
4749 | return ret; | 4772 | return ret; |
4750 | } | 4773 | } |
@@ -4832,7 +4855,7 @@ static void mem_cgroup_move_charge(struct mm_struct *mm) | |||
4832 | struct vm_area_struct *vma; | 4855 | struct vm_area_struct *vma; |
4833 | 4856 | ||
4834 | lru_add_drain_all(); | 4857 | lru_add_drain_all(); |
4835 | down_read(&mm->mmap_sem); | 4858 | /* We've already held the mmap_sem */ |
4836 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | 4859 | for (vma = mm->mmap; vma; vma = vma->vm_next) { |
4837 | int ret; | 4860 | int ret; |
4838 | struct mm_walk mem_cgroup_move_charge_walk = { | 4861 | struct mm_walk mem_cgroup_move_charge_walk = { |
@@ -4851,7 +4874,6 @@ static void mem_cgroup_move_charge(struct mm_struct *mm) | |||
4851 | */ | 4874 | */ |
4852 | break; | 4875 | break; |
4853 | } | 4876 | } |
4854 | up_read(&mm->mmap_sem); | ||
4855 | } | 4877 | } |
4856 | 4878 | ||
4857 | static void mem_cgroup_move_task(struct cgroup_subsys *ss, | 4879 | static void mem_cgroup_move_task(struct cgroup_subsys *ss, |
@@ -4860,17 +4882,11 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss, | |||
4860 | struct task_struct *p, | 4882 | struct task_struct *p, |
4861 | bool threadgroup) | 4883 | bool threadgroup) |
4862 | { | 4884 | { |
4863 | struct mm_struct *mm; | 4885 | if (!mc.mm) |
4864 | |||
4865 | if (!mc.to) | ||
4866 | /* no need to move charge */ | 4886 | /* no need to move charge */ |
4867 | return; | 4887 | return; |
4868 | 4888 | ||
4869 | mm = get_task_mm(p); | 4889 | mem_cgroup_move_charge(mc.mm); |
4870 | if (mm) { | ||
4871 | mem_cgroup_move_charge(mm); | ||
4872 | mmput(mm); | ||
4873 | } | ||
4874 | mem_cgroup_clear_mc(); | 4890 | mem_cgroup_clear_mc(); |
4875 | } | 4891 | } |
4876 | #else /* !CONFIG_MMU */ | 4892 | #else /* !CONFIG_MMU */ |
@@ -4911,10 +4927,20 @@ struct cgroup_subsys mem_cgroup_subsys = { | |||
4911 | }; | 4927 | }; |
4912 | 4928 | ||
4913 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP | 4929 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP |
4930 | static int __init enable_swap_account(char *s) | ||
4931 | { | ||
4932 | /* consider enabled if no parameter or 1 is given */ | ||
4933 | if (!s || !strcmp(s, "1")) | ||
4934 | really_do_swap_account = 1; | ||
4935 | else if (!strcmp(s, "0")) | ||
4936 | really_do_swap_account = 0; | ||
4937 | return 1; | ||
4938 | } | ||
4939 | __setup("swapaccount", enable_swap_account); | ||
4914 | 4940 | ||
4915 | static int __init disable_swap_account(char *s) | 4941 | static int __init disable_swap_account(char *s) |
4916 | { | 4942 | { |
4917 | really_do_swap_account = 0; | 4943 | enable_swap_account("0"); |
4918 | return 1; | 4944 | return 1; |
4919 | } | 4945 | } |
4920 | __setup("noswapaccount", disable_swap_account); | 4946 | __setup("noswapaccount", disable_swap_account); |
diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 124324134ff6..46ab2c044b0e 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c | |||
@@ -51,6 +51,7 @@ | |||
51 | #include <linux/slab.h> | 51 | #include <linux/slab.h> |
52 | #include <linux/swapops.h> | 52 | #include <linux/swapops.h> |
53 | #include <linux/hugetlb.h> | 53 | #include <linux/hugetlb.h> |
54 | #include <linux/memory_hotplug.h> | ||
54 | #include "internal.h" | 55 | #include "internal.h" |
55 | 56 | ||
56 | int sysctl_memory_failure_early_kill __read_mostly = 0; | 57 | int sysctl_memory_failure_early_kill __read_mostly = 0; |
@@ -1230,11 +1231,10 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags) | |||
1230 | return 1; | 1231 | return 1; |
1231 | 1232 | ||
1232 | /* | 1233 | /* |
1233 | * The lock_system_sleep prevents a race with memory hotplug, | 1234 | * The lock_memory_hotplug prevents a race with memory hotplug. |
1234 | * because the isolation assumes there's only a single user. | ||
1235 | * This is a big hammer, a better would be nicer. | 1235 | * This is a big hammer, a better would be nicer. |
1236 | */ | 1236 | */ |
1237 | lock_system_sleep(); | 1237 | lock_memory_hotplug(); |
1238 | 1238 | ||
1239 | /* | 1239 | /* |
1240 | * Isolate the page, so that it doesn't get reallocated if it | 1240 | * Isolate the page, so that it doesn't get reallocated if it |
@@ -1264,7 +1264,7 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags) | |||
1264 | ret = 1; | 1264 | ret = 1; |
1265 | } | 1265 | } |
1266 | unset_migratetype_isolate(p); | 1266 | unset_migratetype_isolate(p); |
1267 | unlock_system_sleep(); | 1267 | unlock_memory_hotplug(); |
1268 | return ret; | 1268 | return ret; |
1269 | } | 1269 | } |
1270 | 1270 | ||
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 9260314a221e..2c6523af5473 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c | |||
@@ -34,6 +34,23 @@ | |||
34 | 34 | ||
35 | #include "internal.h" | 35 | #include "internal.h" |
36 | 36 | ||
37 | DEFINE_MUTEX(mem_hotplug_mutex); | ||
38 | |||
39 | void lock_memory_hotplug(void) | ||
40 | { | ||
41 | mutex_lock(&mem_hotplug_mutex); | ||
42 | |||
43 | /* for exclusive hibernation if CONFIG_HIBERNATION=y */ | ||
44 | lock_system_sleep(); | ||
45 | } | ||
46 | |||
47 | void unlock_memory_hotplug(void) | ||
48 | { | ||
49 | unlock_system_sleep(); | ||
50 | mutex_unlock(&mem_hotplug_mutex); | ||
51 | } | ||
52 | |||
53 | |||
37 | /* add this memory to iomem resource */ | 54 | /* add this memory to iomem resource */ |
38 | static struct resource *register_memory_resource(u64 start, u64 size) | 55 | static struct resource *register_memory_resource(u64 start, u64 size) |
39 | { | 56 | { |
@@ -493,7 +510,7 @@ int mem_online_node(int nid) | |||
493 | pg_data_t *pgdat; | 510 | pg_data_t *pgdat; |
494 | int ret; | 511 | int ret; |
495 | 512 | ||
496 | lock_system_sleep(); | 513 | lock_memory_hotplug(); |
497 | pgdat = hotadd_new_pgdat(nid, 0); | 514 | pgdat = hotadd_new_pgdat(nid, 0); |
498 | if (pgdat) { | 515 | if (pgdat) { |
499 | ret = -ENOMEM; | 516 | ret = -ENOMEM; |
@@ -504,7 +521,7 @@ int mem_online_node(int nid) | |||
504 | BUG_ON(ret); | 521 | BUG_ON(ret); |
505 | 522 | ||
506 | out: | 523 | out: |
507 | unlock_system_sleep(); | 524 | unlock_memory_hotplug(); |
508 | return ret; | 525 | return ret; |
509 | } | 526 | } |
510 | 527 | ||
@@ -516,7 +533,7 @@ int __ref add_memory(int nid, u64 start, u64 size) | |||
516 | struct resource *res; | 533 | struct resource *res; |
517 | int ret; | 534 | int ret; |
518 | 535 | ||
519 | lock_system_sleep(); | 536 | lock_memory_hotplug(); |
520 | 537 | ||
521 | res = register_memory_resource(start, size); | 538 | res = register_memory_resource(start, size); |
522 | ret = -EEXIST; | 539 | ret = -EEXIST; |
@@ -563,7 +580,7 @@ error: | |||
563 | release_memory_resource(res); | 580 | release_memory_resource(res); |
564 | 581 | ||
565 | out: | 582 | out: |
566 | unlock_system_sleep(); | 583 | unlock_memory_hotplug(); |
567 | return ret; | 584 | return ret; |
568 | } | 585 | } |
569 | EXPORT_SYMBOL_GPL(add_memory); | 586 | EXPORT_SYMBOL_GPL(add_memory); |
@@ -791,7 +808,7 @@ static int offline_pages(unsigned long start_pfn, | |||
791 | if (!test_pages_in_a_zone(start_pfn, end_pfn)) | 808 | if (!test_pages_in_a_zone(start_pfn, end_pfn)) |
792 | return -EINVAL; | 809 | return -EINVAL; |
793 | 810 | ||
794 | lock_system_sleep(); | 811 | lock_memory_hotplug(); |
795 | 812 | ||
796 | zone = page_zone(pfn_to_page(start_pfn)); | 813 | zone = page_zone(pfn_to_page(start_pfn)); |
797 | node = zone_to_nid(zone); | 814 | node = zone_to_nid(zone); |
@@ -880,7 +897,7 @@ repeat: | |||
880 | writeback_set_ratelimit(); | 897 | writeback_set_ratelimit(); |
881 | 898 | ||
882 | memory_notify(MEM_OFFLINE, &arg); | 899 | memory_notify(MEM_OFFLINE, &arg); |
883 | unlock_system_sleep(); | 900 | unlock_memory_hotplug(); |
884 | return 0; | 901 | return 0; |
885 | 902 | ||
886 | failed_removal: | 903 | failed_removal: |
@@ -891,7 +908,7 @@ failed_removal: | |||
891 | undo_isolate_page_range(start_pfn, end_pfn); | 908 | undo_isolate_page_range(start_pfn, end_pfn); |
892 | 909 | ||
893 | out: | 910 | out: |
894 | unlock_system_sleep(); | 911 | unlock_memory_hotplug(); |
895 | return ret; | 912 | return ret; |
896 | } | 913 | } |
897 | 914 | ||
diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 4a57f135b76e..11ff260fb282 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c | |||
@@ -1307,15 +1307,18 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode, | |||
1307 | goto out; | 1307 | goto out; |
1308 | 1308 | ||
1309 | /* Find the mm_struct */ | 1309 | /* Find the mm_struct */ |
1310 | rcu_read_lock(); | ||
1310 | read_lock(&tasklist_lock); | 1311 | read_lock(&tasklist_lock); |
1311 | task = pid ? find_task_by_vpid(pid) : current; | 1312 | task = pid ? find_task_by_vpid(pid) : current; |
1312 | if (!task) { | 1313 | if (!task) { |
1313 | read_unlock(&tasklist_lock); | 1314 | read_unlock(&tasklist_lock); |
1315 | rcu_read_unlock(); | ||
1314 | err = -ESRCH; | 1316 | err = -ESRCH; |
1315 | goto out; | 1317 | goto out; |
1316 | } | 1318 | } |
1317 | mm = get_task_mm(task); | 1319 | mm = get_task_mm(task); |
1318 | read_unlock(&tasklist_lock); | 1320 | read_unlock(&tasklist_lock); |
1321 | rcu_read_unlock(); | ||
1319 | 1322 | ||
1320 | err = -EINVAL; | 1323 | err = -EINVAL; |
1321 | if (!mm) | 1324 | if (!mm) |
@@ -2462,6 +2462,7 @@ int install_special_mapping(struct mm_struct *mm, | |||
2462 | unsigned long addr, unsigned long len, | 2462 | unsigned long addr, unsigned long len, |
2463 | unsigned long vm_flags, struct page **pages) | 2463 | unsigned long vm_flags, struct page **pages) |
2464 | { | 2464 | { |
2465 | int ret; | ||
2465 | struct vm_area_struct *vma; | 2466 | struct vm_area_struct *vma; |
2466 | 2467 | ||
2467 | vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); | 2468 | vma = kmem_cache_zalloc(vm_area_cachep, GFP_KERNEL); |
@@ -2479,16 +2480,23 @@ int install_special_mapping(struct mm_struct *mm, | |||
2479 | vma->vm_ops = &special_mapping_vmops; | 2480 | vma->vm_ops = &special_mapping_vmops; |
2480 | vma->vm_private_data = pages; | 2481 | vma->vm_private_data = pages; |
2481 | 2482 | ||
2482 | if (unlikely(insert_vm_struct(mm, vma))) { | 2483 | ret = security_file_mmap(NULL, 0, 0, 0, vma->vm_start, 1); |
2483 | kmem_cache_free(vm_area_cachep, vma); | 2484 | if (ret) |
2484 | return -ENOMEM; | 2485 | goto out; |
2485 | } | 2486 | |
2487 | ret = insert_vm_struct(mm, vma); | ||
2488 | if (ret) | ||
2489 | goto out; | ||
2486 | 2490 | ||
2487 | mm->total_vm += len >> PAGE_SHIFT; | 2491 | mm->total_vm += len >> PAGE_SHIFT; |
2488 | 2492 | ||
2489 | perf_event_mmap(vma); | 2493 | perf_event_mmap(vma); |
2490 | 2494 | ||
2491 | return 0; | 2495 | return 0; |
2496 | |||
2497 | out: | ||
2498 | kmem_cache_free(vm_area_cachep, vma); | ||
2499 | return ret; | ||
2492 | } | 2500 | } |
2493 | 2501 | ||
2494 | static DEFINE_MUTEX(mm_all_locks_mutex); | 2502 | static DEFINE_MUTEX(mm_all_locks_mutex); |
diff --git a/mm/nommu.c b/mm/nommu.c index 3613517c7592..27a9ac588516 100644 --- a/mm/nommu.c +++ b/mm/nommu.c | |||
@@ -1717,6 +1717,7 @@ void exit_mmap(struct mm_struct *mm) | |||
1717 | mm->mmap = vma->vm_next; | 1717 | mm->mmap = vma->vm_next; |
1718 | delete_vma_from_mm(vma); | 1718 | delete_vma_from_mm(vma); |
1719 | delete_vma(mm, vma); | 1719 | delete_vma(mm, vma); |
1720 | cond_resched(); | ||
1720 | } | 1721 | } |
1721 | 1722 | ||
1722 | kleave(""); | 1723 | kleave(""); |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 07a654486f75..ff7e15872398 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -104,19 +104,24 @@ gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK; | |||
104 | * only be modified with pm_mutex held, unless the suspend/hibernate code is | 104 | * only be modified with pm_mutex held, unless the suspend/hibernate code is |
105 | * guaranteed not to run in parallel with that modification). | 105 | * guaranteed not to run in parallel with that modification). |
106 | */ | 106 | */ |
107 | void set_gfp_allowed_mask(gfp_t mask) | 107 | |
108 | static gfp_t saved_gfp_mask; | ||
109 | |||
110 | void pm_restore_gfp_mask(void) | ||
108 | { | 111 | { |
109 | WARN_ON(!mutex_is_locked(&pm_mutex)); | 112 | WARN_ON(!mutex_is_locked(&pm_mutex)); |
110 | gfp_allowed_mask = mask; | 113 | if (saved_gfp_mask) { |
114 | gfp_allowed_mask = saved_gfp_mask; | ||
115 | saved_gfp_mask = 0; | ||
116 | } | ||
111 | } | 117 | } |
112 | 118 | ||
113 | gfp_t clear_gfp_allowed_mask(gfp_t mask) | 119 | void pm_restrict_gfp_mask(void) |
114 | { | 120 | { |
115 | gfp_t ret = gfp_allowed_mask; | ||
116 | |||
117 | WARN_ON(!mutex_is_locked(&pm_mutex)); | 121 | WARN_ON(!mutex_is_locked(&pm_mutex)); |
118 | gfp_allowed_mask &= ~mask; | 122 | WARN_ON(saved_gfp_mask); |
119 | return ret; | 123 | saved_gfp_mask = gfp_allowed_mask; |
124 | gfp_allowed_mask &= ~GFP_IOFS; | ||
120 | } | 125 | } |
121 | #endif /* CONFIG_PM_SLEEP */ | 126 | #endif /* CONFIG_PM_SLEEP */ |
122 | 127 | ||
@@ -3008,14 +3013,6 @@ static __init_refok int __build_all_zonelists(void *data) | |||
3008 | build_zonelist_cache(pgdat); | 3013 | build_zonelist_cache(pgdat); |
3009 | } | 3014 | } |
3010 | 3015 | ||
3011 | #ifdef CONFIG_MEMORY_HOTPLUG | ||
3012 | /* Setup real pagesets for the new zone */ | ||
3013 | if (data) { | ||
3014 | struct zone *zone = data; | ||
3015 | setup_zone_pageset(zone); | ||
3016 | } | ||
3017 | #endif | ||
3018 | |||
3019 | /* | 3016 | /* |
3020 | * Initialize the boot_pagesets that are going to be used | 3017 | * Initialize the boot_pagesets that are going to be used |
3021 | * for bootstrapping processors. The real pagesets for | 3018 | * for bootstrapping processors. The real pagesets for |
@@ -3064,7 +3061,11 @@ void build_all_zonelists(void *data) | |||
3064 | } else { | 3061 | } else { |
3065 | /* we have to stop all cpus to guarantee there is no user | 3062 | /* we have to stop all cpus to guarantee there is no user |
3066 | of zonelist */ | 3063 | of zonelist */ |
3067 | stop_machine(__build_all_zonelists, data, NULL); | 3064 | #ifdef CONFIG_MEMORY_HOTPLUG |
3065 | if (data) | ||
3066 | setup_zone_pageset((struct zone *)data); | ||
3067 | #endif | ||
3068 | stop_machine(__build_all_zonelists, NULL, NULL); | ||
3068 | /* cpuset refresh routine should be here */ | 3069 | /* cpuset refresh routine should be here */ |
3069 | } | 3070 | } |
3070 | vm_total_pages = nr_free_pagecache_pages(); | 3071 | vm_total_pages = nr_free_pagecache_pages(); |
diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 8b1a2ce21ee5..38cc58b8b2b0 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c | |||
@@ -139,7 +139,6 @@ int walk_page_range(unsigned long addr, unsigned long end, | |||
139 | pgd_t *pgd; | 139 | pgd_t *pgd; |
140 | unsigned long next; | 140 | unsigned long next; |
141 | int err = 0; | 141 | int err = 0; |
142 | struct vm_area_struct *vma; | ||
143 | 142 | ||
144 | if (addr >= end) | 143 | if (addr >= end) |
145 | return err; | 144 | return err; |
@@ -149,15 +148,17 @@ int walk_page_range(unsigned long addr, unsigned long end, | |||
149 | 148 | ||
150 | pgd = pgd_offset(walk->mm, addr); | 149 | pgd = pgd_offset(walk->mm, addr); |
151 | do { | 150 | do { |
151 | struct vm_area_struct *uninitialized_var(vma); | ||
152 | |||
152 | next = pgd_addr_end(addr, end); | 153 | next = pgd_addr_end(addr, end); |
153 | 154 | ||
155 | #ifdef CONFIG_HUGETLB_PAGE | ||
154 | /* | 156 | /* |
155 | * handle hugetlb vma individually because pagetable walk for | 157 | * handle hugetlb vma individually because pagetable walk for |
156 | * the hugetlb page is dependent on the architecture and | 158 | * the hugetlb page is dependent on the architecture and |
157 | * we can't handled it in the same manner as non-huge pages. | 159 | * we can't handled it in the same manner as non-huge pages. |
158 | */ | 160 | */ |
159 | vma = find_vma(walk->mm, addr); | 161 | vma = find_vma(walk->mm, addr); |
160 | #ifdef CONFIG_HUGETLB_PAGE | ||
161 | if (vma && is_vm_hugetlb_page(vma)) { | 162 | if (vma && is_vm_hugetlb_page(vma)) { |
162 | if (vma->vm_end < next) | 163 | if (vma->vm_end < next) |
163 | next = vma->vm_end; | 164 | next = vma->vm_end; |
@@ -3401,13 +3401,13 @@ static int validate_slab(struct kmem_cache *s, struct page *page, | |||
3401 | 3401 | ||
3402 | for_each_free_object(p, s, page->freelist) { | 3402 | for_each_free_object(p, s, page->freelist) { |
3403 | set_bit(slab_index(p, s, addr), map); | 3403 | set_bit(slab_index(p, s, addr), map); |
3404 | if (!check_object(s, page, p, 0)) | 3404 | if (!check_object(s, page, p, SLUB_RED_INACTIVE)) |
3405 | return 0; | 3405 | return 0; |
3406 | } | 3406 | } |
3407 | 3407 | ||
3408 | for_each_object(p, s, addr, page->objects) | 3408 | for_each_object(p, s, addr, page->objects) |
3409 | if (!test_bit(slab_index(p, s, addr), map)) | 3409 | if (!test_bit(slab_index(p, s, addr), map)) |
3410 | if (!check_object(s, page, p, 1)) | 3410 | if (!check_object(s, page, p, SLUB_RED_ACTIVE)) |
3411 | return 0; | 3411 | return 0; |
3412 | return 1; | 3412 | return 1; |
3413 | } | 3413 | } |
diff --git a/mm/truncate.c b/mm/truncate.c index ba887bff48c5..3c2d5ddfa0d4 100644 --- a/mm/truncate.c +++ b/mm/truncate.c | |||
@@ -390,6 +390,10 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page) | |||
390 | __remove_from_page_cache(page); | 390 | __remove_from_page_cache(page); |
391 | spin_unlock_irq(&mapping->tree_lock); | 391 | spin_unlock_irq(&mapping->tree_lock); |
392 | mem_cgroup_uncharge_cache_page(page); | 392 | mem_cgroup_uncharge_cache_page(page); |
393 | |||
394 | if (mapping->a_ops->freepage) | ||
395 | mapping->a_ops->freepage(page); | ||
396 | |||
393 | page_cache_release(page); /* pagecache ref */ | 397 | page_cache_release(page); /* pagecache ref */ |
394 | return 1; | 398 | return 1; |
395 | failed: | 399 | failed: |
diff --git a/mm/vmalloc.c b/mm/vmalloc.c index a3d66b3dc5cb..eb5cc7d00c5a 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c | |||
@@ -31,8 +31,6 @@ | |||
31 | #include <asm/tlbflush.h> | 31 | #include <asm/tlbflush.h> |
32 | #include <asm/shmparam.h> | 32 | #include <asm/shmparam.h> |
33 | 33 | ||
34 | bool vmap_lazy_unmap __read_mostly = true; | ||
35 | |||
36 | /*** Page table manipulation functions ***/ | 34 | /*** Page table manipulation functions ***/ |
37 | 35 | ||
38 | static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end) | 36 | static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end) |
@@ -503,9 +501,6 @@ static unsigned long lazy_max_pages(void) | |||
503 | { | 501 | { |
504 | unsigned int log; | 502 | unsigned int log; |
505 | 503 | ||
506 | if (!vmap_lazy_unmap) | ||
507 | return 0; | ||
508 | |||
509 | log = fls(num_online_cpus()); | 504 | log = fls(num_online_cpus()); |
510 | 505 | ||
511 | return log * (32UL * 1024 * 1024 / PAGE_SIZE); | 506 | return log * (32UL * 1024 * 1024 / PAGE_SIZE); |
@@ -566,7 +561,6 @@ static void __purge_vmap_area_lazy(unsigned long *start, unsigned long *end, | |||
566 | if (va->va_end > *end) | 561 | if (va->va_end > *end) |
567 | *end = va->va_end; | 562 | *end = va->va_end; |
568 | nr += (va->va_end - va->va_start) >> PAGE_SHIFT; | 563 | nr += (va->va_end - va->va_start) >> PAGE_SHIFT; |
569 | unmap_vmap_area(va); | ||
570 | list_add_tail(&va->purge_list, &valist); | 564 | list_add_tail(&va->purge_list, &valist); |
571 | va->flags |= VM_LAZY_FREEING; | 565 | va->flags |= VM_LAZY_FREEING; |
572 | va->flags &= ~VM_LAZY_FREE; | 566 | va->flags &= ~VM_LAZY_FREE; |
@@ -611,10 +605,11 @@ static void purge_vmap_area_lazy(void) | |||
611 | } | 605 | } |
612 | 606 | ||
613 | /* | 607 | /* |
614 | * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been | 608 | * Free a vmap area, caller ensuring that the area has been unmapped |
615 | * called for the correct range previously. | 609 | * and flush_cache_vunmap had been called for the correct range |
610 | * previously. | ||
616 | */ | 611 | */ |
617 | static void free_unmap_vmap_area_noflush(struct vmap_area *va) | 612 | static void free_vmap_area_noflush(struct vmap_area *va) |
618 | { | 613 | { |
619 | va->flags |= VM_LAZY_FREE; | 614 | va->flags |= VM_LAZY_FREE; |
620 | atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr); | 615 | atomic_add((va->va_end - va->va_start) >> PAGE_SHIFT, &vmap_lazy_nr); |
@@ -623,6 +618,16 @@ static void free_unmap_vmap_area_noflush(struct vmap_area *va) | |||
623 | } | 618 | } |
624 | 619 | ||
625 | /* | 620 | /* |
621 | * Free and unmap a vmap area, caller ensuring flush_cache_vunmap had been | ||
622 | * called for the correct range previously. | ||
623 | */ | ||
624 | static void free_unmap_vmap_area_noflush(struct vmap_area *va) | ||
625 | { | ||
626 | unmap_vmap_area(va); | ||
627 | free_vmap_area_noflush(va); | ||
628 | } | ||
629 | |||
630 | /* | ||
626 | * Free and unmap a vmap area | 631 | * Free and unmap a vmap area |
627 | */ | 632 | */ |
628 | static void free_unmap_vmap_area(struct vmap_area *va) | 633 | static void free_unmap_vmap_area(struct vmap_area *va) |
@@ -798,7 +803,7 @@ static void free_vmap_block(struct vmap_block *vb) | |||
798 | spin_unlock(&vmap_block_tree_lock); | 803 | spin_unlock(&vmap_block_tree_lock); |
799 | BUG_ON(tmp != vb); | 804 | BUG_ON(tmp != vb); |
800 | 805 | ||
801 | free_unmap_vmap_area_noflush(vb->va); | 806 | free_vmap_area_noflush(vb->va); |
802 | call_rcu(&vb->rcu_head, rcu_free_vb); | 807 | call_rcu(&vb->rcu_head, rcu_free_vb); |
803 | } | 808 | } |
804 | 809 | ||
@@ -936,6 +941,8 @@ static void vb_free(const void *addr, unsigned long size) | |||
936 | rcu_read_unlock(); | 941 | rcu_read_unlock(); |
937 | BUG_ON(!vb); | 942 | BUG_ON(!vb); |
938 | 943 | ||
944 | vunmap_page_range((unsigned long)addr, (unsigned long)addr + size); | ||
945 | |||
939 | spin_lock(&vb->lock); | 946 | spin_lock(&vb->lock); |
940 | BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order)); | 947 | BUG_ON(bitmap_allocate_region(vb->dirty_map, offset >> PAGE_SHIFT, order)); |
941 | 948 | ||
@@ -988,7 +995,6 @@ void vm_unmap_aliases(void) | |||
988 | 995 | ||
989 | s = vb->va->va_start + (i << PAGE_SHIFT); | 996 | s = vb->va->va_start + (i << PAGE_SHIFT); |
990 | e = vb->va->va_start + (j << PAGE_SHIFT); | 997 | e = vb->va->va_start + (j << PAGE_SHIFT); |
991 | vunmap_page_range(s, e); | ||
992 | flush = 1; | 998 | flush = 1; |
993 | 999 | ||
994 | if (s < start) | 1000 | if (s < start) |
diff --git a/mm/vmscan.c b/mm/vmscan.c index d31d7ce52c0e..9ca587c69274 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c | |||
@@ -494,9 +494,16 @@ static int __remove_mapping(struct address_space *mapping, struct page *page) | |||
494 | spin_unlock_irq(&mapping->tree_lock); | 494 | spin_unlock_irq(&mapping->tree_lock); |
495 | swapcache_free(swap, page); | 495 | swapcache_free(swap, page); |
496 | } else { | 496 | } else { |
497 | void (*freepage)(struct page *); | ||
498 | |||
499 | freepage = mapping->a_ops->freepage; | ||
500 | |||
497 | __remove_from_page_cache(page); | 501 | __remove_from_page_cache(page); |
498 | spin_unlock_irq(&mapping->tree_lock); | 502 | spin_unlock_irq(&mapping->tree_lock); |
499 | mem_cgroup_uncharge_cache_page(page); | 503 | mem_cgroup_uncharge_cache_page(page); |
504 | |||
505 | if (freepage != NULL) | ||
506 | freepage(page); | ||
500 | } | 507 | } |
501 | 508 | ||
502 | return 1; | 509 | return 1; |
diff --git a/mm/vmstat.c b/mm/vmstat.c index 42eac4d33216..8f62f17ee1c7 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c | |||
@@ -750,8 +750,6 @@ static const char * const vmstat_text[] = { | |||
750 | "nr_shmem", | 750 | "nr_shmem", |
751 | "nr_dirtied", | 751 | "nr_dirtied", |
752 | "nr_written", | 752 | "nr_written", |
753 | "nr_dirty_threshold", | ||
754 | "nr_dirty_background_threshold", | ||
755 | 753 | ||
756 | #ifdef CONFIG_NUMA | 754 | #ifdef CONFIG_NUMA |
757 | "numa_hit", | 755 | "numa_hit", |
@@ -761,6 +759,8 @@ static const char * const vmstat_text[] = { | |||
761 | "numa_local", | 759 | "numa_local", |
762 | "numa_other", | 760 | "numa_other", |
763 | #endif | 761 | #endif |
762 | "nr_dirty_threshold", | ||
763 | "nr_dirty_background_threshold", | ||
764 | 764 | ||
765 | #ifdef CONFIG_VM_EVENT_COUNTERS | 765 | #ifdef CONFIG_VM_EVENT_COUNTERS |
766 | "pgpgin", | 766 | "pgpgin", |