diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memcontrol.c | 66 | ||||
-rw-r--r-- | mm/nommu.c | 1 | ||||
-rw-r--r-- | mm/page_alloc.c | 14 | ||||
-rw-r--r-- | mm/pagewalk.c | 5 |
4 files changed, 55 insertions, 31 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 2efa8ea07ff7..7a22b4129211 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -61,7 +61,14 @@ struct mem_cgroup *root_mem_cgroup __read_mostly; | |||
61 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP | 61 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP |
62 | /* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */ | 62 | /* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */ |
63 | int do_swap_account __read_mostly; | 63 | int do_swap_account __read_mostly; |
64 | static int really_do_swap_account __initdata = 1; /* for remember boot option*/ | 64 | |
65 | /* for remember boot option*/ | ||
66 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP_ENABLED | ||
67 | static int really_do_swap_account __initdata = 1; | ||
68 | #else | ||
69 | static int really_do_swap_account __initdata = 0; | ||
70 | #endif | ||
71 | |||
65 | #else | 72 | #else |
66 | #define do_swap_account (0) | 73 | #define do_swap_account (0) |
67 | #endif | 74 | #endif |
@@ -278,13 +285,14 @@ enum move_type { | |||
278 | 285 | ||
279 | /* "mc" and its members are protected by cgroup_mutex */ | 286 | /* "mc" and its members are protected by cgroup_mutex */ |
280 | static struct move_charge_struct { | 287 | static struct move_charge_struct { |
281 | spinlock_t lock; /* for from, to, moving_task */ | 288 | spinlock_t lock; /* for from, to */ |
282 | struct mem_cgroup *from; | 289 | struct mem_cgroup *from; |
283 | struct mem_cgroup *to; | 290 | struct mem_cgroup *to; |
284 | unsigned long precharge; | 291 | unsigned long precharge; |
285 | unsigned long moved_charge; | 292 | unsigned long moved_charge; |
286 | unsigned long moved_swap; | 293 | unsigned long moved_swap; |
287 | struct task_struct *moving_task; /* a task moving charges */ | 294 | struct task_struct *moving_task; /* a task moving charges */ |
295 | struct mm_struct *mm; | ||
288 | wait_queue_head_t waitq; /* a waitq for other context */ | 296 | wait_queue_head_t waitq; /* a waitq for other context */ |
289 | } mc = { | 297 | } mc = { |
290 | .lock = __SPIN_LOCK_UNLOCKED(mc.lock), | 298 | .lock = __SPIN_LOCK_UNLOCKED(mc.lock), |
@@ -2152,7 +2160,7 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc, | |||
2152 | { | 2160 | { |
2153 | VM_BUG_ON(from == to); | 2161 | VM_BUG_ON(from == to); |
2154 | VM_BUG_ON(PageLRU(pc->page)); | 2162 | VM_BUG_ON(PageLRU(pc->page)); |
2155 | VM_BUG_ON(!PageCgroupLocked(pc)); | 2163 | VM_BUG_ON(!page_is_cgroup_locked(pc)); |
2156 | VM_BUG_ON(!PageCgroupUsed(pc)); | 2164 | VM_BUG_ON(!PageCgroupUsed(pc)); |
2157 | VM_BUG_ON(pc->mem_cgroup != from); | 2165 | VM_BUG_ON(pc->mem_cgroup != from); |
2158 | 2166 | ||
@@ -4631,7 +4639,7 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm) | |||
4631 | unsigned long precharge; | 4639 | unsigned long precharge; |
4632 | struct vm_area_struct *vma; | 4640 | struct vm_area_struct *vma; |
4633 | 4641 | ||
4634 | down_read(&mm->mmap_sem); | 4642 | /* We've already held the mmap_sem */ |
4635 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | 4643 | for (vma = mm->mmap; vma; vma = vma->vm_next) { |
4636 | struct mm_walk mem_cgroup_count_precharge_walk = { | 4644 | struct mm_walk mem_cgroup_count_precharge_walk = { |
4637 | .pmd_entry = mem_cgroup_count_precharge_pte_range, | 4645 | .pmd_entry = mem_cgroup_count_precharge_pte_range, |
@@ -4643,7 +4651,6 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm) | |||
4643 | walk_page_range(vma->vm_start, vma->vm_end, | 4651 | walk_page_range(vma->vm_start, vma->vm_end, |
4644 | &mem_cgroup_count_precharge_walk); | 4652 | &mem_cgroup_count_precharge_walk); |
4645 | } | 4653 | } |
4646 | up_read(&mm->mmap_sem); | ||
4647 | 4654 | ||
4648 | precharge = mc.precharge; | 4655 | precharge = mc.precharge; |
4649 | mc.precharge = 0; | 4656 | mc.precharge = 0; |
@@ -4694,11 +4701,16 @@ static void mem_cgroup_clear_mc(void) | |||
4694 | 4701 | ||
4695 | mc.moved_swap = 0; | 4702 | mc.moved_swap = 0; |
4696 | } | 4703 | } |
4704 | if (mc.mm) { | ||
4705 | up_read(&mc.mm->mmap_sem); | ||
4706 | mmput(mc.mm); | ||
4707 | } | ||
4697 | spin_lock(&mc.lock); | 4708 | spin_lock(&mc.lock); |
4698 | mc.from = NULL; | 4709 | mc.from = NULL; |
4699 | mc.to = NULL; | 4710 | mc.to = NULL; |
4700 | mc.moving_task = NULL; | ||
4701 | spin_unlock(&mc.lock); | 4711 | spin_unlock(&mc.lock); |
4712 | mc.moving_task = NULL; | ||
4713 | mc.mm = NULL; | ||
4702 | mem_cgroup_end_move(from); | 4714 | mem_cgroup_end_move(from); |
4703 | memcg_oom_recover(from); | 4715 | memcg_oom_recover(from); |
4704 | memcg_oom_recover(to); | 4716 | memcg_oom_recover(to); |
@@ -4724,12 +4736,21 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss, | |||
4724 | return 0; | 4736 | return 0; |
4725 | /* We move charges only when we move a owner of the mm */ | 4737 | /* We move charges only when we move a owner of the mm */ |
4726 | if (mm->owner == p) { | 4738 | if (mm->owner == p) { |
4739 | /* | ||
4740 | * We do all the move charge works under one mmap_sem to | ||
4741 | * avoid deadlock with down_write(&mmap_sem) | ||
4742 | * -> try_charge() -> if (mc.moving_task) -> sleep. | ||
4743 | */ | ||
4744 | down_read(&mm->mmap_sem); | ||
4745 | |||
4727 | VM_BUG_ON(mc.from); | 4746 | VM_BUG_ON(mc.from); |
4728 | VM_BUG_ON(mc.to); | 4747 | VM_BUG_ON(mc.to); |
4729 | VM_BUG_ON(mc.precharge); | 4748 | VM_BUG_ON(mc.precharge); |
4730 | VM_BUG_ON(mc.moved_charge); | 4749 | VM_BUG_ON(mc.moved_charge); |
4731 | VM_BUG_ON(mc.moved_swap); | 4750 | VM_BUG_ON(mc.moved_swap); |
4732 | VM_BUG_ON(mc.moving_task); | 4751 | VM_BUG_ON(mc.moving_task); |
4752 | VM_BUG_ON(mc.mm); | ||
4753 | |||
4733 | mem_cgroup_start_move(from); | 4754 | mem_cgroup_start_move(from); |
4734 | spin_lock(&mc.lock); | 4755 | spin_lock(&mc.lock); |
4735 | mc.from = from; | 4756 | mc.from = from; |
@@ -4737,14 +4758,16 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss, | |||
4737 | mc.precharge = 0; | 4758 | mc.precharge = 0; |
4738 | mc.moved_charge = 0; | 4759 | mc.moved_charge = 0; |
4739 | mc.moved_swap = 0; | 4760 | mc.moved_swap = 0; |
4740 | mc.moving_task = current; | ||
4741 | spin_unlock(&mc.lock); | 4761 | spin_unlock(&mc.lock); |
4762 | mc.moving_task = current; | ||
4763 | mc.mm = mm; | ||
4742 | 4764 | ||
4743 | ret = mem_cgroup_precharge_mc(mm); | 4765 | ret = mem_cgroup_precharge_mc(mm); |
4744 | if (ret) | 4766 | if (ret) |
4745 | mem_cgroup_clear_mc(); | 4767 | mem_cgroup_clear_mc(); |
4746 | } | 4768 | /* We call up_read() and mmput() in clear_mc(). */ |
4747 | mmput(mm); | 4769 | } else |
4770 | mmput(mm); | ||
4748 | } | 4771 | } |
4749 | return ret; | 4772 | return ret; |
4750 | } | 4773 | } |
@@ -4832,7 +4855,7 @@ static void mem_cgroup_move_charge(struct mm_struct *mm) | |||
4832 | struct vm_area_struct *vma; | 4855 | struct vm_area_struct *vma; |
4833 | 4856 | ||
4834 | lru_add_drain_all(); | 4857 | lru_add_drain_all(); |
4835 | down_read(&mm->mmap_sem); | 4858 | /* We've already held the mmap_sem */ |
4836 | for (vma = mm->mmap; vma; vma = vma->vm_next) { | 4859 | for (vma = mm->mmap; vma; vma = vma->vm_next) { |
4837 | int ret; | 4860 | int ret; |
4838 | struct mm_walk mem_cgroup_move_charge_walk = { | 4861 | struct mm_walk mem_cgroup_move_charge_walk = { |
@@ -4851,7 +4874,6 @@ static void mem_cgroup_move_charge(struct mm_struct *mm) | |||
4851 | */ | 4874 | */ |
4852 | break; | 4875 | break; |
4853 | } | 4876 | } |
4854 | up_read(&mm->mmap_sem); | ||
4855 | } | 4877 | } |
4856 | 4878 | ||
4857 | static void mem_cgroup_move_task(struct cgroup_subsys *ss, | 4879 | static void mem_cgroup_move_task(struct cgroup_subsys *ss, |
@@ -4860,17 +4882,11 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss, | |||
4860 | struct task_struct *p, | 4882 | struct task_struct *p, |
4861 | bool threadgroup) | 4883 | bool threadgroup) |
4862 | { | 4884 | { |
4863 | struct mm_struct *mm; | 4885 | if (!mc.mm) |
4864 | |||
4865 | if (!mc.to) | ||
4866 | /* no need to move charge */ | 4886 | /* no need to move charge */ |
4867 | return; | 4887 | return; |
4868 | 4888 | ||
4869 | mm = get_task_mm(p); | 4889 | mem_cgroup_move_charge(mc.mm); |
4870 | if (mm) { | ||
4871 | mem_cgroup_move_charge(mm); | ||
4872 | mmput(mm); | ||
4873 | } | ||
4874 | mem_cgroup_clear_mc(); | 4890 | mem_cgroup_clear_mc(); |
4875 | } | 4891 | } |
4876 | #else /* !CONFIG_MMU */ | 4892 | #else /* !CONFIG_MMU */ |
@@ -4911,10 +4927,20 @@ struct cgroup_subsys mem_cgroup_subsys = { | |||
4911 | }; | 4927 | }; |
4912 | 4928 | ||
4913 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP | 4929 | #ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP |
4930 | static int __init enable_swap_account(char *s) | ||
4931 | { | ||
4932 | /* consider enabled if no parameter or 1 is given */ | ||
4933 | if (!s || !strcmp(s, "1")) | ||
4934 | really_do_swap_account = 1; | ||
4935 | else if (!strcmp(s, "0")) | ||
4936 | really_do_swap_account = 0; | ||
4937 | return 1; | ||
4938 | } | ||
4939 | __setup("swapaccount", enable_swap_account); | ||
4914 | 4940 | ||
4915 | static int __init disable_swap_account(char *s) | 4941 | static int __init disable_swap_account(char *s) |
4916 | { | 4942 | { |
4917 | really_do_swap_account = 0; | 4943 | enable_swap_account("0"); |
4918 | return 1; | 4944 | return 1; |
4919 | } | 4945 | } |
4920 | __setup("noswapaccount", disable_swap_account); | 4946 | __setup("noswapaccount", disable_swap_account); |
diff --git a/mm/nommu.c b/mm/nommu.c index 3613517c7592..27a9ac588516 100644 --- a/mm/nommu.c +++ b/mm/nommu.c | |||
@@ -1717,6 +1717,7 @@ void exit_mmap(struct mm_struct *mm) | |||
1717 | mm->mmap = vma->vm_next; | 1717 | mm->mmap = vma->vm_next; |
1718 | delete_vma_from_mm(vma); | 1718 | delete_vma_from_mm(vma); |
1719 | delete_vma(mm, vma); | 1719 | delete_vma(mm, vma); |
1720 | cond_resched(); | ||
1720 | } | 1721 | } |
1721 | 1722 | ||
1722 | kleave(""); | 1723 | kleave(""); |
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 07a654486f75..e4092704c1a9 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c | |||
@@ -3008,14 +3008,6 @@ static __init_refok int __build_all_zonelists(void *data) | |||
3008 | build_zonelist_cache(pgdat); | 3008 | build_zonelist_cache(pgdat); |
3009 | } | 3009 | } |
3010 | 3010 | ||
3011 | #ifdef CONFIG_MEMORY_HOTPLUG | ||
3012 | /* Setup real pagesets for the new zone */ | ||
3013 | if (data) { | ||
3014 | struct zone *zone = data; | ||
3015 | setup_zone_pageset(zone); | ||
3016 | } | ||
3017 | #endif | ||
3018 | |||
3019 | /* | 3011 | /* |
3020 | * Initialize the boot_pagesets that are going to be used | 3012 | * Initialize the boot_pagesets that are going to be used |
3021 | * for bootstrapping processors. The real pagesets for | 3013 | * for bootstrapping processors. The real pagesets for |
@@ -3064,7 +3056,11 @@ void build_all_zonelists(void *data) | |||
3064 | } else { | 3056 | } else { |
3065 | /* we have to stop all cpus to guarantee there is no user | 3057 | /* we have to stop all cpus to guarantee there is no user |
3066 | of zonelist */ | 3058 | of zonelist */ |
3067 | stop_machine(__build_all_zonelists, data, NULL); | 3059 | #ifdef CONFIG_MEMORY_HOTPLUG |
3060 | if (data) | ||
3061 | setup_zone_pageset((struct zone *)data); | ||
3062 | #endif | ||
3063 | stop_machine(__build_all_zonelists, NULL, NULL); | ||
3068 | /* cpuset refresh routine should be here */ | 3064 | /* cpuset refresh routine should be here */ |
3069 | } | 3065 | } |
3070 | vm_total_pages = nr_free_pagecache_pages(); | 3066 | vm_total_pages = nr_free_pagecache_pages(); |
diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 8b1a2ce21ee5..38cc58b8b2b0 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c | |||
@@ -139,7 +139,6 @@ int walk_page_range(unsigned long addr, unsigned long end, | |||
139 | pgd_t *pgd; | 139 | pgd_t *pgd; |
140 | unsigned long next; | 140 | unsigned long next; |
141 | int err = 0; | 141 | int err = 0; |
142 | struct vm_area_struct *vma; | ||
143 | 142 | ||
144 | if (addr >= end) | 143 | if (addr >= end) |
145 | return err; | 144 | return err; |
@@ -149,15 +148,17 @@ int walk_page_range(unsigned long addr, unsigned long end, | |||
149 | 148 | ||
150 | pgd = pgd_offset(walk->mm, addr); | 149 | pgd = pgd_offset(walk->mm, addr); |
151 | do { | 150 | do { |
151 | struct vm_area_struct *uninitialized_var(vma); | ||
152 | |||
152 | next = pgd_addr_end(addr, end); | 153 | next = pgd_addr_end(addr, end); |
153 | 154 | ||
155 | #ifdef CONFIG_HUGETLB_PAGE | ||
154 | /* | 156 | /* |
155 | * handle hugetlb vma individually because pagetable walk for | 157 | * handle hugetlb vma individually because pagetable walk for |
156 | * the hugetlb page is dependent on the architecture and | 158 | * the hugetlb page is dependent on the architecture and |
157 | * we can't handled it in the same manner as non-huge pages. | 159 | * we can't handled it in the same manner as non-huge pages. |
158 | */ | 160 | */ |
159 | vma = find_vma(walk->mm, addr); | 161 | vma = find_vma(walk->mm, addr); |
160 | #ifdef CONFIG_HUGETLB_PAGE | ||
161 | if (vma && is_vm_hugetlb_page(vma)) { | 162 | if (vma && is_vm_hugetlb_page(vma)) { |
162 | if (vma->vm_end < next) | 163 | if (vma->vm_end < next) |
163 | next = vma->vm_end; | 164 | next = vma->vm_end; |