aboutsummaryrefslogtreecommitdiffstats
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--mm/memcontrol.c85
1 file changed, 55 insertions, 30 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2efa8ea07ff..00bb8a64d02 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -61,7 +61,14 @@ struct mem_cgroup *root_mem_cgroup __read_mostly;
61#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP 61#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
62/* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */ 62/* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */
63int do_swap_account __read_mostly; 63int do_swap_account __read_mostly;
64static int really_do_swap_account __initdata = 1; /* for remember boot option*/ 64
65/* for remember boot option*/
66#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP_ENABLED
67static int really_do_swap_account __initdata = 1;
68#else
69static int really_do_swap_account __initdata = 0;
70#endif
71
65#else 72#else
66#define do_swap_account (0) 73#define do_swap_account (0)
67#endif 74#endif
@@ -278,13 +285,14 @@ enum move_type {
278 285
279/* "mc" and its members are protected by cgroup_mutex */ 286/* "mc" and its members are protected by cgroup_mutex */
280static struct move_charge_struct { 287static struct move_charge_struct {
281 spinlock_t lock; /* for from, to, moving_task */ 288 spinlock_t lock; /* for from, to */
282 struct mem_cgroup *from; 289 struct mem_cgroup *from;
283 struct mem_cgroup *to; 290 struct mem_cgroup *to;
284 unsigned long precharge; 291 unsigned long precharge;
285 unsigned long moved_charge; 292 unsigned long moved_charge;
286 unsigned long moved_swap; 293 unsigned long moved_swap;
287 struct task_struct *moving_task; /* a task moving charges */ 294 struct task_struct *moving_task; /* a task moving charges */
295 struct mm_struct *mm;
288 wait_queue_head_t waitq; /* a waitq for other context */ 296 wait_queue_head_t waitq; /* a waitq for other context */
289} mc = { 297} mc = {
290 .lock = __SPIN_LOCK_UNLOCKED(mc.lock), 298 .lock = __SPIN_LOCK_UNLOCKED(mc.lock),
@@ -1917,19 +1925,18 @@ again:
1917 1925
1918 rcu_read_lock(); 1926 rcu_read_lock();
1919 p = rcu_dereference(mm->owner); 1927 p = rcu_dereference(mm->owner);
1920 VM_BUG_ON(!p);
1921 /* 1928 /*
1922 * because we don't have task_lock(), "p" can exit while 1929 * Because we don't have task_lock(), "p" can exit.
1923 * we're here. In that case, "mem" can point to root 1930 * In that case, "mem" can point to root or p can be NULL with
1924	 * cgroup but never be NULL. (and task_struct itself is freed	1931	 * race with swapoff. Then, we have small risk of mis-accounting.
1925 * by RCU, cgroup itself is RCU safe.) Then, we have small 1932 * But such kind of mis-account by race always happens because
1926	 * risk here to get wrong cgroup. But such kind of mis-account	1933	 * we don't have cgroup_mutex(). It's overkill and we allow that
1927 * by race always happens because we don't have cgroup_mutex(). 1934 * small race, here.
1928	 * It's overkill and we allow that small race, here.	1935	 * (*) swapoff et al will charge against mm-struct not against
1936 * task-struct. So, mm->owner can be NULL.
1929 */ 1937 */
1930 mem = mem_cgroup_from_task(p); 1938 mem = mem_cgroup_from_task(p);
1931 VM_BUG_ON(!mem); 1939 if (!mem || mem_cgroup_is_root(mem)) {
1932 if (mem_cgroup_is_root(mem)) {
1933 rcu_read_unlock(); 1940 rcu_read_unlock();
1934 goto done; 1941 goto done;
1935 } 1942 }
@@ -2152,7 +2159,7 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc,
2152{ 2159{
2153 VM_BUG_ON(from == to); 2160 VM_BUG_ON(from == to);
2154 VM_BUG_ON(PageLRU(pc->page)); 2161 VM_BUG_ON(PageLRU(pc->page));
2155 VM_BUG_ON(!PageCgroupLocked(pc)); 2162 VM_BUG_ON(!page_is_cgroup_locked(pc));
2156 VM_BUG_ON(!PageCgroupUsed(pc)); 2163 VM_BUG_ON(!PageCgroupUsed(pc));
2157 VM_BUG_ON(pc->mem_cgroup != from); 2164 VM_BUG_ON(pc->mem_cgroup != from);
2158 2165
@@ -4631,7 +4638,7 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
4631 unsigned long precharge; 4638 unsigned long precharge;
4632 struct vm_area_struct *vma; 4639 struct vm_area_struct *vma;
4633 4640
4634 down_read(&mm->mmap_sem); 4641 /* We've already held the mmap_sem */
4635 for (vma = mm->mmap; vma; vma = vma->vm_next) { 4642 for (vma = mm->mmap; vma; vma = vma->vm_next) {
4636 struct mm_walk mem_cgroup_count_precharge_walk = { 4643 struct mm_walk mem_cgroup_count_precharge_walk = {
4637 .pmd_entry = mem_cgroup_count_precharge_pte_range, 4644 .pmd_entry = mem_cgroup_count_precharge_pte_range,
@@ -4643,7 +4650,6 @@ static unsigned long mem_cgroup_count_precharge(struct mm_struct *mm)
4643 walk_page_range(vma->vm_start, vma->vm_end, 4650 walk_page_range(vma->vm_start, vma->vm_end,
4644 &mem_cgroup_count_precharge_walk); 4651 &mem_cgroup_count_precharge_walk);
4645 } 4652 }
4646 up_read(&mm->mmap_sem);
4647 4653
4648 precharge = mc.precharge; 4654 precharge = mc.precharge;
4649 mc.precharge = 0; 4655 mc.precharge = 0;
@@ -4694,11 +4700,16 @@ static void mem_cgroup_clear_mc(void)
4694 4700
4695 mc.moved_swap = 0; 4701 mc.moved_swap = 0;
4696 } 4702 }
4703 if (mc.mm) {
4704 up_read(&mc.mm->mmap_sem);
4705 mmput(mc.mm);
4706 }
4697 spin_lock(&mc.lock); 4707 spin_lock(&mc.lock);
4698 mc.from = NULL; 4708 mc.from = NULL;
4699 mc.to = NULL; 4709 mc.to = NULL;
4700 mc.moving_task = NULL;
4701 spin_unlock(&mc.lock); 4710 spin_unlock(&mc.lock);
4711 mc.moving_task = NULL;
4712 mc.mm = NULL;
4702 mem_cgroup_end_move(from); 4713 mem_cgroup_end_move(from);
4703 memcg_oom_recover(from); 4714 memcg_oom_recover(from);
4704 memcg_oom_recover(to); 4715 memcg_oom_recover(to);
@@ -4724,12 +4735,21 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
4724 return 0; 4735 return 0;
4725 /* We move charges only when we move a owner of the mm */ 4736 /* We move charges only when we move a owner of the mm */
4726 if (mm->owner == p) { 4737 if (mm->owner == p) {
4738 /*
4739 * We do all the move charge works under one mmap_sem to
4740 * avoid deadlock with down_write(&mmap_sem)
4741 * -> try_charge() -> if (mc.moving_task) -> sleep.
4742 */
4743 down_read(&mm->mmap_sem);
4744
4727 VM_BUG_ON(mc.from); 4745 VM_BUG_ON(mc.from);
4728 VM_BUG_ON(mc.to); 4746 VM_BUG_ON(mc.to);
4729 VM_BUG_ON(mc.precharge); 4747 VM_BUG_ON(mc.precharge);
4730 VM_BUG_ON(mc.moved_charge); 4748 VM_BUG_ON(mc.moved_charge);
4731 VM_BUG_ON(mc.moved_swap); 4749 VM_BUG_ON(mc.moved_swap);
4732 VM_BUG_ON(mc.moving_task); 4750 VM_BUG_ON(mc.moving_task);
4751 VM_BUG_ON(mc.mm);
4752
4733 mem_cgroup_start_move(from); 4753 mem_cgroup_start_move(from);
4734 spin_lock(&mc.lock); 4754 spin_lock(&mc.lock);
4735 mc.from = from; 4755 mc.from = from;
@@ -4737,14 +4757,16 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
4737 mc.precharge = 0; 4757 mc.precharge = 0;
4738 mc.moved_charge = 0; 4758 mc.moved_charge = 0;
4739 mc.moved_swap = 0; 4759 mc.moved_swap = 0;
4740 mc.moving_task = current;
4741 spin_unlock(&mc.lock); 4760 spin_unlock(&mc.lock);
4761 mc.moving_task = current;
4762 mc.mm = mm;
4742 4763
4743 ret = mem_cgroup_precharge_mc(mm); 4764 ret = mem_cgroup_precharge_mc(mm);
4744 if (ret) 4765 if (ret)
4745 mem_cgroup_clear_mc(); 4766 mem_cgroup_clear_mc();
4746 } 4767 /* We call up_read() and mmput() in clear_mc(). */
4747 mmput(mm); 4768 } else
4769 mmput(mm);
4748 } 4770 }
4749 return ret; 4771 return ret;
4750} 4772}
@@ -4832,7 +4854,7 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
4832 struct vm_area_struct *vma; 4854 struct vm_area_struct *vma;
4833 4855
4834 lru_add_drain_all(); 4856 lru_add_drain_all();
4835 down_read(&mm->mmap_sem); 4857 /* We've already held the mmap_sem */
4836 for (vma = mm->mmap; vma; vma = vma->vm_next) { 4858 for (vma = mm->mmap; vma; vma = vma->vm_next) {
4837 int ret; 4859 int ret;
4838 struct mm_walk mem_cgroup_move_charge_walk = { 4860 struct mm_walk mem_cgroup_move_charge_walk = {
@@ -4851,7 +4873,6 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
4851 */ 4873 */
4852 break; 4874 break;
4853 } 4875 }
4854 up_read(&mm->mmap_sem);
4855} 4876}
4856 4877
4857static void mem_cgroup_move_task(struct cgroup_subsys *ss, 4878static void mem_cgroup_move_task(struct cgroup_subsys *ss,
@@ -4860,17 +4881,11 @@ static void mem_cgroup_move_task(struct cgroup_subsys *ss,
4860 struct task_struct *p, 4881 struct task_struct *p,
4861 bool threadgroup) 4882 bool threadgroup)
4862{ 4883{
4863 struct mm_struct *mm; 4884 if (!mc.mm)
4864
4865 if (!mc.to)
4866 /* no need to move charge */ 4885 /* no need to move charge */
4867 return; 4886 return;
4868 4887
4869 mm = get_task_mm(p); 4888 mem_cgroup_move_charge(mc.mm);
4870 if (mm) {
4871 mem_cgroup_move_charge(mm);
4872 mmput(mm);
4873 }
4874 mem_cgroup_clear_mc(); 4889 mem_cgroup_clear_mc();
4875} 4890}
4876#else /* !CONFIG_MMU */ 4891#else /* !CONFIG_MMU */
@@ -4911,10 +4926,20 @@ struct cgroup_subsys mem_cgroup_subsys = {
4911}; 4926};
4912 4927
4913#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP 4928#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP
4929static int __init enable_swap_account(char *s)
4930{
4931 /* consider enabled if no parameter or 1 is given */
4932 if (!s || !strcmp(s, "1"))
4933 really_do_swap_account = 1;
4934 else if (!strcmp(s, "0"))
4935 really_do_swap_account = 0;
4936 return 1;
4937}
4938__setup("swapaccount", enable_swap_account);
4914 4939
4915static int __init disable_swap_account(char *s) 4940static int __init disable_swap_account(char *s)
4916{ 4941{
4917 really_do_swap_account = 0; 4942 enable_swap_account("0");
4918 return 1; 4943 return 1;
4919} 4944}
4920__setup("noswapaccount", disable_swap_account); 4945__setup("noswapaccount", disable_swap_account);