author     Tejun Heo <tj@kernel.org>  2016-04-21 19:09:02 -0400
committer  Tejun Heo <tj@kernel.org>  2016-04-25 15:45:14 -0400
commit     264a0ae164bc0e9144bebcd25ff030d067b1a878 (patch)
tree       dde501787be0f898cfc460d6d09fde7da0e0d6ba
parent     5cf1cacb49aee39c3e02ae87068fc3c6430659b0 (diff)
memcg: relocate charge moving from ->attach to ->post_attach
Hello,

So, this ended up a lot simpler than I originally expected.  I tested it
lightly and it seems to work fine.  Petr, can you please test these two
patches w/o the lru drain drop patch and see whether the problem is gone?

Thanks.

------ 8< ------
If charge moving is used, memcg performs relabeling of the affected pages
from its ->attach callback, which is called under cgroup_threadgroup_rwsem
and thus can't create new kthreads.  This is fragile, as various operations
may depend on workqueues making forward progress, which in turn relies on
the ability to create new kthreads.

There's no reason to perform charge moving from ->attach, which is deep in
the task migration path.  Move it to ->post_attach, which is called after
the actual migration is finished and cgroup_threadgroup_rwsem is dropped.

* move_charge_struct->mm is added and ->can_attach is now responsible for
  pinning and recording the target mm.  mem_cgroup_clear_mc() is updated
  accordingly.  This also simplifies mem_cgroup_move_task().

* mem_cgroup_move_task() is now called from ->post_attach instead of
  ->attach.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@kernel.org>
Debugged-and-tested-by: Petr Mladek <pmladek@suse.com>
Reported-by: Cyril Hrubis <chrubis@suse.cz>
Reported-by: Johannes Weiner <hannes@cmpxchg.org>
Fixes: 1ed1328792ff ("sched, cgroup: replace signal_struct->group_rwsem with a global percpu_rwsem")
Cc: <stable@vger.kernel.org> # 4.4+
-rw-r--r--  mm/memcontrol.c | 37
1 file changed, 19 insertions(+), 18 deletions(-)
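Condensed view of the resulting flow (an editor's sketch, not a compilable
excerpt; only identifiers that appear in the diff below are used, elided
code is marked with /* ... */):

    /* ->can_attach: still runs with cgroup_threadgroup_rwsem held, but now
     * only pins the target mm and records it in mc; no page walk here. */
    static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
    {
            /* ... get the task's mm, decide whether charges should move ... */
            spin_lock(&mc.lock);
            mc.mm = mm;                     /* reference kept until clear_mc */
            mc.from = from;
            mc.to = memcg;
            mc.flags = move_flags;
            spin_unlock(&mc.lock);
            /* ... precharge; on failure mem_cgroup_clear_mc() drops mc.mm ... */
    }

    /* ->post_attach: runs after the migration is finished and the rwsem is
     * dropped, so the page walk may depend on workqueue/kthread progress. */
    static void mem_cgroup_move_task(void)
    {
            if (mc.to) {
                    mem_cgroup_move_charge();       /* walks mc.mm */
                    mem_cgroup_clear_mc();          /* resets mc.* and mmput()s the mm */
            }
    }

The net effect is that the heavy page walk in mem_cgroup_move_charge() no
longer nests inside cgroup_threadgroup_rwsem.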
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 36db05fa8acb..fe787f5c41bd 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -207,6 +207,7 @@ static void mem_cgroup_oom_notify(struct mem_cgroup *memcg);
 /* "mc" and its members are protected by cgroup_mutex */
 static struct move_charge_struct {
 	spinlock_t lock; /* for from, to */
+	struct mm_struct *mm;
 	struct mem_cgroup *from;
 	struct mem_cgroup *to;
 	unsigned long flags;
@@ -4667,6 +4668,8 @@ static void __mem_cgroup_clear_mc(void)
 
 static void mem_cgroup_clear_mc(void)
 {
+	struct mm_struct *mm = mc.mm;
+
 	/*
 	 * we must clear moving_task before waking up waiters at the end of
 	 * task migration.
@@ -4676,7 +4679,10 @@ static void mem_cgroup_clear_mc(void)
 	spin_lock(&mc.lock);
 	mc.from = NULL;
 	mc.to = NULL;
+	mc.mm = NULL;
 	spin_unlock(&mc.lock);
+
+	mmput(mm);
 }
 
 static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
@@ -4733,6 +4739,7 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
 		VM_BUG_ON(mc.moved_swap);
 
 		spin_lock(&mc.lock);
+		mc.mm = mm;
 		mc.from = from;
 		mc.to = memcg;
 		mc.flags = move_flags;
@@ -4742,8 +4749,9 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
 		ret = mem_cgroup_precharge_mc(mm);
 		if (ret)
 			mem_cgroup_clear_mc();
+	} else {
+		mmput(mm);
 	}
-	mmput(mm);
 	return ret;
 }
 
@@ -4852,11 +4860,11 @@ put: /* get_mctgt_type() gets the page */
 	return ret;
 }
 
-static void mem_cgroup_move_charge(struct mm_struct *mm)
+static void mem_cgroup_move_charge(void)
 {
 	struct mm_walk mem_cgroup_move_charge_walk = {
 		.pmd_entry = mem_cgroup_move_charge_pte_range,
-		.mm = mm,
+		.mm = mc.mm,
 	};
 
 	lru_add_drain_all();
@@ -4868,7 +4876,7 @@ static void mem_cgroup_move_charge(struct mm_struct *mm)
 	atomic_inc(&mc.from->moving_account);
 	synchronize_rcu();
 retry:
-	if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
+	if (unlikely(!down_read_trylock(&mc.mm->mmap_sem))) {
 		/*
 		 * Someone who are holding the mmap_sem might be waiting in
 		 * waitq. So we cancel all extra charges, wake up all waiters,
@@ -4885,23 +4893,16 @@ retry:
 	 * additional charge, the page walk just aborts.
 	 */
 	walk_page_range(0, ~0UL, &mem_cgroup_move_charge_walk);
-	up_read(&mm->mmap_sem);
+	up_read(&mc.mm->mmap_sem);
 	atomic_dec(&mc.from->moving_account);
 }
 
-static void mem_cgroup_move_task(struct cgroup_taskset *tset)
+static void mem_cgroup_move_task(void)
 {
-	struct cgroup_subsys_state *css;
-	struct task_struct *p = cgroup_taskset_first(tset, &css);
-	struct mm_struct *mm = get_task_mm(p);
-
-	if (mm) {
-		if (mc.to)
-			mem_cgroup_move_charge(mm);
-		mmput(mm);
-	}
-	if (mc.to)
+	if (mc.to) {
+		mem_cgroup_move_charge();
 		mem_cgroup_clear_mc();
+	}
 }
 #else	/* !CONFIG_MMU */
 static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
@@ -4911,7 +4912,7 @@ static int mem_cgroup_can_attach(struct cgroup_taskset *tset)
 static void mem_cgroup_cancel_attach(struct cgroup_taskset *tset)
 {
 }
-static void mem_cgroup_move_task(struct cgroup_taskset *tset)
+static void mem_cgroup_move_task(void)
 {
 }
 #endif
@@ -5195,7 +5196,7 @@ struct cgroup_subsys memory_cgrp_subsys = {
 	.css_reset = mem_cgroup_css_reset,
 	.can_attach = mem_cgroup_can_attach,
 	.cancel_attach = mem_cgroup_cancel_attach,
-	.attach = mem_cgroup_move_task,
+	.post_attach = mem_cgroup_move_task,
 	.bind = mem_cgroup_bind,
 	.dfl_cftypes = memory_files,
 	.legacy_cftypes = mem_cgroup_legacy_files,