aboutsummaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
author: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp> 2010-03-10 18:22:16 -0500
committer: Linus Torvalds <torvalds@linux-foundation.org> 2010-03-12 18:52:36 -0500
commit: 8033b97c9b5ef063e3f4bf2efe1cd0a22093aaff (patch)
tree: 0d3dbf74d6c307d425d5d13a5c7c1efbf6c079f2 /mm
parent: 854ffa8d104e44111fec96764c0e0cb29223d54c (diff)
memcg: avoid oom during moving charge
The move-charge-at-task-migration feature holds extra charges on "to" (pre-charges) and on "from" (left-over charges) while charges are being moved. This means an unnecessary OOM can happen. This patch tries to avoid such an OOM.

Signed-off-by: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Cc: Balbir Singh <balbir@linux.vnet.ibm.com>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Li Zefan <lizf@cn.fujitsu.com>
Cc: Paul Menage <menage@google.com>
Cc: Daisuke Nishimura <nishimura@mxp.nes.nec.co.jp>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r-- mm/memcontrol.c 53
1 files changed, 51 insertions, 2 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f5fb9917787c..589084f00b70 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -254,7 +254,11 @@ static struct move_charge_struct {
254 struct mem_cgroup *to; 254 struct mem_cgroup *to;
255 unsigned long precharge; 255 unsigned long precharge;
256 unsigned long moved_charge; 256 unsigned long moved_charge;
257} mc; 257 struct task_struct *moving_task; /* a task moving charges */
258 wait_queue_head_t waitq; /* a waitq for other context */
259} mc = {
260 .waitq = __WAIT_QUEUE_HEAD_INITIALIZER(mc.waitq),
261};
258 262
259/* 263/*
260 * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft 264 * Maximum loops in mem_cgroup_hierarchical_reclaim(), used for soft
@@ -1508,6 +1512,48 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
1508 if (mem_cgroup_check_under_limit(mem_over_limit)) 1512 if (mem_cgroup_check_under_limit(mem_over_limit))
1509 continue; 1513 continue;
1510 1514
1515 /* try to avoid oom while someone is moving charge */
1516 if (mc.moving_task && current != mc.moving_task) {
1517 struct mem_cgroup *from, *to;
1518 bool do_continue = false;
1519 /*
1520 * There is a small race that "from" or "to" can be
1521 * freed by rmdir, so we use css_tryget().
1522 */
1523 rcu_read_lock();
1524 from = mc.from;
1525 to = mc.to;
1526 if (from && css_tryget(&from->css)) {
1527 if (mem_over_limit->use_hierarchy)
1528 do_continue = css_is_ancestor(
1529 &from->css,
1530 &mem_over_limit->css);
1531 else
1532 do_continue = (from == mem_over_limit);
1533 css_put(&from->css);
1534 }
1535 if (!do_continue && to && css_tryget(&to->css)) {
1536 if (mem_over_limit->use_hierarchy)
1537 do_continue = css_is_ancestor(
1538 &to->css,
1539 &mem_over_limit->css);
1540 else
1541 do_continue = (to == mem_over_limit);
1542 css_put(&to->css);
1543 }
1544 rcu_read_unlock();
1545 if (do_continue) {
1546 DEFINE_WAIT(wait);
1547 prepare_to_wait(&mc.waitq, &wait,
1548 TASK_INTERRUPTIBLE);
1549 /* moving charge context might have finished. */
1550 if (mc.moving_task)
1551 schedule();
1552 finish_wait(&mc.waitq, &wait);
1553 continue;
1554 }
1555 }
1556
1511 if (!nr_retries--) { 1557 if (!nr_retries--) {
1512 if (oom) { 1558 if (oom) {
1513 mem_cgroup_out_of_memory(mem_over_limit, gfp_mask); 1559 mem_cgroup_out_of_memory(mem_over_limit, gfp_mask);
@@ -3381,7 +3427,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
3381 INIT_WORK(&stock->work, drain_local_stock); 3427 INIT_WORK(&stock->work, drain_local_stock);
3382 } 3428 }
3383 hotcpu_notifier(memcg_stock_cpu_callback, 0); 3429 hotcpu_notifier(memcg_stock_cpu_callback, 0);
3384
3385 } else { 3430 } else {
3386 parent = mem_cgroup_from_cont(cont->parent); 3431 parent = mem_cgroup_from_cont(cont->parent);
3387 mem->use_hierarchy = parent->use_hierarchy; 3432 mem->use_hierarchy = parent->use_hierarchy;
@@ -3641,6 +3686,8 @@ static void mem_cgroup_clear_mc(void)
3641 } 3686 }
3642 mc.from = NULL; 3687 mc.from = NULL;
3643 mc.to = NULL; 3688 mc.to = NULL;
3689 mc.moving_task = NULL;
3690 wake_up_all(&mc.waitq);
3644} 3691}
3645 3692
3646static int mem_cgroup_can_attach(struct cgroup_subsys *ss, 3693static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
@@ -3666,10 +3713,12 @@ static int mem_cgroup_can_attach(struct cgroup_subsys *ss,
3666 VM_BUG_ON(mc.to); 3713 VM_BUG_ON(mc.to);
3667 VM_BUG_ON(mc.precharge); 3714 VM_BUG_ON(mc.precharge);
3668 VM_BUG_ON(mc.moved_charge); 3715 VM_BUG_ON(mc.moved_charge);
3716 VM_BUG_ON(mc.moving_task);
3669 mc.from = from; 3717 mc.from = from;
3670 mc.to = mem; 3718 mc.to = mem;
3671 mc.precharge = 0; 3719 mc.precharge = 0;
3672 mc.moved_charge = 0; 3720 mc.moved_charge = 0;
3721 mc.moving_task = current;
3673 3722
3674 ret = mem_cgroup_precharge_mc(mm); 3723 ret = mem_cgroup_precharge_mc(mm);
3675 if (ret) 3724 if (ret)