aboutsummaryrefslogtreecommitdiffstats
path: root/mm/memcontrol.c
diff options
context:
space:
mode:
authorGlauber Costa <glommer@parallels.com>2013-02-22 19:34:50 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2013-02-23 20:50:18 -0500
commitee5e8472b8df54fc00031d3b1089d5be50fb2ce0 (patch)
tree5755ebac46f9bdf42becbad88a17a1ad28dc9cd9 /mm/memcontrol.c
parent45cf7ebd5a03317eb825c9dcb8599750d8b16145 (diff)
memcg: prevent changes to move_charge_at_immigrate during task attach
In memcg, we use the cgroup_lock basically to synchronize against attaching new children to a cgroup. We do this because we rely on cgroup core to provide us with this information. We need to guarantee that upon child creation, our tunables are consistent. For those, the calls to cgroup_lock() all live in handlers like mem_cgroup_hierarchy_write(), where we change a tunable in the group that is hierarchy-related. For instance, the use_hierarchy flag cannot be changed if the cgroup already have children. Furthermore, those values are propagated from the parent to the child when a new child is created. So if we don't lock like this, we can end up with the following situation: A B memcg_css_alloc() mem_cgroup_hierarchy_write() copy use hierarchy from parent change use hierarchy in parent finish creation. This is mainly because during create, we are still not fully connected to the css tree. So all iterators and the such that we could use, will fail to show that the group has children. My observation is that all of creation can proceed in parallel with those tasks, except value assignment. So what this patch series does is to first move all value assignment that is dependent on parent values from css_alloc to css_online, where the iterators all work, and then we lock only the value assignment. This will guarantee that parent and children always have consistent values. Together with an online test, that can be derived from the observation that the refcount of an online memcg can be made to be always positive, we should be able to synchronize our side without the cgroup lock. This patch: Currently, we rely on the cgroup_lock() to prevent changes to move_charge_at_immigrate during task migration. However, this is only needed because the current strategy keeps checking this value throughout the whole process. 
Since all we need is serialization, one needs only to guarantee that whatever decision we made in the beginning of a specific migration is respected throughout the process. We can achieve this by just saving it in mc. By doing this, no kind of locking is needed. Signed-off-by: Glauber Costa <glommer@parallels.com> Acked-by: Michal Hocko <mhocko@suse.cz> Cc: Tejun Heo <tj@kernel.org> Cc: Hiroyuki Kamezawa <kamezawa.hiroyuki@gmail.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--mm/memcontrol.c32
1 file changed, 19 insertions, 13 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ae0433885b69..8e0988fee888 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -416,8 +416,8 @@ static bool memcg_kmem_test_and_clear_dead(struct mem_cgroup *memcg)
416 416
417/* Stuffs for move charges at task migration. */ 417/* Stuffs for move charges at task migration. */
418/* 418/*
419 * Types of charges to be moved. "move_charge_at_immitgrate" is treated as a 419 * Types of charges to be moved. "move_charge_at_immitgrate" and
420 * left-shifted bitmap of these types. 420 * "immigrate_flags" are treated as a left-shifted bitmap of these types.
421 */ 421 */
422enum move_type { 422enum move_type {
423 MOVE_CHARGE_TYPE_ANON, /* private anonymous page and swap of it */ 423 MOVE_CHARGE_TYPE_ANON, /* private anonymous page and swap of it */
@@ -430,6 +430,7 @@ static struct move_charge_struct {
430 spinlock_t lock; /* for from, to */ 430 spinlock_t lock; /* for from, to */
431 struct mem_cgroup *from; 431 struct mem_cgroup *from;
432 struct mem_cgroup *to; 432 struct mem_cgroup *to;
433 unsigned long immigrate_flags;
433 unsigned long precharge; 434 unsigned long precharge;
434 unsigned long moved_charge; 435 unsigned long moved_charge;
435 unsigned long moved_swap; 436 unsigned long moved_swap;
@@ -442,14 +443,12 @@ static struct move_charge_struct {
442 443
443static bool move_anon(void) 444static bool move_anon(void)
444{ 445{
445 return test_bit(MOVE_CHARGE_TYPE_ANON, 446 return test_bit(MOVE_CHARGE_TYPE_ANON, &mc.immigrate_flags);
446 &mc.to->move_charge_at_immigrate);
447} 447}
448 448
449static bool move_file(void) 449static bool move_file(void)
450{ 450{
451 return test_bit(MOVE_CHARGE_TYPE_FILE, 451 return test_bit(MOVE_CHARGE_TYPE_FILE, &mc.immigrate_flags);
452 &mc.to->move_charge_at_immigrate);
453} 452}
454 453
455/* 454/*
@@ -5193,15 +5192,14 @@ static int mem_cgroup_move_charge_write(struct cgroup *cgrp,
5193 5192
5194 if (val >= (1 << NR_MOVE_TYPE)) 5193 if (val >= (1 << NR_MOVE_TYPE))
5195 return -EINVAL; 5194 return -EINVAL;
5195
5196 /* 5196 /*
5197 * We check this value several times in both in can_attach() and 5197 * No kind of locking is needed in here, because ->can_attach() will
5198 * attach(), so we need cgroup lock to prevent this value from being 5198 * check this value once in the beginning of the process, and then carry
5199 * inconsistent. 5199 * on with stale data. This means that changes to this value will only
5200 * affect task migrations starting after the change.
5200 */ 5201 */
5201 cgroup_lock();
5202 memcg->move_charge_at_immigrate = val; 5202 memcg->move_charge_at_immigrate = val;
5203 cgroup_unlock();
5204
5205 return 0; 5203 return 0;
5206} 5204}
5207#else 5205#else
@@ -6559,8 +6557,15 @@ static int mem_cgroup_can_attach(struct cgroup *cgroup,
6559 struct task_struct *p = cgroup_taskset_first(tset); 6557 struct task_struct *p = cgroup_taskset_first(tset);
6560 int ret = 0; 6558 int ret = 0;
6561 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgroup); 6559 struct mem_cgroup *memcg = mem_cgroup_from_cont(cgroup);
6560 unsigned long move_charge_at_immigrate;
6562 6561
6563 if (memcg->move_charge_at_immigrate) { 6562 /*
6563 * We are now commited to this value whatever it is. Changes in this
6564 * tunable will only affect upcoming migrations, not the current one.
6565 * So we need to save it, and keep it going.
6566 */
6567 move_charge_at_immigrate = memcg->move_charge_at_immigrate;
6568 if (move_charge_at_immigrate) {
6564 struct mm_struct *mm; 6569 struct mm_struct *mm;
6565 struct mem_cgroup *from = mem_cgroup_from_task(p); 6570 struct mem_cgroup *from = mem_cgroup_from_task(p);
6566 6571
@@ -6580,6 +6585,7 @@ static int mem_cgroup_can_attach(struct cgroup *cgroup,
6580 spin_lock(&mc.lock); 6585 spin_lock(&mc.lock);
6581 mc.from = from; 6586 mc.from = from;
6582 mc.to = memcg; 6587 mc.to = memcg;
6588 mc.immigrate_flags = move_charge_at_immigrate;
6583 spin_unlock(&mc.lock); 6589 spin_unlock(&mc.lock);
6584 /* We set mc.moving_task later */ 6590 /* We set mc.moving_task later */
6585 6591