diff options
author | Johannes Weiner <hannes@cmpxchg.org> | 2013-10-31 19:34:15 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-10-31 19:58:13 -0400 |
commit | 696ac172fffa653dca401bb2b0cad91cf2ce453f (patch) | |
tree | fec0af0d449705ddfd6145c02cf0420579608d9f /mm/memcontrol.c | |
parent | 0056f4e66a1b8f00245248877e80386af36af14c (diff) |
mm: memcg: fix test for child groups
When memcg code needs to know whether any given memcg has children, it
uses the cgroup child iteration primitives and returns true/false
depending on whether the iteration loop is executed at least once or
not.
Because a cgroup's list of children is RCU protected, these primitives
require the RCU read-lock to be held, which is not the case for all
memcg callers. This results in the following splat when e.g. enabling
hierarchy mode:
WARNING: CPU: 3 PID: 1 at kernel/cgroup.c:3043 css_next_child+0xa3/0x160()
CPU: 3 PID: 1 Comm: systemd Not tainted 3.12.0-rc5-00117-g83f11a9-dirty #18
Hardware name: LENOVO 3680B56/3680B56, BIOS 6QET69WW (1.39 ) 04/26/2012
Call Trace:
dump_stack+0x54/0x74
warn_slowpath_common+0x78/0xa0
warn_slowpath_null+0x1a/0x20
css_next_child+0xa3/0x160
mem_cgroup_hierarchy_write+0x5b/0xa0
cgroup_file_write+0x108/0x2a0
vfs_write+0xbd/0x1e0
SyS_write+0x4c/0xa0
system_call_fastpath+0x16/0x1b
In the memcg case, we only care about children when we are attempting to
modify inheritable attributes interactively. Racing with deletion could
mean a spurious -EBUSY, no problem. Racing with addition is handled
just fine as well through the memcg_create_mutex: if the child group is
not on the list after the mutex is acquired, it won't be initialized
from the parent's attributes until after the unlock.
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 35 |
1 files changed, 11 insertions, 24 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7e11cb7d75b1..e63278222be5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -4959,31 +4959,18 @@ static void mem_cgroup_reparent_charges(struct mem_cgroup *memcg) | |||
4959 | } while (usage > 0); | 4959 | } while (usage > 0); |
4960 | } | 4960 | } |
4961 | 4961 | ||
4962 | /* | ||
4963 | * This mainly exists for tests during the setting of set of use_hierarchy. | ||
4964 | * Since this is the very setting we are changing, the current hierarchy value | ||
4965 | * is meaningless | ||
4966 | */ | ||
4967 | static inline bool __memcg_has_children(struct mem_cgroup *memcg) | ||
4968 | { | ||
4969 | struct cgroup_subsys_state *pos; | ||
4970 | |||
4971 | /* bounce at first found */ | ||
4972 | css_for_each_child(pos, &memcg->css) | ||
4973 | return true; | ||
4974 | return false; | ||
4975 | } | ||
4976 | |||
4977 | /* | ||
4978 | * Must be called with memcg_create_mutex held, unless the cgroup is guaranteed | ||
4979 | * to be already dead (as in mem_cgroup_force_empty, for instance). This is | ||
4980 | * from mem_cgroup_count_children(), in the sense that we don't really care how | ||
4981 | * many children we have; we only need to know if we have any. It also counts | ||
4982 | * any memcg without hierarchy as infertile. | ||
4983 | */ | ||
4984 | static inline bool memcg_has_children(struct mem_cgroup *memcg) | 4962 | static inline bool memcg_has_children(struct mem_cgroup *memcg) |
4985 | { | 4963 | { |
4986 | return memcg->use_hierarchy && __memcg_has_children(memcg); | 4964 | lockdep_assert_held(&memcg_create_mutex); |
4965 | /* | ||
4966 | * The lock does not prevent addition or deletion to the list | ||
4967 | * of children, but it prevents a new child from being | ||
4968 | * initialized based on this parent in css_online(), so it's | ||
4969 | * enough to decide whether hierarchically inherited | ||
4970 | * attributes can still be changed or not. | ||
4971 | */ | ||
4972 | return memcg->use_hierarchy && | ||
4973 | !list_empty(&memcg->css.cgroup->children); | ||
4987 | } | 4974 | } |
4988 | 4975 | ||
4989 | /* | 4976 | /* |
@@ -5063,7 +5050,7 @@ static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css, | |||
5063 | */ | 5050 | */ |
5064 | if ((!parent_memcg || !parent_memcg->use_hierarchy) && | 5051 | if ((!parent_memcg || !parent_memcg->use_hierarchy) && |
5065 | (val == 1 || val == 0)) { | 5052 | (val == 1 || val == 0)) { |
5066 | if (!__memcg_has_children(memcg)) | 5053 | if (list_empty(&memcg->css.cgroup->children)) |
5067 | memcg->use_hierarchy = val; | 5054 | memcg->use_hierarchy = val; |
5068 | else | 5055 | else |
5069 | retval = -EBUSY; | 5056 | retval = -EBUSY; |