diff options
| author | Johannes Weiner <hannes@cmpxchg.org> | 2013-10-31 19:34:15 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-10-31 19:58:13 -0400 |
| commit | 696ac172fffa653dca401bb2b0cad91cf2ce453f (patch) | |
| tree | fec0af0d449705ddfd6145c02cf0420579608d9f | |
| parent | 0056f4e66a1b8f00245248877e80386af36af14c (diff) | |
mm: memcg: fix test for child groups
When memcg code needs to know whether any given memcg has children, it
uses the cgroup child iteration primitives and returns true/false
depending on whether the iteration loop is executed at least once or
not.
Because a cgroup's list of children is RCU protected, these primitives
require the RCU read-lock to be held, which is not the case for all
memcg callers. This results in the following splat when e.g. enabling
hierarchy mode:
WARNING: CPU: 3 PID: 1 at kernel/cgroup.c:3043 css_next_child+0xa3/0x160()
CPU: 3 PID: 1 Comm: systemd Not tainted 3.12.0-rc5-00117-g83f11a9-dirty #18
Hardware name: LENOVO 3680B56/3680B56, BIOS 6QET69WW (1.39 ) 04/26/2012
Call Trace:
dump_stack+0x54/0x74
warn_slowpath_common+0x78/0xa0
warn_slowpath_null+0x1a/0x20
css_next_child+0xa3/0x160
mem_cgroup_hierarchy_write+0x5b/0xa0
cgroup_file_write+0x108/0x2a0
vfs_write+0xbd/0x1e0
SyS_write+0x4c/0xa0
system_call_fastpath+0x16/0x1b
In the memcg case, we only care about children when we are attempting to
modify inheritable attributes interactively. Racing with deletion could
mean a spurious -EBUSY, no problem. Racing with addition is handled
just fine as well through the memcg_create_mutex: if the child group is
not on the list after the mutex is acquired, it won't be initialized
from the parent's attributes until after the unlock.
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
| -rw-r--r-- | mm/memcontrol.c | 35 |
1 files changed, 11 insertions, 24 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 7e11cb7d75b1..e63278222be5 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
| @@ -4959,31 +4959,18 @@ static void mem_cgroup_reparent_charges(struct mem_cgroup *memcg) | |||
| 4959 | } while (usage > 0); | 4959 | } while (usage > 0); |
| 4960 | } | 4960 | } |
| 4961 | 4961 | ||
| 4962 | /* | ||
| 4963 | * This mainly exists for tests during the setting of set of use_hierarchy. | ||
| 4964 | * Since this is the very setting we are changing, the current hierarchy value | ||
| 4965 | * is meaningless | ||
| 4966 | */ | ||
| 4967 | static inline bool __memcg_has_children(struct mem_cgroup *memcg) | ||
| 4968 | { | ||
| 4969 | struct cgroup_subsys_state *pos; | ||
| 4970 | |||
| 4971 | /* bounce at first found */ | ||
| 4972 | css_for_each_child(pos, &memcg->css) | ||
| 4973 | return true; | ||
| 4974 | return false; | ||
| 4975 | } | ||
| 4976 | |||
| 4977 | /* | ||
| 4978 | * Must be called with memcg_create_mutex held, unless the cgroup is guaranteed | ||
| 4979 | * to be already dead (as in mem_cgroup_force_empty, for instance). This is | ||
| 4980 | * from mem_cgroup_count_children(), in the sense that we don't really care how | ||
| 4981 | * many children we have; we only need to know if we have any. It also counts | ||
| 4982 | * any memcg without hierarchy as infertile. | ||
| 4983 | */ | ||
| 4984 | static inline bool memcg_has_children(struct mem_cgroup *memcg) | 4962 | static inline bool memcg_has_children(struct mem_cgroup *memcg) |
| 4985 | { | 4963 | { |
| 4986 | return memcg->use_hierarchy && __memcg_has_children(memcg); | 4964 | lockdep_assert_held(&memcg_create_mutex); |
| 4965 | /* | ||
| 4966 | * The lock does not prevent addition or deletion to the list | ||
| 4967 | * of children, but it prevents a new child from being | ||
| 4968 | * initialized based on this parent in css_online(), so it's | ||
| 4969 | * enough to decide whether hierarchically inherited | ||
| 4970 | * attributes can still be changed or not. | ||
| 4971 | */ | ||
| 4972 | return memcg->use_hierarchy && | ||
| 4973 | !list_empty(&memcg->css.cgroup->children); | ||
| 4987 | } | 4974 | } |
| 4988 | 4975 | ||
| 4989 | /* | 4976 | /* |
| @@ -5063,7 +5050,7 @@ static int mem_cgroup_hierarchy_write(struct cgroup_subsys_state *css, | |||
| 5063 | */ | 5050 | */ |
| 5064 | if ((!parent_memcg || !parent_memcg->use_hierarchy) && | 5051 | if ((!parent_memcg || !parent_memcg->use_hierarchy) && |
| 5065 | (val == 1 || val == 0)) { | 5052 | (val == 1 || val == 0)) { |
| 5066 | if (!__memcg_has_children(memcg)) | 5053 | if (list_empty(&memcg->css.cgroup->children)) |
| 5067 | memcg->use_hierarchy = val; | 5054 | memcg->use_hierarchy = val; |
| 5068 | else | 5055 | else |
| 5069 | retval = -EBUSY; | 5056 | retval = -EBUSY; |
