author     Johannes Weiner <hannes@cmpxchg.org>          2013-07-03 18:04:51 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2013-07-03 19:07:40 -0400
commit     519ebea3bf6df45439e79c54bda1d9e29fe13a64 (patch)
tree       74b52e13824051420a9901499de093c347338471 /mm
parent     0f2d4a8e27108ad3b2555396b06392be590fe287 (diff)
mm: memcontrol: factor out reclaim iterator loading and updating
mem_cgroup_iter() is too hard to follow. Factor out the lockless reclaim
iterator loading and updating so it's easier to follow the big picture.

Also document the iterator invalidation mechanism a bit more extensively.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reported-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Tejun Heo <tj@kernel.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Glauber Costa <glommer@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
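The invalidation mechanism being documented is a generation counter: destroying
any group below @root bumps root->dead_count, and a cached iterator position is
trusted only while the counter still matches the value recorded when the
position was stored. Below is a minimal userspace sketch of that pattern, with
hypothetical names, C11 acquire/release standing in for the kernel's
smp_rmb()/smp_wmb() pairing, and the css_tryget()/css_put() liveness check left
out for brevity:

/*
 * Hypothetical sketch of the dead_count scheme; the names mirror the
 * kernel code in this patch but nothing here is kernel API.
 */
#include <stdatomic.h>
#include <stddef.h>

struct node;                            /* stands in for struct mem_cgroup */

struct root {
	_Atomic int dead_count;         /* bumped when a descendant dies */
};

struct reclaim_iter {
	struct node *_Atomic last_visited;  /* cached position, may be stale */
	_Atomic int last_dead_count;        /* generation seen at caching time */
};

/* Destruction side: one increment invalidates every cached position. */
static void iter_invalidate(struct root *root)
{
	atomic_fetch_add(&root->dead_count, 1);
}

/*
 * Load side: snapshot the generation and trust the cached position only
 * if no destruction has happened since it was stored.
 */
static struct node *iter_load(struct reclaim_iter *iter,
			      struct root *root, int *seq)
{
	struct node *pos = NULL;

	*seq = atomic_load(&root->dead_count);
	/* The acquire pairs with the release store in iter_update(). */
	if (atomic_load_explicit(&iter->last_dead_count,
				 memory_order_acquire) == *seq)
		pos = atomic_load_explicit(&iter->last_visited,
					   memory_order_relaxed);
	return pos;
}

/*
 * Update side: publish the new position, then the generation that was
 * observed at load time.  Reusing the sequence captured in iter_load()
 * instead of rereading the counter means a destruction racing with this
 * round leaves the cache stale, so the next load starts from scratch.
 */
static void iter_update(struct reclaim_iter *iter,
			struct node *new_pos, int seq)
{
	atomic_store_explicit(&iter->last_visited, new_pos,
			      memory_order_relaxed);
	atomic_store_explicit(&iter->last_dead_count, seq,
			      memory_order_release);
}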
Diffstat (limited to 'mm')
-rw-r--r--  mm/memcontrol.c  |  86
1 file changed, 57 insertions(+), 29 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4748966b1511..2e851f453814 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1148,6 +1148,58 @@ skip_node:
 	return NULL;
 }
 
+static void mem_cgroup_iter_invalidate(struct mem_cgroup *root)
+{
+	/*
+	 * When a group in the hierarchy below root is destroyed, the
+	 * hierarchy iterator can no longer be trusted since it might
+	 * have pointed to the destroyed group. Invalidate it.
+	 */
+	atomic_inc(&root->dead_count);
+}
+
+static struct mem_cgroup *
+mem_cgroup_iter_load(struct mem_cgroup_reclaim_iter *iter,
+		     struct mem_cgroup *root,
+		     int *sequence)
+{
+	struct mem_cgroup *position = NULL;
+	/*
+	 * A cgroup destruction happens in two stages: offlining and
+	 * release. They are separated by a RCU grace period.
+	 *
+	 * If the iterator is valid, we may still race with an
+	 * offlining. The RCU lock ensures the object won't be
+	 * released, tryget will fail if we lost the race.
+	 */
+	*sequence = atomic_read(&root->dead_count);
+	if (iter->last_dead_count == *sequence) {
+		smp_rmb();
+		position = iter->last_visited;
+		if (position && !css_tryget(&position->css))
+			position = NULL;
+	}
+	return position;
+}
+
+static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter,
+				   struct mem_cgroup *last_visited,
+				   struct mem_cgroup *new_position,
+				   int sequence)
+{
+	if (last_visited)
+		css_put(&last_visited->css);
+	/*
+	 * We store the sequence count from the time @last_visited was
+	 * loaded successfully instead of rereading it here so that we
+	 * don't lose destruction events in between. We could have
+	 * raced with the destruction of @new_position after all.
+	 */
+	iter->last_visited = new_position;
+	smp_wmb();
+	iter->last_dead_count = sequence;
+}
+
 /**
  * mem_cgroup_iter - iterate over memory cgroup hierarchy
  * @root: hierarchy root
@@ -1171,7 +1223,6 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 {
 	struct mem_cgroup *memcg = NULL;
 	struct mem_cgroup *last_visited = NULL;
-	unsigned long uninitialized_var(dead_count);
 
 	if (mem_cgroup_disabled())
 		return NULL;
@@ -1191,6 +1242,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 	rcu_read_lock();
 	while (!memcg) {
 		struct mem_cgroup_reclaim_iter *uninitialized_var(iter);
+		int uninitialized_var(seq);
 
 		if (reclaim) {
 			int nid = zone_to_nid(reclaim->zone);
@@ -1204,37 +1256,13 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 				goto out_unlock;
 			}
 
-			/*
-			 * If the dead_count mismatches, a destruction
-			 * has happened or is happening concurrently.
-			 * If the dead_count matches, a destruction
-			 * might still happen concurrently, but since
-			 * we checked under RCU, that destruction
-			 * won't free the object until we release the
-			 * RCU reader lock. Thus, the dead_count
-			 * check verifies the pointer is still valid,
-			 * css_tryget() verifies the cgroup pointed to
-			 * is alive.
-			 */
-			dead_count = atomic_read(&root->dead_count);
-			if (dead_count == iter->last_dead_count) {
-				smp_rmb();
-				last_visited = iter->last_visited;
-				if (last_visited &&
-				    !css_tryget(&last_visited->css))
-					last_visited = NULL;
-			}
+			last_visited = mem_cgroup_iter_load(iter, root, &seq);
 		}
 
 		memcg = __mem_cgroup_iter_next(root, last_visited);
 
 		if (reclaim) {
-			if (last_visited)
-				css_put(&last_visited->css);
-
-			iter->last_visited = memcg;
-			smp_wmb();
-			iter->last_dead_count = dead_count;
+			mem_cgroup_iter_update(iter, last_visited, memcg, seq);
 
 			if (!memcg)
 				iter->generation++;
@@ -6318,14 +6346,14 @@ static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg)
 	struct mem_cgroup *parent = memcg;
 
 	while ((parent = parent_mem_cgroup(parent)))
-		atomic_inc(&parent->dead_count);
+		mem_cgroup_iter_invalidate(parent);
 
 	/*
 	 * if the root memcg is not hierarchical we have to check it
 	 * explicitely.
 	 */
 	if (!root_mem_cgroup->use_hierarchy)
-		atomic_inc(&root_mem_cgroup->dead_count);
+		mem_cgroup_iter_invalidate(root_mem_cgroup);
 }
 
 static void mem_cgroup_css_offline(struct cgroup *cont)
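For the caller's side of this API, two usage patterns from the mm code of this
era, reconstructed from memory, so treat the exact definitions as approximate:
the full-hierarchy walk helper for_each_mem_cgroup_tree() and a reclaim-style
loop driven by a cookie. The wrapper function name below is purely
illustrative, and early exits from either walk go through
mem_cgroup_iter_break(), which drops the reference held on the current group.

/* Full-hierarchy walk; each step drops the previous group's css
 * reference and returns the next group with a reference held. */
#define for_each_mem_cgroup_tree(iter, root)		\
	for (iter = mem_cgroup_iter(root, NULL, NULL);	\
	     iter != NULL;				\
	     iter = mem_cgroup_iter(root, iter, NULL))

/* Reclaim-style walk, modeled on shrink_zone(): the cookie selects the
 * shared per-zone, per-priority iterator state that this patch's
 * mem_cgroup_iter_load()/mem_cgroup_iter_update() pair manages. */
static void walk_hierarchy_for_reclaim(struct mem_cgroup *root,
				       struct zone *zone, int priority)
{
	struct mem_cgroup_reclaim_cookie reclaim = {
		.zone = zone,
		.priority = priority,
	};
	struct mem_cgroup *memcg;

	memcg = mem_cgroup_iter(root, NULL, &reclaim);
	do {
		/* ... shrink the lruvec for this memcg and zone ... */
		memcg = mem_cgroup_iter(root, memcg, &reclaim);
	} while (memcg);
}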