diff options
-rw-r--r-- | mm/memcontrol.c | 86 |
1 files changed, 57 insertions, 29 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 4748966b1511..2e851f453814 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -1148,6 +1148,58 @@ skip_node: | |||
1148 | return NULL; | 1148 | return NULL; |
1149 | } | 1149 | } |
1150 | 1150 | ||
1151 | static void mem_cgroup_iter_invalidate(struct mem_cgroup *root) | ||
1152 | { | ||
1153 | /* | ||
1154 | * When a group in the hierarchy below root is destroyed, the | ||
1155 | * hierarchy iterator can no longer be trusted since it might | ||
1156 | * have pointed to the destroyed group. Invalidate it. | ||
1157 | */ | ||
1158 | atomic_inc(&root->dead_count); | ||
1159 | } | ||
1160 | |||
1161 | static struct mem_cgroup * | ||
1162 | mem_cgroup_iter_load(struct mem_cgroup_reclaim_iter *iter, | ||
1163 | struct mem_cgroup *root, | ||
1164 | int *sequence) | ||
1165 | { | ||
1166 | struct mem_cgroup *position = NULL; | ||
1167 | /* | ||
1168 | * A cgroup destruction happens in two stages: offlining and | ||
1169 | * release. They are separated by an RCU grace period. | |
1170 | * | ||
1171 | * If the iterator is valid, we may still race with an | ||
1172 | * offlining. The RCU lock ensures the object won't be | ||
1173 | * released, tryget will fail if we lost the race. | ||
1174 | */ | ||
1175 | *sequence = atomic_read(&root->dead_count); | ||
1176 | if (iter->last_dead_count == *sequence) { | ||
1177 | smp_rmb(); | ||
1178 | position = iter->last_visited; | ||
1179 | if (position && !css_tryget(&position->css)) | ||
1180 | position = NULL; | ||
1181 | } | ||
1182 | return position; | ||
1183 | } | ||
1184 | |||
1185 | static void mem_cgroup_iter_update(struct mem_cgroup_reclaim_iter *iter, | ||
1186 | struct mem_cgroup *last_visited, | ||
1187 | struct mem_cgroup *new_position, | ||
1188 | int sequence) | ||
1189 | { | ||
1190 | if (last_visited) | ||
1191 | css_put(&last_visited->css); | ||
1192 | /* | ||
1193 | * We store the sequence count from the time @last_visited was | ||
1194 | * loaded successfully instead of rereading it here so that we | ||
1195 | * don't lose destruction events in between. We could have | ||
1196 | * raced with the destruction of @new_position after all. | ||
1197 | */ | ||
1198 | iter->last_visited = new_position; | ||
1199 | smp_wmb(); | ||
1200 | iter->last_dead_count = sequence; | ||
1201 | } | ||
1202 | |||
1151 | /** | 1203 | /** |
1152 | * mem_cgroup_iter - iterate over memory cgroup hierarchy | 1204 | * mem_cgroup_iter - iterate over memory cgroup hierarchy |
1153 | * @root: hierarchy root | 1205 | * @root: hierarchy root |
@@ -1171,7 +1223,6 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, | |||
1171 | { | 1223 | { |
1172 | struct mem_cgroup *memcg = NULL; | 1224 | struct mem_cgroup *memcg = NULL; |
1173 | struct mem_cgroup *last_visited = NULL; | 1225 | struct mem_cgroup *last_visited = NULL; |
1174 | unsigned long uninitialized_var(dead_count); | ||
1175 | 1226 | ||
1176 | if (mem_cgroup_disabled()) | 1227 | if (mem_cgroup_disabled()) |
1177 | return NULL; | 1228 | return NULL; |
@@ -1191,6 +1242,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, | |||
1191 | rcu_read_lock(); | 1242 | rcu_read_lock(); |
1192 | while (!memcg) { | 1243 | while (!memcg) { |
1193 | struct mem_cgroup_reclaim_iter *uninitialized_var(iter); | 1244 | struct mem_cgroup_reclaim_iter *uninitialized_var(iter); |
1245 | int uninitialized_var(seq); | ||
1194 | 1246 | ||
1195 | if (reclaim) { | 1247 | if (reclaim) { |
1196 | int nid = zone_to_nid(reclaim->zone); | 1248 | int nid = zone_to_nid(reclaim->zone); |
@@ -1204,37 +1256,13 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, | |||
1204 | goto out_unlock; | 1256 | goto out_unlock; |
1205 | } | 1257 | } |
1206 | 1258 | ||
1207 | /* | 1259 | last_visited = mem_cgroup_iter_load(iter, root, &seq); |
1208 | * If the dead_count mismatches, a destruction | ||
1209 | * has happened or is happening concurrently. | ||
1210 | * If the dead_count matches, a destruction | ||
1211 | * might still happen concurrently, but since | ||
1212 | * we checked under RCU, that destruction | ||
1213 | * won't free the object until we release the | ||
1214 | * RCU reader lock. Thus, the dead_count | ||
1215 | * check verifies the pointer is still valid, | ||
1216 | * css_tryget() verifies the cgroup pointed to | ||
1217 | * is alive. | ||
1218 | */ | ||
1219 | dead_count = atomic_read(&root->dead_count); | ||
1220 | if (dead_count == iter->last_dead_count) { | ||
1221 | smp_rmb(); | ||
1222 | last_visited = iter->last_visited; | ||
1223 | if (last_visited && | ||
1224 | !css_tryget(&last_visited->css)) | ||
1225 | last_visited = NULL; | ||
1226 | } | ||
1227 | } | 1260 | } |
1228 | 1261 | ||
1229 | memcg = __mem_cgroup_iter_next(root, last_visited); | 1262 | memcg = __mem_cgroup_iter_next(root, last_visited); |
1230 | 1263 | ||
1231 | if (reclaim) { | 1264 | if (reclaim) { |
1232 | if (last_visited) | 1265 | mem_cgroup_iter_update(iter, last_visited, memcg, seq); |
1233 | css_put(&last_visited->css); | ||
1234 | |||
1235 | iter->last_visited = memcg; | ||
1236 | smp_wmb(); | ||
1237 | iter->last_dead_count = dead_count; | ||
1238 | 1266 | ||
1239 | if (!memcg) | 1267 | if (!memcg) |
1240 | iter->generation++; | 1268 | iter->generation++; |
@@ -6318,14 +6346,14 @@ static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg) | |||
6318 | struct mem_cgroup *parent = memcg; | 6346 | struct mem_cgroup *parent = memcg; |
6319 | 6347 | ||
6320 | while ((parent = parent_mem_cgroup(parent))) | 6348 | while ((parent = parent_mem_cgroup(parent))) |
6321 | atomic_inc(&parent->dead_count); | 6349 | mem_cgroup_iter_invalidate(parent); |
6322 | 6350 | ||
6323 | /* | 6351 | /* |
6324 | * if the root memcg is not hierarchical we have to check it | 6352 | * if the root memcg is not hierarchical we have to check it |
6325 | * explicitly. | 6353 | * explicitly. |
6326 | */ | 6354 | */ |
6327 | if (!root_mem_cgroup->use_hierarchy) | 6355 | if (!root_mem_cgroup->use_hierarchy) |
6328 | atomic_inc(&root_mem_cgroup->dead_count); | 6356 | mem_cgroup_iter_invalidate(root_mem_cgroup); |
6329 | } | 6357 | } |
6330 | 6358 | ||
6331 | static void mem_cgroup_css_offline(struct cgroup *cont) | 6359 | static void mem_cgroup_css_offline(struct cgroup *cont) |