Diffstat (limited to 'mm/memcontrol.c')

 mm/memcontrol.c | 69 ++++++++++++++++++++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 52 insertions(+), 17 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 26a38b7c7739..408a5c75d77d 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -152,12 +152,15 @@ struct mem_cgroup_stat_cpu {
 };
 
 struct mem_cgroup_reclaim_iter {
-	/* last scanned hierarchy member with elevated css ref count */
+	/*
+	 * last scanned hierarchy member. Valid only if last_dead_count
+	 * matches memcg->dead_count of the hierarchy root group.
+	 */
 	struct mem_cgroup *last_visited;
+	unsigned long last_dead_count;
+
 	/* scan generation, increased every round-trip */
 	unsigned int generation;
-	/* lock to protect the position and generation */
-	spinlock_t iter_lock;
 };
 
 /*
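Editorial note: the two new fields encode a lazy validity rule for the cached iterator position: last_visited may only be trusted while last_dead_count still equals the dead_count of the hierarchy root. A hypothetical helper (not part of the patch, name invented) makes the invariant explicit:

	static bool iter_cache_valid(struct mem_cgroup_reclaim_iter *iter,
				     struct mem_cgroup *root)
	{
		/* a stale snapshot means last_visited may point to freed memory */
		return iter->last_dead_count == atomic_read(&root->dead_count);
	}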
@@ -337,6 +340,7 @@ struct mem_cgroup {
 	struct mem_cgroup_stat_cpu nocpu_base;
 	spinlock_t pcp_counter_lock;
 
+	atomic_t dead_count;
 #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET)
 	struct tcp_memcontrol tcp_mem;
 #endif
@@ -1092,6 +1096,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 {
 	struct mem_cgroup *memcg = NULL;
 	struct mem_cgroup *last_visited = NULL;
+	unsigned long uninitialized_var(dead_count);
 
 	if (mem_cgroup_disabled())
 		return NULL;
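Editorial note: uninitialized_var() only silences a false-positive -Wmaybe-uninitialized warning here; dead_count is written and consumed on the same reclaim-cookie path, so there is no real uninitialized use. In kernels of this era the macro was a plain self-assignment (from compiler-gcc.h):

	#define uninitialized_var(x) x = x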
@@ -1120,16 +1125,33 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 
 		mz = mem_cgroup_zoneinfo(root, nid, zid);
 		iter = &mz->reclaim_iter[reclaim->priority];
-		spin_lock(&iter->iter_lock);
 		last_visited = iter->last_visited;
 		if (prev && reclaim->generation != iter->generation) {
-			if (last_visited) {
-				css_put(&last_visited->css);
-				iter->last_visited = NULL;
-			}
-			spin_unlock(&iter->iter_lock);
+			iter->last_visited = NULL;
 			goto out_unlock;
 		}
+
+		/*
+		 * If the dead_count mismatches, a destruction
+		 * has happened or is happening concurrently.
+		 * If the dead_count matches, a destruction
+		 * might still happen concurrently, but since
+		 * we checked under RCU, that destruction
+		 * won't free the object until we release the
+		 * RCU reader lock. Thus, the dead_count
+		 * check verifies the pointer is still valid,
+		 * css_tryget() verifies the cgroup pointed to
+		 * is alive.
+		 */
+		dead_count = atomic_read(&root->dead_count);
+		smp_rmb();
+		last_visited = iter->last_visited;
+		if (last_visited) {
+			if ((dead_count != iter->last_dead_count) ||
+				!css_tryget(&last_visited->css)) {
+				last_visited = NULL;
+			}
+		}
 	}
 
 	/*
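Editorial note: the load order here is deliberate. The counter is snapshotted before the cached pointer is loaded, and the smp_rmb() pairs with the smp_wmb() on the store side (next hunk): the write side publishes the pointer before the snapshot that validates it, the read side snapshots the counter before loading the pointer. In the two-column style of Documentation/memory-barriers.txt (condensed, annotations mine):

	caching side (mem_cgroup_iter)          validating side (mem_cgroup_iter)
	------------------------------          ---------------------------------
	iter->last_visited = curr;              dead_count = atomic_read(&root->dead_count);
	smp_wmb();                              smp_rmb();
	iter->last_dead_count = dead_count;     last_visited = iter->last_visited;
	                                        /* trust only if it matches iter->last_dead_count */

The comment block in the hunk carries the liveness argument: a matching dead_count shows the pointer is still valid memory under RCU, and css_tryget() then rejects a group that is valid memory but already dying.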
@@ -1169,16 +1191,14 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
 		if (css && !memcg)
 			curr = mem_cgroup_from_css(css);
 
-		/* make sure that the cached memcg is not removed */
-		if (curr)
-			css_get(&curr->css);
 		iter->last_visited = curr;
+		smp_wmb();
+		iter->last_dead_count = dead_count;
 
 		if (!css)
 			iter->generation++;
 		else if (!prev && memcg)
 			reclaim->generation = iter->generation;
-		spin_unlock(&iter->iter_lock);
 	} else if (css && !memcg) {
 		last_visited = mem_cgroup_from_css(css);
 	}
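Editorial note: for readers more used to userspace memory ordering, the same publish/validate pattern can be sketched with C11 atomics, with atomic_thread_fence() standing in for smp_wmb()/smp_rmb(). This is an illustrative analogue only, not kernel code; all names are invented:

	#include <stdatomic.h>
	#include <stddef.h>

	struct iter_cache {
		void *_Atomic last_visited;             /* cached position */
		_Atomic unsigned long last_dead_count;  /* snapshot at publish time */
	};

	static _Atomic unsigned long dead_count;        /* bumped on each destruction */

	/* Writer: publish the pointer before the snapshot that validates it. */
	static void cache_publish(struct iter_cache *c, void *pos, unsigned long snap)
	{
		atomic_store_explicit(&c->last_visited, pos, memory_order_relaxed);
		atomic_thread_fence(memory_order_release);      /* ~smp_wmb() */
		atomic_store_explicit(&c->last_dead_count, snap, memory_order_relaxed);
	}

	/* Reader: snapshot the counter first; a mismatch means "do not trust". */
	static void *cache_load(struct iter_cache *c)
	{
		unsigned long snap = atomic_load(&dead_count);

		atomic_thread_fence(memory_order_acquire);      /* ~smp_rmb() */
		if (atomic_load_explicit(&c->last_dead_count,
					 memory_order_relaxed) != snap)
			return NULL;    /* stale: pointer may be dangling */
		return atomic_load_explicit(&c->last_visited,
					    memory_order_relaxed);
	}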
@@ -5975,12 +5995,8 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
 		return 1;
 
 	for (zone = 0; zone < MAX_NR_ZONES; zone++) {
-		int prio;
-
 		mz = &pn->zoneinfo[zone];
 		lruvec_init(&mz->lruvec);
-		for (prio = 0; prio < DEF_PRIORITY + 1; prio++)
-			spin_lock_init(&mz->reclaim_iter[prio].iter_lock);
 		mz->usage_in_excess = 0;
 		mz->on_tree = false;
 		mz->memcg = memcg;
@@ -6235,10 +6251,29 @@ mem_cgroup_css_online(struct cgroup *cont)
 	return error;
 }
 
+/*
+ * Announce to all parents that a group from their hierarchy is gone.
+ */
+static void mem_cgroup_invalidate_reclaim_iterators(struct mem_cgroup *memcg)
+{
+	struct mem_cgroup *parent = memcg;
+
+	while ((parent = parent_mem_cgroup(parent)))
+		atomic_inc(&parent->dead_count);
+
+	/*
+	 * If the root memcg is not hierarchical we have to check it
+	 * explicitly.
+	 */
+	if (!root_mem_cgroup->use_hierarchy)
+		atomic_inc(&root_mem_cgroup->dead_count);
+}
+
 static void mem_cgroup_css_offline(struct cgroup *cont)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
 
+	mem_cgroup_invalidate_reclaim_iterators(memcg);
 	mem_cgroup_reparent_charges(memcg);
 	mem_cgroup_destroy_all_caches(memcg);
 }
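Editorial note: for context, the iterator whose cache these hooks invalidate is typically driven from reclaim in a loop of roughly this shape (condensed from the shrink_zone()-era callers; details vary by kernel version):

	struct mem_cgroup_reclaim_cookie reclaim = {
		.zone = zone,
		.priority = priority,
	};
	struct mem_cgroup *memcg;

	memcg = mem_cgroup_iter(root, NULL, &reclaim);
	do {
		/* scan this group's LRU lists ... */
		memcg = mem_cgroup_iter(root, memcg, &reclaim);
	} while (memcg);

With this patch, each resumption takes only a css_tryget() inside the RCU section; when a group is removed, the bumped dead_count makes every position cached under its ancestors self-invalidate instead of pinning the dead group with a long-lived css reference.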