diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memcontrol.c | 86 |
1 files changed, 68 insertions, 18 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 661a2c679f64..26a38b7c7739 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -152,10 +152,12 @@ struct mem_cgroup_stat_cpu { | |||
152 | }; | 152 | }; |
153 | 153 | ||
154 | struct mem_cgroup_reclaim_iter { | 154 | struct mem_cgroup_reclaim_iter { |
155 | /* css_id of the last scanned hierarchy member */ | 155 | /* last scanned hierarchy member with elevated css ref count */ |
156 | int position; | 156 | struct mem_cgroup *last_visited; |
157 | /* scan generation, increased every round-trip */ | 157 | /* scan generation, increased every round-trip */ |
158 | unsigned int generation; | 158 | unsigned int generation; |
159 | /* lock to protect last_visited and generation */ | ||
160 | spinlock_t iter_lock; | ||
159 | }; | 161 | }; |
160 | 162 | ||
161 | /* | 163 | /* |
@@ -1089,7 +1091,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, | |||
1089 | struct mem_cgroup_reclaim_cookie *reclaim) | 1091 | struct mem_cgroup_reclaim_cookie *reclaim) |
1090 | { | 1092 | { |
1091 | struct mem_cgroup *memcg = NULL; | 1093 | struct mem_cgroup *memcg = NULL; |
1092 | int id = 0; | 1094 | struct mem_cgroup *last_visited = NULL; |
1093 | 1095 | ||
1094 | if (mem_cgroup_disabled()) | 1096 | if (mem_cgroup_disabled()) |
1095 | return NULL; | 1097 | return NULL; |
@@ -1098,7 +1100,7 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, | |||
1098 | root = root_mem_cgroup; | 1100 | root = root_mem_cgroup; |
1099 | 1101 | ||
1100 | if (prev && !reclaim) | 1102 | if (prev && !reclaim) |
1101 | id = css_id(&prev->css); | 1103 | last_visited = prev; |
1102 | 1104 | ||
1103 | if (!root->use_hierarchy && root != root_mem_cgroup) { | 1105 | if (!root->use_hierarchy && root != root_mem_cgroup) { |
1104 | if (prev) | 1106 | if (prev) |
@@ -1106,9 +1108,10 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, | |||
1106 | return root; | 1108 | return root; |
1107 | } | 1109 | } |
1108 | 1110 | ||
1111 | rcu_read_lock(); | ||
1109 | while (!memcg) { | 1112 | while (!memcg) { |
1110 | struct mem_cgroup_reclaim_iter *uninitialized_var(iter); | 1113 | struct mem_cgroup_reclaim_iter *uninitialized_var(iter); |
1111 | struct cgroup_subsys_state *css; | 1114 | struct cgroup_subsys_state *css = NULL; |
1112 | 1115 | ||
1113 | if (reclaim) { | 1116 | if (reclaim) { |
1114 | int nid = zone_to_nid(reclaim->zone); | 1117 | int nid = zone_to_nid(reclaim->zone); |
@@ -1117,31 +1120,74 @@ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, | |||
1117 | 1120 | ||
1118 | mz = mem_cgroup_zoneinfo(root, nid, zid); | 1121 | mz = mem_cgroup_zoneinfo(root, nid, zid); |
1119 | iter = &mz->reclaim_iter[reclaim->priority]; | 1122 | iter = &mz->reclaim_iter[reclaim->priority]; |
1120 | if (prev && reclaim->generation != iter->generation) | 1123 | spin_lock(&iter->iter_lock); |
1121 | goto out_css_put; | 1124 | last_visited = iter->last_visited; |
1122 | id = iter->position; | 1125 | if (prev && reclaim->generation != iter->generation) { |
1126 | if (last_visited) { | ||
1127 | css_put(&last_visited->css); | ||
1128 | iter->last_visited = NULL; | ||
1129 | } | ||
1130 | spin_unlock(&iter->iter_lock); | ||
1131 | goto out_unlock; | ||
1132 | } | ||
1123 | } | 1133 | } |
1124 | 1134 | ||
1125 | rcu_read_lock(); | 1135 | /* |
1126 | css = css_get_next(&mem_cgroup_subsys, id + 1, &root->css, &id); | 1136 | * Root is not visited by cgroup iterators so it needs an |
1127 | if (css) { | 1137 | * explicit visit. |
1128 | if (css == &root->css || css_tryget(css)) | 1138 | */ |
1129 | memcg = mem_cgroup_from_css(css); | 1139 | if (!last_visited) { |
1130 | } else | 1140 | css = &root->css; |
1131 | id = 0; | 1141 | } else { |
1132 | rcu_read_unlock(); | 1142 | struct cgroup *prev_cgroup, *next_cgroup; |
1143 | |||
1144 | prev_cgroup = (last_visited == root) ? NULL | ||
1145 | : last_visited->css.cgroup; | ||
1146 | next_cgroup = cgroup_next_descendant_pre(prev_cgroup, | ||
1147 | root->css.cgroup); | ||
1148 | if (next_cgroup) | ||
1149 | css = cgroup_subsys_state(next_cgroup, | ||
1150 | mem_cgroup_subsys_id); | ||
1151 | } | ||
1152 | |||
1153 | /* | ||
1154 | * Even if we found a group we have to make sure it is alive. | ||
1155 | * css && !memcg means that the group should be skipped and | ||
1156 | * we should continue the tree walk. | ||
1157 | * last_visited css is safe to use because it is protected by | ||
1158 | * css_get and the tree walk is rcu safe. | ||
1159 | */ | ||
1160 | if (css == &root->css || (css && css_tryget(css))) | ||
1161 | memcg = mem_cgroup_from_css(css); | ||
1133 | 1162 | ||
1134 | if (reclaim) { | 1163 | if (reclaim) { |
1135 | iter->position = id; | 1164 | struct mem_cgroup *curr = memcg; |
1165 | |||
1166 | if (last_visited) | ||
1167 | css_put(&last_visited->css); | ||
1168 | |||
1169 | if (css && !memcg) | ||
1170 | curr = mem_cgroup_from_css(css); | ||
1171 | |||
1172 | /* make sure that the cached memcg is not removed */ | ||
1173 | if (curr) | ||
1174 | css_get(&curr->css); | ||
1175 | iter->last_visited = curr; | ||
1176 | |||
1136 | if (!css) | 1177 | if (!css) |
1137 | iter->generation++; | 1178 | iter->generation++; |
1138 | else if (!prev && memcg) | 1179 | else if (!prev && memcg) |
1139 | reclaim->generation = iter->generation; | 1180 | reclaim->generation = iter->generation; |
1181 | spin_unlock(&iter->iter_lock); | ||
1182 | } else if (css && !memcg) { | ||
1183 | last_visited = mem_cgroup_from_css(css); | ||
1140 | } | 1184 | } |
1141 | 1185 | ||
1142 | if (prev && !css) | 1186 | if (prev && !css) |
1143 | goto out_css_put; | 1187 | goto out_unlock; |
1144 | } | 1188 | } |
1189 | out_unlock: | ||
1190 | rcu_read_unlock(); | ||
1145 | out_css_put: | 1191 | out_css_put: |
1146 | if (prev && prev != root) | 1192 | if (prev && prev != root) |
1147 | css_put(&prev->css); | 1193 | css_put(&prev->css); |
@@ -5929,8 +5975,12 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node) | |||
5929 | return 1; | 5975 | return 1; |
5930 | 5976 | ||
5931 | for (zone = 0; zone < MAX_NR_ZONES; zone++) { | 5977 | for (zone = 0; zone < MAX_NR_ZONES; zone++) { |
5978 | int prio; | ||
5979 | |||
5932 | mz = &pn->zoneinfo[zone]; | 5980 | mz = &pn->zoneinfo[zone]; |
5933 | lruvec_init(&mz->lruvec); | 5981 | lruvec_init(&mz->lruvec); |
5982 | for (prio = 0; prio < DEF_PRIORITY + 1; prio++) | ||
5983 | spin_lock_init(&mz->reclaim_iter[prio].iter_lock); | ||
5934 | mz->usage_in_excess = 0; | 5984 | mz->usage_in_excess = 0; |
5935 | mz->on_tree = false; | 5985 | mz->on_tree = false; |
5936 | mz->memcg = memcg; | 5986 | mz->memcg = memcg; |