Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--	mm/memcontrol.c	146
1 file changed, 131 insertions(+), 15 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index cdbb7a84cb6e..9ec5e12486a7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -752,15 +752,13 @@ void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
 	/* Update memcg */
 	__mod_memcg_state(memcg, idx, val);
 
+	/* Update lruvec */
+	__this_cpu_add(pn->lruvec_stat_local->count[idx], val);
+
 	x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]);
 	if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
 		struct mem_cgroup_per_node *pi;
 
-		/*
-		 * Batch local counters to keep them in sync with
-		 * the hierarchical ones.
-		 */
-		__this_cpu_add(pn->lruvec_stat_local->count[idx], x);
 		for (pi = pn; pi; pi = parent_nodeinfo(pi, pgdat->node_id))
 			atomic_long_add(x, &pi->lruvec_stat[idx]);
 		x = 0;
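
This hunk moves the local lruvec counter out of the batching branch: lruvec_stat_local is now bumped by val on every call, while only the hierarchical counters are still flushed in MEMCG_CHARGE_BATCH-sized steps. A minimal userspace sketch of that batching pattern follows; the names (BATCH, local, batch, hierarchical) are illustrative stand-ins, and plain globals replace the kernel's per-cpu variables and atomics.

#include <stdio.h>
#include <stdlib.h>

#define BATCH 32 /* stand-in for MEMCG_CHARGE_BATCH */

static long local;         /* updated with val on every call */
static long batch;         /* accumulator, flushed when it grows large */
static long hierarchical;  /* shared counter, expensive to touch */

static void mod_stat(long val)
{
	long x;

	local += val;           /* precise local counter (new behaviour) */
	x = val + batch;
	if (labs(x) > BATCH) {  /* rare flush to the shared counter */
		hierarchical += x;
		x = 0;
	}
	batch = x;
}

int main(void)
{
	for (int i = 0; i < 100; i++)
		mod_stat(1);
	printf("local=%ld hierarchical=%ld pending=%ld\n",
	       local, hierarchical, batch);
	return 0;
}
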
@@ -768,6 +766,26 @@ void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
 	__this_cpu_write(pn->lruvec_stat_cpu->count[idx], x);
 }
 
+void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val)
+{
+	struct page *page = virt_to_head_page(p);
+	pg_data_t *pgdat = page_pgdat(page);
+	struct mem_cgroup *memcg;
+	struct lruvec *lruvec;
+
+	rcu_read_lock();
+	memcg = memcg_from_slab_page(page);
+
+	/* Untracked pages have no memcg, no lruvec. Update only the node */
+	if (!memcg || memcg == root_mem_cgroup) {
+		__mod_node_page_state(pgdat, idx, val);
+	} else {
+		lruvec = mem_cgroup_lruvec(pgdat, memcg);
+		__mod_lruvec_state(lruvec, idx, val);
+	}
+	rcu_read_unlock();
+}
+
 /**
  * __count_memcg_events - account VM events in a cgroup
  * @memcg: the memory cgroup
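
The new __mod_lruvec_slab_state() helper resolves a slab object pointer to its head page under RCU and dispatches the update: objects with no memcg (or charged to root_mem_cgroup) touch only the node counter, while tracked objects go through __mod_lruvec_state(), which updates both the node counter and the memcg lruvec. A userspace model of that dispatch; all types and names here are illustrative stand-ins, restructured but equivalent in effect.

#include <stdio.h>
#include <stddef.h>

struct memcg { const char *name; };

static struct memcg root = { "root" };
static long node_stat;    /* per-node counter, updated either way */
static long lruvec_stat;  /* memcg-specific counter */

static void mod_slab_state(struct memcg *memcg, long val)
{
	node_stat += val;            /* the node counter always moves */
	if (memcg && memcg != &root)
		lruvec_stat += val;  /* tracked objects also hit the lruvec */
}

int main(void)
{
	struct memcg app = { "app" };

	mod_slab_state(NULL, 1);  /* untracked: node only */
	mod_slab_state(&app, 1);  /* tracked: node + lruvec */
	printf("node=%ld lruvec=%ld\n", node_stat, lruvec_stat);
	return 0;
}
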
@@ -1130,26 +1148,45 @@ void mem_cgroup_iter_break(struct mem_cgroup *root,
 		css_put(&prev->css);
 }
 
-static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
+static void __invalidate_reclaim_iterators(struct mem_cgroup *from,
+					struct mem_cgroup *dead_memcg)
 {
-	struct mem_cgroup *memcg = dead_memcg;
 	struct mem_cgroup_reclaim_iter *iter;
 	struct mem_cgroup_per_node *mz;
 	int nid;
 	int i;
 
-	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
-		for_each_node(nid) {
-			mz = mem_cgroup_nodeinfo(memcg, nid);
-			for (i = 0; i <= DEF_PRIORITY; i++) {
-				iter = &mz->iter[i];
-				cmpxchg(&iter->position,
-					dead_memcg, NULL);
-			}
+	for_each_node(nid) {
+		mz = mem_cgroup_nodeinfo(from, nid);
+		for (i = 0; i <= DEF_PRIORITY; i++) {
+			iter = &mz->iter[i];
+			cmpxchg(&iter->position,
+				dead_memcg, NULL);
 		}
 	}
 }
 
+static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
+{
+	struct mem_cgroup *memcg = dead_memcg;
+	struct mem_cgroup *last;
+
+	do {
+		__invalidate_reclaim_iterators(memcg, dead_memcg);
+		last = memcg;
+	} while ((memcg = parent_mem_cgroup(memcg)));
+
+	/*
+	 * When cgroup1 non-hierarchy mode is used,
+	 * parent_mem_cgroup() does not walk all the way up to the
+	 * cgroup root (root_mem_cgroup). So we have to handle
+	 * dead_memcg from cgroup root separately.
+	 */
+	if (last != root_mem_cgroup)
+		__invalidate_reclaim_iterators(root_mem_cgroup,
+					dead_memcg);
+}
+
 /**
  * mem_cgroup_scan_tasks - iterate over tasks of a memory cgroup hierarchy
  * @memcg: hierarchy root
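
The rewrite factors the per-node, per-priority cmpxchg loop into __invalidate_reclaim_iterators() and walks from the dead cgroup upward via parent_mem_cgroup(); because that walk stops short of root_mem_cgroup in cgroup1 non-hierarchy mode, the root is handled as an explicit final step. A small userspace model of this "walk up, then catch the unreachable root" shape; the node type and helpers are illustrative, not kernel code.

#include <stdio.h>

struct node {
	const char *name;
	struct node *parent; /* NULL in non-hierarchy mode */
};

static struct node root = { "root", NULL };

static void invalidate_one(struct node *n, struct node *dead)
{
	printf("clearing iterators of %s that point at %s\n",
	       n->name, dead->name);
}

static void invalidate(struct node *dead)
{
	struct node *n = dead, *last;

	do {
		invalidate_one(n, dead);
		last = n;
	} while ((n = n->parent));

	/* in non-hierarchy mode the walk never reaches the root */
	if (last != &root)
		invalidate_one(&root, dead);
}

int main(void)
{
	struct node orphan = { "orphan", NULL }; /* non-hierarchy child */

	invalidate(&orphan);
	return 0;
}
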
@@ -3221,6 +3258,72 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
 	}
 }
 
+static void memcg_flush_percpu_vmstats(struct mem_cgroup *memcg, bool slab_only)
+{
+	unsigned long stat[MEMCG_NR_STAT];
+	struct mem_cgroup *mi;
+	int node, cpu, i;
+	int min_idx, max_idx;
+
+	if (slab_only) {
+		min_idx = NR_SLAB_RECLAIMABLE;
+		max_idx = NR_SLAB_UNRECLAIMABLE;
+	} else {
+		min_idx = 0;
+		max_idx = MEMCG_NR_STAT;
+	}
+
+	for (i = min_idx; i < max_idx; i++)
+		stat[i] = 0;
+
+	for_each_online_cpu(cpu)
+		for (i = min_idx; i < max_idx; i++)
+			stat[i] += per_cpu(memcg->vmstats_percpu->stat[i], cpu);
+
+	for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
+		for (i = min_idx; i < max_idx; i++)
+			atomic_long_add(stat[i], &mi->vmstats[i]);
+
+	if (!slab_only)
+		max_idx = NR_VM_NODE_STAT_ITEMS;
+
+	for_each_node(node) {
+		struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
+		struct mem_cgroup_per_node *pi;
+
+		for (i = min_idx; i < max_idx; i++)
+			stat[i] = 0;
+
+		for_each_online_cpu(cpu)
+			for (i = min_idx; i < max_idx; i++)
+				stat[i] += per_cpu(
+					pn->lruvec_stat_cpu->count[i], cpu);
+
+		for (pi = pn; pi; pi = parent_nodeinfo(pi, node))
+			for (i = min_idx; i < max_idx; i++)
+				atomic_long_add(stat[i], &pi->lruvec_stat[i]);
+	}
+}
+
+static void memcg_flush_percpu_vmevents(struct mem_cgroup *memcg)
+{
+	unsigned long events[NR_VM_EVENT_ITEMS];
+	struct mem_cgroup *mi;
+	int cpu, i;
+
+	for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
+		events[i] = 0;
+
+	for_each_online_cpu(cpu)
+		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
+			events[i] += per_cpu(memcg->vmstats_percpu->events[i],
+					     cpu);
+
+	for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
+		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
+			atomic_long_add(events[i], &mi->vmevents[i]);
+}
+
 #ifdef CONFIG_MEMCG_KMEM
 static int memcg_online_kmem(struct mem_cgroup *memcg)
 {
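
Both flush helpers share one shape: sum the per-cpu deltas for every index into a local array, then add the sums to the cgroup and each of its ancestors, so the totals stay correct once the per-cpu storage disappears. A compact userspace model of that two-phase flush; NCPU, NSTAT and struct group are illustrative stand-ins, with plain arrays in place of per-cpu variables and atomic_long_add().

#include <stdio.h>

#define NCPU  4
#define NSTAT 2

struct group {
	long stat[NSTAT];          /* hierarchical totals */
	long percpu[NCPU][NSTAT];  /* unflushed per-cpu deltas */
	struct group *parent;
};

static void flush(struct group *g)
{
	long sum[NSTAT] = { 0 };

	/* phase 1: collapse the per-cpu deltas */
	for (int cpu = 0; cpu < NCPU; cpu++)
		for (int i = 0; i < NSTAT; i++)
			sum[i] += g->percpu[cpu][i];

	/* phase 2: propagate to the group and all ancestors */
	for (struct group *a = g; a; a = a->parent)
		for (int i = 0; i < NSTAT; i++)
			a->stat[i] += sum[i];
}

int main(void)
{
	struct group parent = { 0 };
	struct group child = { .parent = &parent };

	child.percpu[0][0] = 5;
	child.percpu[3][0] = -2;
	flush(&child);
	printf("child=%ld parent=%ld\n", child.stat[0], parent.stat[0]);
	return 0;
}
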
@@ -3270,7 +3373,14 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)
 	if (!parent)
 		parent = root_mem_cgroup;
 
+	/*
+	 * Deactivate and reparent kmem_caches. Then flush percpu
+	 * slab statistics to have precise values at the parent and
+	 * all ancestor levels. It's required to keep slab stats
+	 * accurate after the reparenting of kmem_caches.
+	 */
 	memcg_deactivate_kmem_caches(memcg, parent);
+	memcg_flush_percpu_vmstats(memcg, true);
 
 	kmemcg_id = memcg->kmemcg_id;
 	BUG_ON(kmemcg_id < 0);
@@ -4643,6 +4753,12 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 {
 	int node;
 
+	/*
+	 * Flush percpu vmstats and vmevents to guarantee that the values
+	 * are correct at the parent and all ancestor levels.
+	 */
+	memcg_flush_percpu_vmstats(memcg, false);
+	memcg_flush_percpu_vmevents(memcg);
 	for_each_node(node)
 		free_mem_cgroup_per_node_info(memcg, node);
 	free_percpu(memcg->vmstats_percpu);
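
The flush calls are placed before free_mem_cgroup_per_node_info() and free_percpu(), so any deltas still parked in the per-cpu counters are folded into the ancestors instead of being dropped with the allocation. A sketch of that teardown ordering under the same modeling assumptions as above; group_free() and the field names are hypothetical.

#include <stdlib.h>

#define NCPU 4

struct group {
	long events;           /* hierarchical total */
	long *percpu_events;   /* NCPU unflushed per-cpu deltas */
	struct group *parent;
};

static void flush_events(struct group *g)
{
	long sum = 0;

	for (int cpu = 0; cpu < NCPU; cpu++)
		sum += g->percpu_events[cpu];
	for (struct group *a = g; a; a = a->parent)
		a->events += sum;
}

static void group_free(struct group *g)
{
	flush_events(g);         /* must run before the free() below */
	free(g->percpu_events);  /* after this, the deltas are gone */
	free(g);
}
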