Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--	mm/memcontrol.c	146
1 file changed, 131 insertions(+), 15 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index cdbb7a84cb6e..9ec5e12486a7 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -752,15 +752,13 @@ void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
 	/* Update memcg */
 	__mod_memcg_state(memcg, idx, val);
 
+	/* Update lruvec */
+	__this_cpu_add(pn->lruvec_stat_local->count[idx], val);
+
 	x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]);
 	if (unlikely(abs(x) > MEMCG_CHARGE_BATCH)) {
 		struct mem_cgroup_per_node *pi;
 
-		/*
-		 * Batch local counters to keep them in sync with
-		 * the hierarchical ones.
-		 */
-		__this_cpu_add(pn->lruvec_stat_local->count[idx], x);
 		for (pi = pn; pi; pi = parent_nodeinfo(pi, pgdat->node_id))
 			atomic_long_add(x, &pi->lruvec_stat[idx]);
 		x = 0;
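The hunk above moves the lruvec_stat_local update out of the batching branch, so the local counter sees every delta immediately while the hierarchical counters are still flushed only once the per-CPU delta exceeds MEMCG_CHARGE_BATCH. As a rough illustration of that batching pattern, here is a minimal user-space sketch; batched_counter, counter_mod and FLUSH_THRESHOLD are illustrative names, not kernel identifiers:

/*
 * Illustrative user-space sketch (not kernel code): each "CPU" keeps a small
 * signed delta and only folds it into the shared atomic counter once the
 * delta's magnitude exceeds a threshold, mirroring the MEMCG_CHARGE_BATCH
 * logic in __mod_lruvec_state() above.
 */
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

#define FLUSH_THRESHOLD 32		/* stand-in for MEMCG_CHARGE_BATCH */

struct batched_counter {
	atomic_long shared;		/* hierarchical/global value */
	long percpu_delta;		/* one slot per CPU in real code */
};

static void counter_mod(struct batched_counter *c, long val)
{
	long x = c->percpu_delta + val;

	if (labs(x) > FLUSH_THRESHOLD) {
		atomic_fetch_add(&c->shared, x);	/* flush the batch */
		x = 0;
	}
	c->percpu_delta = x;				/* keep the remainder */
}

int main(void)
{
	struct batched_counter c = { .shared = 0, .percpu_delta = 0 };

	for (int i = 0; i < 100; i++)
		counter_mod(&c, 1);
	printf("shared=%ld delta=%ld\n",
	       atomic_load(&c.shared), c.percpu_delta);
	return 0;
}

At any point the shared counter lags the true total by at most FLUSH_THRESHOLD, which is the same trade-off the kernel makes between update cost and counter precision.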
@@ -768,6 +766,26 @@ void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
 	__this_cpu_write(pn->lruvec_stat_cpu->count[idx], x);
 }
 
+void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val)
+{
+	struct page *page = virt_to_head_page(p);
+	pg_data_t *pgdat = page_pgdat(page);
+	struct mem_cgroup *memcg;
+	struct lruvec *lruvec;
+
+	rcu_read_lock();
+	memcg = memcg_from_slab_page(page);
+
+	/* Untracked pages have no memcg, no lruvec. Update only the node */
+	if (!memcg || memcg == root_mem_cgroup) {
+		__mod_node_page_state(pgdat, idx, val);
+	} else {
+		lruvec = mem_cgroup_lruvec(pgdat, memcg);
+		__mod_lruvec_state(lruvec, idx, val);
+	}
+	rcu_read_unlock();
+}
+
 /**
  * __count_memcg_events - account VM events in a cgroup
  * @memcg: the memory cgroup
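Because __mod_lruvec_slab_state() only needs an address that lies inside the slab page, callers do not have to carry a struct page or a memcg pointer around. A hedged, hypothetical caller sketch follows; the function name and the ±1 delta are illustrative, only __mod_lruvec_slab_state() (from the hunk above) and the WORKINGSET_NODES node_stat_item are real kernel identifiers:

/*
 * Hypothetical caller sketch, not part of this patch: bump or drop a
 * node_stat_item counter for a slab-backed object, given only the object's
 * kernel virtual address. __mod_lruvec_slab_state() resolves the owning
 * memcg (if any) internally and falls back to node-only accounting.
 */
static inline void example_account_shadow_node(void *node, bool created)
{
	__mod_lruvec_slab_state(node, WORKINGSET_NODES, created ? 1 : -1);
}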
@@ -1130,26 +1148,45 @@ void mem_cgroup_iter_break(struct mem_cgroup *root,
 	css_put(&prev->css);
 }
 
-static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
+static void __invalidate_reclaim_iterators(struct mem_cgroup *from,
+					   struct mem_cgroup *dead_memcg)
 {
-	struct mem_cgroup *memcg = dead_memcg;
 	struct mem_cgroup_reclaim_iter *iter;
 	struct mem_cgroup_per_node *mz;
 	int nid;
 	int i;
 
-	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
-		for_each_node(nid) {
-			mz = mem_cgroup_nodeinfo(memcg, nid);
-			for (i = 0; i <= DEF_PRIORITY; i++) {
-				iter = &mz->iter[i];
-				cmpxchg(&iter->position,
-					dead_memcg, NULL);
-			}
+	for_each_node(nid) {
+		mz = mem_cgroup_nodeinfo(from, nid);
+		for (i = 0; i <= DEF_PRIORITY; i++) {
+			iter = &mz->iter[i];
+			cmpxchg(&iter->position,
+				dead_memcg, NULL);
 		}
 	}
 }
 
+static void invalidate_reclaim_iterators(struct mem_cgroup *dead_memcg)
+{
+	struct mem_cgroup *memcg = dead_memcg;
+	struct mem_cgroup *last;
+
+	do {
+		__invalidate_reclaim_iterators(memcg, dead_memcg);
+		last = memcg;
+	} while ((memcg = parent_mem_cgroup(memcg)));
+
+	/*
+	 * When cgroup1 non-hierarchy mode is used,
+	 * parent_mem_cgroup() does not walk all the way up to the
+	 * cgroup root (root_mem_cgroup). So we have to handle
+	 * dead_memcg from cgroup root separately.
+	 */
+	if (last != root_mem_cgroup)
+		__invalidate_reclaim_iterators(root_mem_cgroup,
+					       dead_memcg);
+}
+
 /**
  * mem_cgroup_scan_tasks - iterate over tasks of a memory cgroup hierarchy
  * @memcg: hierarchy root
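In the hunk above, the new __invalidate_reclaim_iterators() clears any cached reclaim iterator that still points at the dying memcg, and the rewritten wrapper walks the dying memcg's ancestors (plus root_mem_cgroup explicitly, for cgroup1 non-hierarchical setups) so every level is covered. The cmpxchg() is the key detail: the cached position is cleared only if it still refers to the dying memcg, so a concurrent writer that has already advanced the iterator is never clobbered. A minimal user-space sketch of that pattern (demo names only, not kernel code):

/*
 * Illustrative user-space analogue of cmpxchg(&iter->position, dead, NULL):
 * clear a cached pointer only if it still refers to the object being torn
 * down.
 */
#include <stdatomic.h>
#include <stddef.h>

struct reclaim_iter_demo {
	_Atomic(void *) position;	/* last visited object, or NULL */
};

static void invalidate_iter_demo(struct reclaim_iter_demo *iter, void *dead)
{
	void *expected = dead;

	/* Succeeds (and stores NULL) only when position still equals dead. */
	atomic_compare_exchange_strong(&iter->position, &expected, NULL);
}

int main(void)
{
	struct reclaim_iter_demo iter = { .position = NULL };
	int dead_obj;

	atomic_store(&iter.position, &dead_obj);
	invalidate_iter_demo(&iter, &dead_obj);	/* clears the stale pointer */
	return atomic_load(&iter.position) != NULL;
}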
@@ -3221,6 +3258,72 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
 	}
 }
 
+static void memcg_flush_percpu_vmstats(struct mem_cgroup *memcg, bool slab_only)
+{
+	unsigned long stat[MEMCG_NR_STAT];
+	struct mem_cgroup *mi;
+	int node, cpu, i;
+	int min_idx, max_idx;
+
+	if (slab_only) {
+		min_idx = NR_SLAB_RECLAIMABLE;
+		max_idx = NR_SLAB_UNRECLAIMABLE;
+	} else {
+		min_idx = 0;
+		max_idx = MEMCG_NR_STAT;
+	}
+
+	for (i = min_idx; i < max_idx; i++)
+		stat[i] = 0;
+
+	for_each_online_cpu(cpu)
+		for (i = min_idx; i < max_idx; i++)
+			stat[i] += per_cpu(memcg->vmstats_percpu->stat[i], cpu);
+
+	for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
+		for (i = min_idx; i < max_idx; i++)
+			atomic_long_add(stat[i], &mi->vmstats[i]);
+
+	if (!slab_only)
+		max_idx = NR_VM_NODE_STAT_ITEMS;
+
+	for_each_node(node) {
+		struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
+		struct mem_cgroup_per_node *pi;
+
+		for (i = min_idx; i < max_idx; i++)
+			stat[i] = 0;
+
+		for_each_online_cpu(cpu)
+			for (i = min_idx; i < max_idx; i++)
+				stat[i] += per_cpu(
+					pn->lruvec_stat_cpu->count[i], cpu);
+
+		for (pi = pn; pi; pi = parent_nodeinfo(pi, node))
+			for (i = min_idx; i < max_idx; i++)
+				atomic_long_add(stat[i], &pi->lruvec_stat[i]);
+	}
+}
+
+static void memcg_flush_percpu_vmevents(struct mem_cgroup *memcg)
+{
+	unsigned long events[NR_VM_EVENT_ITEMS];
+	struct mem_cgroup *mi;
+	int cpu, i;
+
+	for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
+		events[i] = 0;
+
+	for_each_online_cpu(cpu)
+		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
+			events[i] += per_cpu(memcg->vmstats_percpu->events[i],
+					     cpu);
+
+	for (mi = memcg; mi; mi = parent_mem_cgroup(mi))
+		for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
+			atomic_long_add(events[i], &mi->vmevents[i]);
+}
+
 #ifdef CONFIG_MEMCG_KMEM
 static int memcg_online_kmem(struct mem_cgroup *memcg)
 {
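The two flush helpers added above follow the same shape: sum each counter's per-CPU slots into a local array, then atomically add the totals to the counter of the dying memcg and of every ancestor, so nothing counted is lost once the per-CPU storage goes away. A minimal user-space sketch of that aggregation pattern (group_demo and the array sizes are illustrative, not kernel identifiers):

/*
 * Illustrative user-space sketch (not kernel code): flush per-"CPU" deltas
 * into a local array first, then propagate the totals up the parent chain,
 * mirroring memcg_flush_percpu_vmstats()/memcg_flush_percpu_vmevents().
 */
#include <stdatomic.h>

#define NR_CPUS_DEMO	4
#define NR_STATS_DEMO	8

struct group_demo {
	struct group_demo *parent;
	atomic_long vmstats[NR_STATS_DEMO];
	long percpu[NR_CPUS_DEMO][NR_STATS_DEMO];	/* per-CPU deltas */
};

void flush_percpu_demo(struct group_demo *g)
{
	long stat[NR_STATS_DEMO] = { 0 };

	for (int cpu = 0; cpu < NR_CPUS_DEMO; cpu++)
		for (int i = 0; i < NR_STATS_DEMO; i++)
			stat[i] += g->percpu[cpu][i];

	for (struct group_demo *mi = g; mi; mi = mi->parent)
		for (int i = 0; i < NR_STATS_DEMO; i++)
			atomic_fetch_add(&mi->vmstats[i], stat[i]);
}

int main(void)
{
	struct group_demo parent = { 0 }, child = { .parent = &parent };

	child.percpu[0][3] = 5;		/* pending delta on "CPU" 0 */
	flush_percpu_demo(&child);	/* both child and parent now see +5 */
	return 0;
}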
@@ -3270,7 +3373,14 @@ static void memcg_offline_kmem(struct mem_cgroup *memcg)
 	if (!parent)
 		parent = root_mem_cgroup;
 
+	/*
+	 * Deactivate and reparent kmem_caches. Then flush percpu
+	 * slab statistics to have precise values at the parent and
+	 * all ancestor levels. It's required to keep slab stats
+	 * accurate after the reparenting of kmem_caches.
+	 */
 	memcg_deactivate_kmem_caches(memcg, parent);
+	memcg_flush_percpu_vmstats(memcg, true);
 
 	kmemcg_id = memcg->kmemcg_id;
 	BUG_ON(kmemcg_id < 0);
@@ -4643,6 +4753,12 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 {
 	int node;
 
+	/*
+	 * Flush percpu vmstats and vmevents to guarantee that the values
+	 * remain correct at the parent and all ancestor levels.
+	 */
+	memcg_flush_percpu_vmstats(memcg, false);
+	memcg_flush_percpu_vmevents(memcg);
 	for_each_node(node)
 		free_mem_cgroup_per_node_info(memcg, node);
 	free_percpu(memcg->vmstats_percpu);