author     Greg Thelen <gthelen@google.com>	2015-10-01 18:37:05 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>	2015-10-01 21:42:35 -0400
commit     484ebb3b8c8b27dd2171696462a3116edb9ff801 (patch)
tree       1056eab0960e2ec868c6a5cfd3076d05ee5887a3 /mm
parent     0610c25daa3e76e38ad5a8fae683a89ff9f71798 (diff)
memcg: make mem_cgroup_read_stat() unsigned
mem_cgroup_read_stat() returns a page count by summing per cpu page
counters.  The summing is racy wrt. updates, so a transient negative sum
is possible.

Callers don't want negative values:
- mem_cgroup_wb_stats() doesn't want negative nr_dirty or nr_writeback.
  This could confuse dirty throttling.
- oom reports and memory.stat shouldn't show confusing negative usage.
- tree_usage() already avoids negatives.

Avoid returning negative page counts from mem_cgroup_read_stat() and
convert it to unsigned.

[akpm@linux-foundation.org: fix old typo while we're in there]
Signed-off-by: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: <stable@vger.kernel.org>	[4.2+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
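To see the pattern the patch applies in isolation, here is a minimal userspace C sketch of the same sum-then-clamp idea. It is only an illustration: the stat_count[] array, NR_CPUS constant and read_stat() name are stand-ins invented for this sketch, not the kernel's per_cpu() machinery or the real mem_cgroup_read_stat() signature.

#include <stdio.h>

/* Illustrative stand-ins; the kernel uses per_cpu() accessors, not a plain array. */
#define NR_CPUS 4
static long stat_count[NR_CPUS];

/*
 * Accumulate in a signed local so transient negative per-cpu values can
 * cancel out, then clamp before handing callers an unsigned count.
 */
static unsigned long read_stat(void)
{
	long val = 0;
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		val += stat_count[cpu];
	/* A racing decrement may leave the sum negative; never expose that. */
	if (val < 0)
		val = 0;
	return val;
}

int main(void)
{
	/* Simulate a race: an uncharge seen on cpu1 before the matching charge on cpu0. */
	stat_count[0] = 0;
	stat_count[1] = -3;
	printf("pages: %lu\n", read_stat());	/* prints 0, not a wrapped huge value */
	return 0;
}

Returning the clamped sum as unsigned long means a transient negative total is reported as 0 rather than wrapping into an enormous count in callers such as dirty throttling, oom reports or memory.stat.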
Diffstat (limited to 'mm')
-rw-r--r--	mm/memcontrol.c	30
1 file changed, 18 insertions, 12 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6ddaeba34e09..03cc0a742ff1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -644,12 +644,14 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
 }
 
 /*
+ * Return page count for single (non recursive) @memcg.
+ *
  * Implementation Note: reading percpu statistics for memcg.
  *
  * Both of vmstat[] and percpu_counter has threshold and do periodic
  * synchronization to implement "quick" read. There are trade-off between
  * reading cost and precision of value. Then, we may have a chance to implement
- * a periodic synchronizion of counter in memcg's counter.
+ * a periodic synchronization of counter in memcg's counter.
  *
  * But this _read() function is used for user interface now. The user accounts
  * memory usage by memory cgroup and he _always_ requires exact value because
@@ -659,17 +661,24 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
  *
  * If there are kernel internal actions which can make use of some not-exact
  * value, and reading all cpu value can be performance bottleneck in some
- * common workload, threashold and synchonization as vmstat[] should be
+ * common workload, threshold and synchronization as vmstat[] should be
  * implemented.
  */
-static long mem_cgroup_read_stat(struct mem_cgroup *memcg,
-				 enum mem_cgroup_stat_index idx)
+static unsigned long
+mem_cgroup_read_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx)
 {
 	long val = 0;
 	int cpu;
 
+	/* Per-cpu values can be negative, use a signed accumulator */
 	for_each_possible_cpu(cpu)
 		val += per_cpu(memcg->stat->count[idx], cpu);
+	/*
+	 * Summing races with updates, so val may be negative.  Avoid exposing
+	 * transient negative values.
+	 */
+	if (val < 0)
+		val = 0;
 	return val;
 }
 
@@ -1254,7 +1263,7 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
 	for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
 		if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
 			continue;
-		pr_cont(" %s:%ldKB", mem_cgroup_stat_names[i],
+		pr_cont(" %s:%luKB", mem_cgroup_stat_names[i],
 			K(mem_cgroup_read_stat(iter, i)));
 	}
 
@@ -2819,14 +2828,11 @@ static unsigned long tree_stat(struct mem_cgroup *memcg,
 			   enum mem_cgroup_stat_index idx)
 {
 	struct mem_cgroup *iter;
-	long val = 0;
+	unsigned long val = 0;
 
-	/* Per-cpu values can be negative, use a signed accumulator */
 	for_each_mem_cgroup_tree(iter, memcg)
 		val += mem_cgroup_read_stat(iter, idx);
 
-	if (val < 0) /* race ? */
-		val = 0;
 	return val;
 }
 
@@ -3169,7 +3175,7 @@ static int memcg_stat_show(struct seq_file *m, void *v)
 	for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
 		if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
 			continue;
-		seq_printf(m, "%s %ld\n", mem_cgroup_stat_names[i],
+		seq_printf(m, "%s %lu\n", mem_cgroup_stat_names[i],
 			   mem_cgroup_read_stat(memcg, i) * PAGE_SIZE);
 	}
 
@@ -3194,13 +3200,13 @@ static int memcg_stat_show(struct seq_file *m, void *v)
 			   (u64)memsw * PAGE_SIZE);
 
 	for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
-		long long val = 0;
+		unsigned long long val = 0;
 
 		if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
 			continue;
 		for_each_mem_cgroup_tree(mi, memcg)
 			val += mem_cgroup_read_stat(mi, i) * PAGE_SIZE;
-		seq_printf(m, "total_%s %lld\n", mem_cgroup_stat_names[i], val);
+		seq_printf(m, "total_%s %llu\n", mem_cgroup_stat_names[i], val);
 	}
 
 	for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) {