author    | Greg Thelen <gthelen@google.com>               | 2015-10-01 18:37:05 -0400
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-10-01 21:42:35 -0400
commit    | 484ebb3b8c8b27dd2171696462a3116edb9ff801 (patch)
tree      | 1056eab0960e2ec868c6a5cfd3076d05ee5887a3 /mm
parent    | 0610c25daa3e76e38ad5a8fae683a89ff9f71798 (diff)
memcg: make mem_cgroup_read_stat() unsigned
mem_cgroup_read_stat() returns a page count by summing per cpu page
counters. The summing is racy wrt. updates, so a transient negative
sum is possible. Callers don't want negative values:
- mem_cgroup_wb_stats() doesn't want negative nr_dirty or nr_writeback.
This could confuse dirty throttling.
- oom reports and memory.stat shouldn't show confusing negative usage.
- tree_usage() already avoids negatives.
Avoid returning negative page counts from mem_cgroup_read_stat() and
convert it to unsigned.
[akpm@linux-foundation.org: fix old typo while we're in there]
Signed-off-by: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: <stable@vger.kernel.org> [4.2+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
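The pattern the commit describes can be modeled in a few lines of stand-alone C: sum the racy per-cpu counters into a signed accumulator, then clamp a transient negative total to zero before handing back an unsigned count. The names below (NR_FAKE_CPUS, fake_percpu_count, read_stat_clamped) are invented for illustration only and do not correspond to kernel APIs:

#include <stdio.h>

#define NR_FAKE_CPUS 4

/* Per-"cpu" deltas; any single entry may legitimately be negative. */
static long fake_percpu_count[NR_FAKE_CPUS];

/* Sum the per-cpu deltas without ever exposing a transient negative total. */
static unsigned long read_stat_clamped(void)
{
        long val = 0;   /* signed accumulator, as in mem_cgroup_read_stat() */
        int cpu;

        for (cpu = 0; cpu < NR_FAKE_CPUS; cpu++)
                val += fake_percpu_count[cpu];
        if (val < 0)    /* racy snapshot: clamp instead of reporting nonsense */
                val = 0;
        return val;
}

int main(void)
{
        /*
         * A page is charged on cpu 0 and uncharged on cpu 3; a reader that
         * has only seen cpu 3's update would compute a raw sum of -1.
         */
        fake_percpu_count[3] -= 1;
        printf("clamped page count: %lu\n", read_stat_clamped());  /* prints 0 */
        fake_percpu_count[0] += 1;
        printf("clamped page count: %lu\n", read_stat_clamped());  /* prints 0 */
        return 0;
}

Clamping at this level is also what lets the hierarchical summing in tree_stat() (see the diff below) drop its own negative-value check: it now only adds non-negative unsigned values.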
Diffstat (limited to 'mm')
-rw-r--r-- | mm/memcontrol.c | 30
1 file changed, 18 insertions, 12 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 6ddaeba34e09..03cc0a742ff1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -644,12 +644,14 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
 }
 
 /*
+ * Return page count for single (non recursive) @memcg.
+ *
  * Implementation Note: reading percpu statistics for memcg.
  *
  * Both of vmstat[] and percpu_counter has threshold and do periodic
  * synchronization to implement "quick" read. There are trade-off between
  * reading cost and precision of value. Then, we may have a chance to implement
- * a periodic synchronizion of counter in memcg's counter.
+ * a periodic synchronization of counter in memcg's counter.
  *
  * But this _read() function is used for user interface now. The user accounts
  * memory usage by memory cgroup and he _always_ requires exact value because
@@ -659,17 +661,24 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
  *
  * If there are kernel internal actions which can make use of some not-exact
  * value, and reading all cpu value can be performance bottleneck in some
- * common workload, threashold and synchonization as vmstat[] should be
+ * common workload, threshold and synchronization as vmstat[] should be
  * implemented.
  */
-static long mem_cgroup_read_stat(struct mem_cgroup *memcg,
-                                 enum mem_cgroup_stat_index idx)
+static unsigned long
+mem_cgroup_read_stat(struct mem_cgroup *memcg, enum mem_cgroup_stat_index idx)
 {
         long val = 0;
         int cpu;
 
+        /* Per-cpu values can be negative, use a signed accumulator */
         for_each_possible_cpu(cpu)
                 val += per_cpu(memcg->stat->count[idx], cpu);
+        /*
+         * Summing races with updates, so val may be negative. Avoid exposing
+         * transient negative values.
+         */
+        if (val < 0)
+                val = 0;
         return val;
 }
 
@@ -1254,7 +1263,7 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
         for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
                 if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
                         continue;
-                pr_cont(" %s:%ldKB", mem_cgroup_stat_names[i],
+                pr_cont(" %s:%luKB", mem_cgroup_stat_names[i],
                         K(mem_cgroup_read_stat(iter, i)));
         }
 
@@ -2819,14 +2828,11 @@ static unsigned long tree_stat(struct mem_cgroup *memcg,
                                enum mem_cgroup_stat_index idx)
 {
         struct mem_cgroup *iter;
-        long val = 0;
+        unsigned long val = 0;
 
-        /* Per-cpu values can be negative, use a signed accumulator */
         for_each_mem_cgroup_tree(iter, memcg)
                 val += mem_cgroup_read_stat(iter, idx);
 
-        if (val < 0) /* race ? */
-                val = 0;
         return val;
 }
 
@@ -3169,7 +3175,7 @@ static int memcg_stat_show(struct seq_file *m, void *v)
         for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
                 if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
                         continue;
-                seq_printf(m, "%s %ld\n", mem_cgroup_stat_names[i],
+                seq_printf(m, "%s %lu\n", mem_cgroup_stat_names[i],
                            mem_cgroup_read_stat(memcg, i) * PAGE_SIZE);
         }
 
@@ -3194,13 +3200,13 @@ static int memcg_stat_show(struct seq_file *m, void *v)
                            (u64)memsw * PAGE_SIZE);
 
         for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
-                long long val = 0;
+                unsigned long long val = 0;
 
                 if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
                         continue;
                 for_each_mem_cgroup_tree(mi, memcg)
                         val += mem_cgroup_read_stat(mi, i) * PAGE_SIZE;
-                seq_printf(m, "total_%s %lld\n", mem_cgroup_stat_names[i], val);
+                seq_printf(m, "total_%s %llu\n", mem_cgroup_stat_names[i], val);
         }
 
         for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) {
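The restored "Implementation Note" above weighs exact per-cpu summing against a vmstat[]-style scheme in which each CPU batches small deltas locally and only folds them into a shared value once they cross a threshold. A rough stand-alone sketch of that alternative, again with invented names (THRESHOLD, local_delta, shared_total, mod_counter) and no claim to match the kernel's vmstat implementation:

#include <stdio.h>
#include <stdlib.h>

#define NR_FAKE_CPUS    4
#define THRESHOLD       32

static long shared_total;                       /* approximate global count */
static long local_delta[NR_FAKE_CPUS];          /* per-"cpu" unfolded deltas */

/* Apply a delta on one cpu; only occasionally touches the shared total. */
static void mod_counter(int cpu, long delta)
{
        local_delta[cpu] += delta;
        if (labs(local_delta[cpu]) >= THRESHOLD) {
                shared_total += local_delta[cpu];       /* fold and reset */
                local_delta[cpu] = 0;
        }
}

/* "Quick" read: never walks the per-cpu deltas, so it may lag behind. */
static long read_counter_fast(void)
{
        return shared_total;
}

int main(void)
{
        int cpu;

        for (cpu = 0; cpu < NR_FAKE_CPUS; cpu++)
                mod_counter(cpu, 20);           /* all below threshold */
        printf("fast read after 80 updates: %ld\n", read_counter_fast()); /* 0 */

        mod_counter(0, 20);                     /* cpu 0 crosses the threshold */
        printf("fast read after folding:    %ld\n", read_counter_fast()); /* 40 */
        return 0;
}

The trade-off is visible in the read path: it is cheap but can be stale by up to NR_FAKE_CPUS * THRESHOLD, which may be tolerable for internal heuristics but not for the exact values the comment says user-visible reads require.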