diff options
-rw-r--r-- | mm/memcontrol.c | 184 |
1 files changed, 63 insertions, 121 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a82464b6e3d2..9c9dfcf7a6d1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c | |||
@@ -89,54 +89,8 @@ enum mem_cgroup_stat_index { | |||
89 | 89 | ||
90 | struct mem_cgroup_stat_cpu { | 90 | struct mem_cgroup_stat_cpu { |
91 | s64 count[MEM_CGROUP_STAT_NSTATS]; | 91 | s64 count[MEM_CGROUP_STAT_NSTATS]; |
92 | } ____cacheline_aligned_in_smp; | ||
93 | |||
94 | struct mem_cgroup_stat { | ||
95 | struct mem_cgroup_stat_cpu cpustat[0]; | ||
96 | }; | 92 | }; |
97 | 93 | ||
98 | static inline void | ||
99 | __mem_cgroup_stat_set_safe(struct mem_cgroup_stat_cpu *stat, | ||
100 | enum mem_cgroup_stat_index idx, s64 val) | ||
101 | { | ||
102 | stat->count[idx] = val; | ||
103 | } | ||
104 | |||
105 | static inline s64 | ||
106 | __mem_cgroup_stat_read_local(struct mem_cgroup_stat_cpu *stat, | ||
107 | enum mem_cgroup_stat_index idx) | ||
108 | { | ||
109 | return stat->count[idx]; | ||
110 | } | ||
111 | |||
112 | /* | ||
113 | * For accounting under irq disable, no need for increment preempt count. | ||
114 | */ | ||
115 | static inline void __mem_cgroup_stat_add_safe(struct mem_cgroup_stat_cpu *stat, | ||
116 | enum mem_cgroup_stat_index idx, int val) | ||
117 | { | ||
118 | stat->count[idx] += val; | ||
119 | } | ||
120 | |||
121 | static s64 mem_cgroup_read_stat(struct mem_cgroup_stat *stat, | ||
122 | enum mem_cgroup_stat_index idx) | ||
123 | { | ||
124 | int cpu; | ||
125 | s64 ret = 0; | ||
126 | for_each_possible_cpu(cpu) | ||
127 | ret += stat->cpustat[cpu].count[idx]; | ||
128 | return ret; | ||
129 | } | ||
130 | |||
131 | static s64 mem_cgroup_local_usage(struct mem_cgroup_stat *stat) | ||
132 | { | ||
133 | s64 ret; | ||
134 | |||
135 | ret = mem_cgroup_read_stat(stat, MEM_CGROUP_STAT_CACHE); | ||
136 | ret += mem_cgroup_read_stat(stat, MEM_CGROUP_STAT_RSS); | ||
137 | return ret; | ||
138 | } | ||
139 | |||
140 | /* | 94 | /* |
141 | * per-zone information in memory controller. | 95 | * per-zone information in memory controller. |
142 | */ | 96 | */ |
@@ -270,9 +224,9 @@ struct mem_cgroup { | |||
270 | unsigned long move_charge_at_immigrate; | 224 | unsigned long move_charge_at_immigrate; |
271 | 225 | ||
272 | /* | 226 | /* |
273 | * statistics. This must be placed at the end of memcg. | 227 | * percpu counter. |
274 | */ | 228 | */ |
275 | struct mem_cgroup_stat stat; | 229 | struct mem_cgroup_stat_cpu *stat; |
276 | }; | 230 | }; |
277 | 231 | ||
278 | /* Stuffs for move charges at task migration. */ | 232 | /* Stuffs for move charges at task migration. */ |
@@ -441,19 +395,14 @@ mem_cgroup_remove_exceeded(struct mem_cgroup *mem, | |||
441 | static bool mem_cgroup_soft_limit_check(struct mem_cgroup *mem) | 395 | static bool mem_cgroup_soft_limit_check(struct mem_cgroup *mem) |
442 | { | 396 | { |
443 | bool ret = false; | 397 | bool ret = false; |
444 | int cpu; | ||
445 | s64 val; | 398 | s64 val; |
446 | struct mem_cgroup_stat_cpu *cpustat; | ||
447 | 399 | ||
448 | cpu = get_cpu(); | 400 | val = this_cpu_read(mem->stat->count[MEM_CGROUP_STAT_SOFTLIMIT]); |
449 | cpustat = &mem->stat.cpustat[cpu]; | ||
450 | val = __mem_cgroup_stat_read_local(cpustat, MEM_CGROUP_STAT_SOFTLIMIT); | ||
451 | if (unlikely(val < 0)) { | 401 | if (unlikely(val < 0)) { |
452 | __mem_cgroup_stat_set_safe(cpustat, MEM_CGROUP_STAT_SOFTLIMIT, | 402 | this_cpu_write(mem->stat->count[MEM_CGROUP_STAT_SOFTLIMIT], |
453 | SOFTLIMIT_EVENTS_THRESH); | 403 | SOFTLIMIT_EVENTS_THRESH); |
454 | ret = true; | 404 | ret = true; |
455 | } | 405 | } |
456 | put_cpu(); | ||
457 | return ret; | 406 | return ret; |
458 | } | 407 | } |
459 | 408 | ||
@@ -549,17 +498,31 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) | |||
549 | return mz; | 498 | return mz; |
550 | } | 499 | } |
551 | 500 | ||
501 | static s64 mem_cgroup_read_stat(struct mem_cgroup *mem, | ||
502 | enum mem_cgroup_stat_index idx) | ||
503 | { | ||
504 | int cpu; | ||
505 | s64 val = 0; | ||
506 | |||
507 | for_each_possible_cpu(cpu) | ||
508 | val += per_cpu(mem->stat->count[idx], cpu); | ||
509 | return val; | ||
510 | } | ||
511 | |||
512 | static s64 mem_cgroup_local_usage(struct mem_cgroup *mem) | ||
513 | { | ||
514 | s64 ret; | ||
515 | |||
516 | ret = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_RSS); | ||
517 | ret += mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_CACHE); | ||
518 | return ret; | ||
519 | } | ||
520 | |||
552 | static void mem_cgroup_swap_statistics(struct mem_cgroup *mem, | 521 | static void mem_cgroup_swap_statistics(struct mem_cgroup *mem, |
553 | bool charge) | 522 | bool charge) |
554 | { | 523 | { |
555 | int val = (charge) ? 1 : -1; | 524 | int val = (charge) ? 1 : -1; |
556 | struct mem_cgroup_stat *stat = &mem->stat; | 525 | this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_SWAPOUT], val); |
557 | struct mem_cgroup_stat_cpu *cpustat; | ||
558 | int cpu = get_cpu(); | ||
559 | |||
560 | cpustat = &stat->cpustat[cpu]; | ||
561 | __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_SWAPOUT, val); | ||
562 | put_cpu(); | ||
563 | } | 526 | } |
564 | 527 | ||
565 | static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, | 528 | static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, |
@@ -567,26 +530,22 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, | |||
567 | bool charge) | 530 | bool charge) |
568 | { | 531 | { |
569 | int val = (charge) ? 1 : -1; | 532 | int val = (charge) ? 1 : -1; |
570 | struct mem_cgroup_stat *stat = &mem->stat; | ||
571 | struct mem_cgroup_stat_cpu *cpustat; | ||
572 | int cpu = get_cpu(); | ||
573 | 533 | ||
574 | cpustat = &stat->cpustat[cpu]; | 534 | preempt_disable(); |
535 | |||
575 | if (PageCgroupCache(pc)) | 536 | if (PageCgroupCache(pc)) |
576 | __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_CACHE, val); | 537 | __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_CACHE], val); |
577 | else | 538 | else |
578 | __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_RSS, val); | 539 | __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_RSS], val); |
579 | 540 | ||
580 | if (charge) | 541 | if (charge) |
581 | __mem_cgroup_stat_add_safe(cpustat, | 542 | __this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGIN_COUNT]); |
582 | MEM_CGROUP_STAT_PGPGIN_COUNT, 1); | ||
583 | else | 543 | else |
584 | __mem_cgroup_stat_add_safe(cpustat, | 544 | __this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGOUT_COUNT]); |
585 | MEM_CGROUP_STAT_PGPGOUT_COUNT, 1); | 545 | __this_cpu_dec(mem->stat->count[MEM_CGROUP_STAT_SOFTLIMIT]); |
586 | __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_SOFTLIMIT, -1); | 546 | __this_cpu_dec(mem->stat->count[MEM_CGROUP_STAT_THRESHOLDS]); |
587 | __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_THRESHOLDS, -1); | ||
588 | 547 | ||
589 | put_cpu(); | 548 | preempt_enable(); |
590 | } | 549 | } |
591 | 550 | ||
592 | static unsigned long mem_cgroup_get_local_zonestat(struct mem_cgroup *mem, | 551 | static unsigned long mem_cgroup_get_local_zonestat(struct mem_cgroup *mem, |
@@ -1244,7 +1203,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
1244 | } | 1203 | } |
1245 | } | 1204 | } |
1246 | } | 1205 | } |
1247 | if (!mem_cgroup_local_usage(&victim->stat)) { | 1206 | if (!mem_cgroup_local_usage(victim)) { |
1248 | /* this cgroup's local usage == 0 */ | 1207 | /* this cgroup's local usage == 0 */ |
1249 | css_put(&victim->css); | 1208 | css_put(&victim->css); |
1250 | continue; | 1209 | continue; |
@@ -1310,9 +1269,6 @@ static void record_last_oom(struct mem_cgroup *mem) | |||
1310 | void mem_cgroup_update_file_mapped(struct page *page, int val) | 1269 | void mem_cgroup_update_file_mapped(struct page *page, int val) |
1311 | { | 1270 | { |
1312 | struct mem_cgroup *mem; | 1271 | struct mem_cgroup *mem; |
1313 | struct mem_cgroup_stat *stat; | ||
1314 | struct mem_cgroup_stat_cpu *cpustat; | ||
1315 | int cpu; | ||
1316 | struct page_cgroup *pc; | 1272 | struct page_cgroup *pc; |
1317 | 1273 | ||
1318 | pc = lookup_page_cgroup(page); | 1274 | pc = lookup_page_cgroup(page); |
@@ -1328,13 +1284,10 @@ void mem_cgroup_update_file_mapped(struct page *page, int val) | |||
1328 | goto done; | 1284 | goto done; |
1329 | 1285 | ||
1330 | /* | 1286 | /* |
1331 | * Preemption is already disabled, we don't need get_cpu() | 1287 | * Preemption is already disabled. We can use __this_cpu_xxx |
1332 | */ | 1288 | */ |
1333 | cpu = smp_processor_id(); | 1289 | __this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], val); |
1334 | stat = &mem->stat; | ||
1335 | cpustat = &stat->cpustat[cpu]; | ||
1336 | 1290 | ||
1337 | __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_FILE_MAPPED, val); | ||
1338 | done: | 1291 | done: |
1339 | unlock_page_cgroup(pc); | 1292 | unlock_page_cgroup(pc); |
1340 | } | 1293 | } |
@@ -1761,9 +1714,6 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc, | |||
1761 | struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge) | 1714 | struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge) |
1762 | { | 1715 | { |
1763 | struct page *page; | 1716 | struct page *page; |
1764 | int cpu; | ||
1765 | struct mem_cgroup_stat *stat; | ||
1766 | struct mem_cgroup_stat_cpu *cpustat; | ||
1767 | 1717 | ||
1768 | VM_BUG_ON(from == to); | 1718 | VM_BUG_ON(from == to); |
1769 | VM_BUG_ON(PageLRU(pc->page)); | 1719 | VM_BUG_ON(PageLRU(pc->page)); |
@@ -1773,18 +1723,11 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc, | |||
1773 | 1723 | ||
1774 | page = pc->page; | 1724 | page = pc->page; |
1775 | if (page_mapped(page) && !PageAnon(page)) { | 1725 | if (page_mapped(page) && !PageAnon(page)) { |
1776 | cpu = smp_processor_id(); | 1726 | /* Update mapped_file data for mem_cgroup */ |
1777 | /* Update mapped_file data for mem_cgroup "from" */ | 1727 | preempt_disable(); |
1778 | stat = &from->stat; | 1728 | __this_cpu_dec(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]); |
1779 | cpustat = &stat->cpustat[cpu]; | 1729 | __this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]); |
1780 | __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_FILE_MAPPED, | 1730 | preempt_enable(); |
1781 | -1); | ||
1782 | |||
1783 | /* Update mapped_file data for mem_cgroup "to" */ | ||
1784 | stat = &to->stat; | ||
1785 | cpustat = &stat->cpustat[cpu]; | ||
1786 | __mem_cgroup_stat_add_safe(cpustat, MEM_CGROUP_STAT_FILE_MAPPED, | ||
1787 | 1); | ||
1788 | } | 1731 | } |
1789 | mem_cgroup_charge_statistics(from, pc, false); | 1732 | mem_cgroup_charge_statistics(from, pc, false); |
1790 | if (uncharge) | 1733 | if (uncharge) |
@@ -2885,7 +2828,7 @@ static int | |||
2885 | mem_cgroup_get_idx_stat(struct mem_cgroup *mem, void *data) | 2828 | mem_cgroup_get_idx_stat(struct mem_cgroup *mem, void *data) |
2886 | { | 2829 | { |
2887 | struct mem_cgroup_idx_data *d = data; | 2830 | struct mem_cgroup_idx_data *d = data; |
2888 | d->val += mem_cgroup_read_stat(&mem->stat, d->idx); | 2831 | d->val += mem_cgroup_read_stat(mem, d->idx); |
2889 | return 0; | 2832 | return 0; |
2890 | } | 2833 | } |
2891 | 2834 | ||
@@ -3134,18 +3077,18 @@ static int mem_cgroup_get_local_stat(struct mem_cgroup *mem, void *data) | |||
3134 | s64 val; | 3077 | s64 val; |
3135 | 3078 | ||
3136 | /* per cpu stat */ | 3079 | /* per cpu stat */ |
3137 | val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_CACHE); | 3080 | val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_CACHE); |
3138 | s->stat[MCS_CACHE] += val * PAGE_SIZE; | 3081 | s->stat[MCS_CACHE] += val * PAGE_SIZE; |
3139 | val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_RSS); | 3082 | val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_RSS); |
3140 | s->stat[MCS_RSS] += val * PAGE_SIZE; | 3083 | s->stat[MCS_RSS] += val * PAGE_SIZE; |
3141 | val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_FILE_MAPPED); | 3084 | val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_FILE_MAPPED); |
3142 | s->stat[MCS_FILE_MAPPED] += val * PAGE_SIZE; | 3085 | s->stat[MCS_FILE_MAPPED] += val * PAGE_SIZE; |
3143 | val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGIN_COUNT); | 3086 | val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_PGPGIN_COUNT); |
3144 | s->stat[MCS_PGPGIN] += val; | 3087 | s->stat[MCS_PGPGIN] += val; |
3145 | val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_PGPGOUT_COUNT); | 3088 | val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_PGPGOUT_COUNT); |
3146 | s->stat[MCS_PGPGOUT] += val; | 3089 | s->stat[MCS_PGPGOUT] += val; |
3147 | if (do_swap_account) { | 3090 | if (do_swap_account) { |
3148 | val = mem_cgroup_read_stat(&mem->stat, MEM_CGROUP_STAT_SWAPOUT); | 3091 | val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_SWAPOUT); |
3149 | s->stat[MCS_SWAP] += val * PAGE_SIZE; | 3092 | s->stat[MCS_SWAP] += val * PAGE_SIZE; |
3150 | } | 3093 | } |
3151 | 3094 | ||
@@ -3276,19 +3219,14 @@ static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft, | |||
3276 | static bool mem_cgroup_threshold_check(struct mem_cgroup *mem) | 3219 | static bool mem_cgroup_threshold_check(struct mem_cgroup *mem) |
3277 | { | 3220 | { |
3278 | bool ret = false; | 3221 | bool ret = false; |
3279 | int cpu; | ||
3280 | s64 val; | 3222 | s64 val; |
3281 | struct mem_cgroup_stat_cpu *cpustat; | ||
3282 | 3223 | ||
3283 | cpu = get_cpu(); | 3224 | val = this_cpu_read(mem->stat->count[MEM_CGROUP_STAT_THRESHOLDS]); |
3284 | cpustat = &mem->stat.cpustat[cpu]; | ||
3285 | val = __mem_cgroup_stat_read_local(cpustat, MEM_CGROUP_STAT_THRESHOLDS); | ||
3286 | if (unlikely(val < 0)) { | 3225 | if (unlikely(val < 0)) { |
3287 | __mem_cgroup_stat_set_safe(cpustat, MEM_CGROUP_STAT_THRESHOLDS, | 3226 | this_cpu_write(mem->stat->count[MEM_CGROUP_STAT_THRESHOLDS], |
3288 | THRESHOLDS_EVENTS_THRESH); | 3227 | THRESHOLDS_EVENTS_THRESH); |
3289 | ret = true; | 3228 | ret = true; |
3290 | } | 3229 | } |
3291 | put_cpu(); | ||
3292 | return ret; | 3230 | return ret; |
3293 | } | 3231 | } |
3294 | 3232 | ||
@@ -3676,17 +3614,12 @@ static void free_mem_cgroup_per_zone_info(struct mem_cgroup *mem, int node) | |||
3676 | kfree(mem->info.nodeinfo[node]); | 3614 | kfree(mem->info.nodeinfo[node]); |
3677 | } | 3615 | } |
3678 | 3616 | ||
3679 | static int mem_cgroup_size(void) | ||
3680 | { | ||
3681 | int cpustat_size = nr_cpu_ids * sizeof(struct mem_cgroup_stat_cpu); | ||
3682 | return sizeof(struct mem_cgroup) + cpustat_size; | ||
3683 | } | ||
3684 | |||
3685 | static struct mem_cgroup *mem_cgroup_alloc(void) | 3617 | static struct mem_cgroup *mem_cgroup_alloc(void) |
3686 | { | 3618 | { |
3687 | struct mem_cgroup *mem; | 3619 | struct mem_cgroup *mem; |
3688 | int size = mem_cgroup_size(); | 3620 | int size = sizeof(struct mem_cgroup); |
3689 | 3621 | ||
3622 | /* Can be very big if MAX_NUMNODES is very big */ | ||
3690 | if (size < PAGE_SIZE) | 3623 | if (size < PAGE_SIZE) |
3691 | mem = kmalloc(size, GFP_KERNEL); | 3624 | mem = kmalloc(size, GFP_KERNEL); |
3692 | else | 3625 | else |
@@ -3694,6 +3627,14 @@ static struct mem_cgroup *mem_cgroup_alloc(void) | |||
3694 | 3627 | ||
3695 | if (mem) | 3628 | if (mem) |
3696 | memset(mem, 0, size); | 3629 | memset(mem, 0, size); |
3630 | mem->stat = alloc_percpu(struct mem_cgroup_stat_cpu); | ||
3631 | if (!mem->stat) { | ||
3632 | if (size < PAGE_SIZE) | ||
3633 | kfree(mem); | ||
3634 | else | ||
3635 | vfree(mem); | ||
3636 | mem = NULL; | ||
3637 | } | ||
3697 | return mem; | 3638 | return mem; |
3698 | } | 3639 | } |
3699 | 3640 | ||
@@ -3718,7 +3659,8 @@ static void __mem_cgroup_free(struct mem_cgroup *mem) | |||
3718 | for_each_node_state(node, N_POSSIBLE) | 3659 | for_each_node_state(node, N_POSSIBLE) |
3719 | free_mem_cgroup_per_zone_info(mem, node); | 3660 | free_mem_cgroup_per_zone_info(mem, node); |
3720 | 3661 | ||
3721 | if (mem_cgroup_size() < PAGE_SIZE) | 3662 | free_percpu(mem->stat); |
3663 | if (sizeof(struct mem_cgroup) < PAGE_SIZE) | ||
3722 | kfree(mem); | 3664 | kfree(mem); |
3723 | else | 3665 | else |
3724 | vfree(mem); | 3666 | vfree(mem); |