Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--	mm/memcontrol.c	139
1 file changed, 109 insertions(+), 30 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ec4dcf1b9562..28928ce9b07f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -292,6 +292,9 @@ struct mem_cgroup {
 	/* vmpressure notifications */
 	struct vmpressure vmpressure;
 
+	/* css_online() has been completed */
+	int initialized;
+
 	/*
 	 * the counter to account for mem+swap usage.
 	 */
@@ -1099,10 +1102,21 @@ skip_node:
 	 * skipping css reference should be safe.
 	 */
 	if (next_css) {
-		if ((next_css == &root->css) ||
-		    ((next_css->flags & CSS_ONLINE) &&
-		     css_tryget_online(next_css)))
-			return mem_cgroup_from_css(next_css);
+		struct mem_cgroup *memcg = mem_cgroup_from_css(next_css);
+
+		if (next_css == &root->css)
+			return memcg;
+
+		if (css_tryget_online(next_css)) {
+			/*
+			 * Make sure the memcg is initialized:
+			 * mem_cgroup_css_online() orders the
+			 * initialization against setting the flag.
+			 */
+			if (smp_load_acquire(&memcg->initialized))
+				return memcg;
+			css_put(next_css);
+		}
 
 		prev_css = next_css;
 		goto skip_node;
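The acquire load above is the reader half of a publication pattern; the matching smp_store_release() lands in mem_cgroup_css_online() further down. As a rough user-space analogy (not kernel code; the struct and helper names are invented for illustration), the reader side looks like this in C11 atomics:

    #include <stdatomic.h>
    #include <stdbool.h>

    struct memcg_sketch {
    	atomic_int refcount;	/* stands in for the css refcount */
    	atomic_int initialized;	/* published by the "online" path */
    };

    static bool memcg_sketch_tryget(struct memcg_sketch *m)
    {
    	int ref = atomic_load(&m->refcount);

    	/* stand-in for css_tryget_online(): only bump a live ref */
    	do {
    		if (ref == 0)
    			return false;
    	} while (!atomic_compare_exchange_weak(&m->refcount, &ref, ref + 1));

    	/*
    	 * Acquire: observing initialized == 1 guarantees we also
    	 * observe every store made before the paired release store.
    	 */
    	if (atomic_load_explicit(&m->initialized, memory_order_acquire))
    		return true;

    	atomic_fetch_sub(&m->refcount, 1);	/* css_put() */
    	return false;
    }

If the flag is not yet visible, the reference is dropped and the iterator skips the group rather than handing out a half-initialized memcg.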
@@ -2534,6 +2548,8 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	unsigned long long size;
 	int ret = 0;
 
+	if (mem_cgroup_is_root(memcg))
+		goto done;
 retry:
 	if (consume_stock(memcg, nr_pages))
 		goto done;
@@ -2611,9 +2627,7 @@ nomem:
 	if (!(gfp_mask & __GFP_NOFAIL))
 		return -ENOMEM;
 bypass:
-	memcg = root_mem_cgroup;
-	ret = -EINTR;
-	goto retry;
+	return -EINTR;
 
 done_restock:
 	if (batch > nr_pages)
@@ -2626,6 +2640,9 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
 	unsigned long bytes = nr_pages * PAGE_SIZE;
 
+	if (mem_cgroup_is_root(memcg))
+		return;
+
 	res_counter_uncharge(&memcg->res, bytes);
 	if (do_swap_account)
 		res_counter_uncharge(&memcg->memsw, bytes);
@@ -2640,6 +2657,9 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
 {
 	unsigned long bytes = nr_pages * PAGE_SIZE;
 
+	if (mem_cgroup_is_root(memcg))
+		return;
+
 	res_counter_uncharge_until(&memcg->res, memcg->res.parent, bytes);
 	if (do_swap_account)
 		res_counter_uncharge_until(&memcg->memsw,
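Because try_charge() now bypasses the res_counters entirely for the root memcg, every path that gives pages back must bypass them just as consistently: an uncharge against a counter that was never charged would underflow it. A minimal sketch of the invariant, using hypothetical wrapper names around the real res_counter API:

    /* Hypothetical wrappers: the root bypass must be symmetric. */
    static int sketch_charge(struct mem_cgroup *memcg, unsigned long bytes)
    {
    	struct res_counter *fail;

    	if (mem_cgroup_is_root(memcg))
    		return 0;		/* nothing is counted for root... */
    	return res_counter_charge(&memcg->res, bytes, &fail);
    }

    static void sketch_uncharge(struct mem_cgroup *memcg, unsigned long bytes)
    {
    	if (mem_cgroup_is_root(memcg))
    		return;			/* ...so nothing to hand back */
    	res_counter_uncharge(&memcg->res, bytes);
    }

The remaining hunks below apply exactly this symmetry to the swap, move-charge, and batched uncharge paths.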
@@ -4093,6 +4113,46 @@ out:
 	return retval;
 }
 
+static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg,
+					       enum mem_cgroup_stat_index idx)
+{
+	struct mem_cgroup *iter;
+	long val = 0;
+
+	/* Per-cpu values can be negative, use a signed accumulator */
+	for_each_mem_cgroup_tree(iter, memcg)
+		val += mem_cgroup_read_stat(iter, idx);
+
+	if (val < 0) /* race ? */
+		val = 0;
+	return val;
+}
+
+static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
+{
+	u64 val;
+
+	if (!mem_cgroup_is_root(memcg)) {
+		if (!swap)
+			return res_counter_read_u64(&memcg->res, RES_USAGE);
+		else
+			return res_counter_read_u64(&memcg->memsw, RES_USAGE);
+	}
+
+	/*
+	 * Transparent hugepages are still accounted for in MEM_CGROUP_STAT_RSS
+	 * as well as in MEM_CGROUP_STAT_RSS_HUGE.
+	 */
+	val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
+	val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS);
+
+	if (swap)
+		val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAP);
+
+	return val << PAGE_SHIFT;
+}
+
+
 static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
 			       struct cftype *cft)
 {
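Since the root group's res_counters are no longer charged, its RES_USAGE has to be reconstructed from the recursive statistics instead. The signed accumulator matters: per-cpu deltas are applied without locking, so a concurrent uncharge can make the sum transiently negative, which is clamped to zero rather than returned as a huge unsigned value. A rough user-space model of that accumulation (names and the cpu count are invented):

    #define NCPUS 4

    struct stat_sketch {
    	long percpu[NCPUS];	/* unsynchronized deltas, may go negative */
    };

    static unsigned long stat_sketch_read(const struct stat_sketch *s)
    {
    	long val = 0;
    	int cpu;

    	for (cpu = 0; cpu < NCPUS; cpu++)
    		val += s->percpu[cpu];

    	/* a racing uncharge can be seen before its charge; clamp */
    	if (val < 0)
    		val = 0;
    	return (unsigned long)val;
    }

mem_cgroup_usage() then sums cache pages and rss pages (plus swap pages for the memsw variant) and shifts by PAGE_SHIFT to report bytes, matching what the res_counter would have reported.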
@@ -4102,8 +4162,12 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
 
 	switch (type) {
 	case _MEM:
+		if (name == RES_USAGE)
+			return mem_cgroup_usage(memcg, false);
 		return res_counter_read_u64(&memcg->res, name);
 	case _MEMSWAP:
+		if (name == RES_USAGE)
+			return mem_cgroup_usage(memcg, true);
 		return res_counter_read_u64(&memcg->memsw, name);
 	case _KMEM:
 		return res_counter_read_u64(&memcg->kmem, name);
@@ -4572,10 +4636,7 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
 	if (!t)
 		goto unlock;
 
-	if (!swap)
-		usage = res_counter_read_u64(&memcg->res, RES_USAGE);
-	else
-		usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
+	usage = mem_cgroup_usage(memcg, swap);
 
 	/*
 	 * current_threshold points to threshold just below or equal to usage.
@@ -4673,10 +4734,10 @@ static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg,
 
 	if (type == _MEM) {
 		thresholds = &memcg->thresholds;
-		usage = res_counter_read_u64(&memcg->res, RES_USAGE);
+		usage = mem_cgroup_usage(memcg, false);
 	} else if (type == _MEMSWAP) {
 		thresholds = &memcg->memsw_thresholds;
-		usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
+		usage = mem_cgroup_usage(memcg, true);
 	} else
 		BUG();
 
@@ -4762,10 +4823,10 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
 
 	if (type == _MEM) {
 		thresholds = &memcg->thresholds;
-		usage = res_counter_read_u64(&memcg->res, RES_USAGE);
+		usage = mem_cgroup_usage(memcg, false);
 	} else if (type == _MEMSWAP) {
 		thresholds = &memcg->memsw_thresholds;
-		usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
+		usage = mem_cgroup_usage(memcg, true);
 	} else
 		BUG();
 
@@ -5502,6 +5563,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	struct mem_cgroup *parent = mem_cgroup_from_css(css->parent);
+	int ret;
 
 	if (css->id > MEM_CGROUP_ID_MAX)
 		return -ENOSPC;
@@ -5525,9 +5587,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	 * core guarantees its existence.
 	 */
 	} else {
-		res_counter_init(&memcg->res, &root_mem_cgroup->res);
-		res_counter_init(&memcg->memsw, &root_mem_cgroup->memsw);
-		res_counter_init(&memcg->kmem, &root_mem_cgroup->kmem);
+		res_counter_init(&memcg->res, NULL);
+		res_counter_init(&memcg->memsw, NULL);
+		res_counter_init(&memcg->kmem, NULL);
 		/*
 		 * Deeper hierachy with use_hierarchy == false doesn't make
 		 * much sense so let cgroup subsystem know about this
@@ -5538,7 +5600,18 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	}
 	mutex_unlock(&memcg_create_mutex);
 
-	return memcg_init_kmem(memcg, &memory_cgrp_subsys);
+	ret = memcg_init_kmem(memcg, &memory_cgrp_subsys);
+	if (ret)
+		return ret;
+
+	/*
+	 * Make sure the memcg is initialized: mem_cgroup_iter()
+	 * orders reading memcg->initialized against its callers
+	 * reading the memcg members.
+	 */
+	smp_store_release(&memcg->initialized, 1);
+
+	return 0;
 }
 
 /*
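This smp_store_release() is the writer half of the pairing with the smp_load_acquire() in mem_cgroup_iter() above: no initialization store may be reordered past the flag update, so any iterator that observes initialized == 1 also observes a fully set up memcg. Completing the earlier C11 sketch (invented names, user-space analogy only):

    /* Writer side of the earlier sketch: publish the object. */
    static void memcg_sketch_online(struct memcg_sketch *m)
    {
    	/* ... all other fields of *m are set up here ... */

    	/*
    	 * Release: none of the setup stores above may be reordered
    	 * past this flag update.
    	 */
    	atomic_store_explicit(&m->initialized, 1, memory_order_release);
    }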
@@ -5969,8 +6042,9 @@ static void __mem_cgroup_clear_mc(void)
 	/* we must fixup refcnts and charges */
 	if (mc.moved_swap) {
 		/* uncharge swap account from the old cgroup */
-		res_counter_uncharge(&mc.from->memsw,
-				     PAGE_SIZE * mc.moved_swap);
+		if (!mem_cgroup_is_root(mc.from))
+			res_counter_uncharge(&mc.from->memsw,
+					     PAGE_SIZE * mc.moved_swap);
 
 		for (i = 0; i < mc.moved_swap; i++)
 			css_put(&mc.from->css);
@@ -5979,8 +6053,9 @@ static void __mem_cgroup_clear_mc(void)
 		 * we charged both to->res and to->memsw, so we should
 		 * uncharge to->res.
 		 */
-		res_counter_uncharge(&mc.to->res,
-				     PAGE_SIZE * mc.moved_swap);
+		if (!mem_cgroup_is_root(mc.to))
+			res_counter_uncharge(&mc.to->res,
+					     PAGE_SIZE * mc.moved_swap);
 		/* we've already done css_get(mc.to) */
 		mc.moved_swap = 0;
 	}
@@ -6345,7 +6420,8 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry)
 	rcu_read_lock();
 	memcg = mem_cgroup_lookup(id);
 	if (memcg) {
-		res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
+		if (!mem_cgroup_is_root(memcg))
+			res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
 		mem_cgroup_swap_statistics(memcg, false);
 		css_put(&memcg->css);
 	}
@@ -6509,12 +6585,15 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
 {
 	unsigned long flags;
 
-	if (nr_mem)
-		res_counter_uncharge(&memcg->res, nr_mem * PAGE_SIZE);
-	if (nr_memsw)
-		res_counter_uncharge(&memcg->memsw, nr_memsw * PAGE_SIZE);
-
-	memcg_oom_recover(memcg);
+	if (!mem_cgroup_is_root(memcg)) {
+		if (nr_mem)
+			res_counter_uncharge(&memcg->res,
+					     nr_mem * PAGE_SIZE);
+		if (nr_memsw)
+			res_counter_uncharge(&memcg->memsw,
+					     nr_memsw * PAGE_SIZE);
+		memcg_oom_recover(memcg);
+	}
 
 	local_irq_save(flags);
 	__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon);