path: root/mm/memcontrol.c
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--	mm/memcontrol.c	139
1 file changed, 109 insertions(+), 30 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index ec4dcf1b9562..28928ce9b07f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -292,6 +292,9 @@ struct mem_cgroup {
 	/* vmpressure notifications */
 	struct vmpressure vmpressure;
 
+	/* css_online() has been completed */
+	int initialized;
+
 	/*
 	 * the counter to account for mem+swap usage.
 	 */
@@ -1099,10 +1102,21 @@ skip_node:
 	 * skipping css reference should be safe.
 	 */
 	if (next_css) {
-		if ((next_css == &root->css) ||
-		    ((next_css->flags & CSS_ONLINE) &&
-		     css_tryget_online(next_css)))
-			return mem_cgroup_from_css(next_css);
+		struct mem_cgroup *memcg = mem_cgroup_from_css(next_css);
+
+		if (next_css == &root->css)
+			return memcg;
+
+		if (css_tryget_online(next_css)) {
+			/*
+			 * Make sure the memcg is initialized:
+			 * mem_cgroup_css_online() orders the
+			 * initialization against setting the flag.
+			 */
+			if (smp_load_acquire(&memcg->initialized))
+				return memcg;
+			css_put(next_css);
+		}
 
 		prev_css = next_css;
 		goto skip_node;
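
Note on the barrier pairing in the hunk above: the smp_load_acquire() on memcg->initialized in mem_cgroup_iter() pairs with the smp_store_release() added to mem_cgroup_css_online() further down, so an iterator that observes the flag set is also guaranteed to observe the fully initialized memcg. A minimal userspace sketch of that publish/consume pattern, using C11 atomics in place of the kernel macros (struct obj, onliner and iterator are made-up names for illustration, not kernel code):

#include <stdatomic.h>
#include <pthread.h>
#include <stdio.h>

/* Hypothetical stand-in for a memcg being brought online. */
struct obj {
	int data;               /* members set up before publication */
	atomic_int initialized; /* plays the role of memcg->initialized */
};

static struct obj g;

static void *onliner(void *arg)
{
	g.data = 42;            /* initialization happens first... */
	/* ...then is published with release semantics (smp_store_release()). */
	atomic_store_explicit(&g.initialized, 1, memory_order_release);
	return NULL;
}

static void *iterator(void *arg)
{
	/* Acquire load pairs with the release store (smp_load_acquire()). */
	if (atomic_load_explicit(&g.initialized, memory_order_acquire))
		printf("data = %d\n", g.data); /* guaranteed to see 42 */
	else
		printf("not initialized yet, skip\n");
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, onliner, NULL);
	pthread_create(&b, NULL, iterator, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}

Compile with cc -pthread; the reader either sees the flag and then the fully written data, or skips the object, mirroring how the iterator either returns the memcg or does css_put() and moves on.
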
@@ -2534,6 +2548,8 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	unsigned long long size;
 	int ret = 0;
 
+	if (mem_cgroup_is_root(memcg))
+		goto done;
 retry:
 	if (consume_stock(memcg, nr_pages))
 		goto done;
@@ -2611,9 +2627,7 @@ nomem:
 	if (!(gfp_mask & __GFP_NOFAIL))
 		return -ENOMEM;
 bypass:
-	memcg = root_mem_cgroup;
-	ret = -EINTR;
-	goto retry;
+	return -EINTR;
 
 done_restock:
 	if (batch > nr_pages)
@@ -2626,6 +2640,9 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
 	unsigned long bytes = nr_pages * PAGE_SIZE;
 
+	if (mem_cgroup_is_root(memcg))
+		return;
+
 	res_counter_uncharge(&memcg->res, bytes);
 	if (do_swap_account)
 		res_counter_uncharge(&memcg->memsw, bytes);
@@ -2640,6 +2657,9 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg,
 {
 	unsigned long bytes = nr_pages * PAGE_SIZE;
 
+	if (mem_cgroup_is_root(memcg))
+		return;
+
 	res_counter_uncharge_until(&memcg->res, memcg->res.parent, bytes);
 	if (do_swap_account)
 		res_counter_uncharge_until(&memcg->memsw,
@@ -4093,6 +4113,46 @@ out:
 	return retval;
 }
 
+static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg,
+					       enum mem_cgroup_stat_index idx)
+{
+	struct mem_cgroup *iter;
+	long val = 0;
+
+	/* Per-cpu values can be negative, use a signed accumulator */
+	for_each_mem_cgroup_tree(iter, memcg)
+		val += mem_cgroup_read_stat(iter, idx);
+
+	if (val < 0) /* race ? */
+		val = 0;
+	return val;
+}
+
+static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
+{
+	u64 val;
+
+	if (!mem_cgroup_is_root(memcg)) {
+		if (!swap)
+			return res_counter_read_u64(&memcg->res, RES_USAGE);
+		else
+			return res_counter_read_u64(&memcg->memsw, RES_USAGE);
+	}
+
+	/*
+	 * Transparent hugepages are still accounted for in MEM_CGROUP_STAT_RSS
+	 * as well as in MEM_CGROUP_STAT_RSS_HUGE.
+	 */
+	val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE);
+	val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS);
+
+	if (swap)
+		val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAP);
+
+	return val << PAGE_SHIFT;
+}
+
+
 static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
 			       struct cftype *cft)
 {
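
For the root memcg, mem_cgroup_usage() above no longer reads a res_counter; it sums the recursive CACHE and RSS (plus SWAP for memsw) page counts and shifts by PAGE_SHIFT to report bytes. A small sketch of that page-to-byte arithmetic with made-up numbers (clamp_stat and all values are illustrative only, not kernel code):

#include <stdio.h>

#define PAGE_SHIFT 12  /* assume 4 KiB pages for the example */

/* Illustrative stand-in: racy per-cpu sums can transiently be negative. */
static unsigned long clamp_stat(long val)
{
	return val < 0 ? 0 : (unsigned long)val;
}

int main(void)
{
	long cache_pages = 1000;  /* made-up recursive CACHE count */
	long rss_pages = 500;     /* made-up recursive RSS count   */
	long swap_pages = -3;     /* dipped below zero, clamped to 0 */

	unsigned long long usage =
		(unsigned long long)(clamp_stat(cache_pages) +
				     clamp_stat(rss_pages) +
				     clamp_stat(swap_pages)) << PAGE_SHIFT;

	/* 1500 pages << 12 = 6144000 bytes reported for usage_in_bytes */
	printf("%llu\n", usage);
	return 0;
}

With 4 KiB pages, 1000 cache pages plus 500 RSS pages report as 1500 << 12 = 6144000 bytes; the clamp mirrors the race comment in mem_cgroup_recursive_stat().
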
@@ -4102,8 +4162,12 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
 
 	switch (type) {
 	case _MEM:
+		if (name == RES_USAGE)
+			return mem_cgroup_usage(memcg, false);
 		return res_counter_read_u64(&memcg->res, name);
 	case _MEMSWAP:
+		if (name == RES_USAGE)
+			return mem_cgroup_usage(memcg, true);
 		return res_counter_read_u64(&memcg->memsw, name);
 	case _KMEM:
 		return res_counter_read_u64(&memcg->kmem, name);
@@ -4572,10 +4636,7 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap)
 	if (!t)
 		goto unlock;
 
-	if (!swap)
-		usage = res_counter_read_u64(&memcg->res, RES_USAGE);
-	else
-		usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
+	usage = mem_cgroup_usage(memcg, swap);
 
 	/*
 	 * current_threshold points to threshold just below or equal to usage.
@@ -4673,10 +4734,10 @@ static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg,
 
 	if (type == _MEM) {
 		thresholds = &memcg->thresholds;
-		usage = res_counter_read_u64(&memcg->res, RES_USAGE);
+		usage = mem_cgroup_usage(memcg, false);
 	} else if (type == _MEMSWAP) {
 		thresholds = &memcg->memsw_thresholds;
-		usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
+		usage = mem_cgroup_usage(memcg, true);
 	} else
 		BUG();
 
@@ -4762,10 +4823,10 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg,
 
 	if (type == _MEM) {
 		thresholds = &memcg->thresholds;
-		usage = res_counter_read_u64(&memcg->res, RES_USAGE);
+		usage = mem_cgroup_usage(memcg, false);
 	} else if (type == _MEMSWAP) {
 		thresholds = &memcg->memsw_thresholds;
-		usage = res_counter_read_u64(&memcg->memsw, RES_USAGE);
+		usage = mem_cgroup_usage(memcg, true);
 	} else
 		BUG();
 
@@ -5502,6 +5563,7 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	struct mem_cgroup *parent = mem_cgroup_from_css(css->parent);
+	int ret;
 
 	if (css->id > MEM_CGROUP_ID_MAX)
 		return -ENOSPC;
@@ -5525,9 +5587,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	 * core guarantees its existence.
 	 */
 	} else {
-		res_counter_init(&memcg->res, &root_mem_cgroup->res);
-		res_counter_init(&memcg->memsw, &root_mem_cgroup->memsw);
-		res_counter_init(&memcg->kmem, &root_mem_cgroup->kmem);
+		res_counter_init(&memcg->res, NULL);
+		res_counter_init(&memcg->memsw, NULL);
+		res_counter_init(&memcg->kmem, NULL);
 		/*
 		 * Deeper hierachy with use_hierarchy == false doesn't make
 		 * much sense so let cgroup subsystem know about this
@@ -5538,7 +5600,18 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	}
 	mutex_unlock(&memcg_create_mutex);
 
-	return memcg_init_kmem(memcg, &memory_cgrp_subsys);
+	ret = memcg_init_kmem(memcg, &memory_cgrp_subsys);
+	if (ret)
+		return ret;
+
+	/*
+	 * Make sure the memcg is initialized: mem_cgroup_iter()
+	 * orders reading memcg->initialized against its callers
+	 * reading the memcg members.
+	 */
+	smp_store_release(&memcg->initialized, 1);
+
+	return 0;
 }
 
 /*
@@ -5969,8 +6042,9 @@ static void __mem_cgroup_clear_mc(void)
 	/* we must fixup refcnts and charges */
 	if (mc.moved_swap) {
 		/* uncharge swap account from the old cgroup */
-		res_counter_uncharge(&mc.from->memsw,
-				     PAGE_SIZE * mc.moved_swap);
+		if (!mem_cgroup_is_root(mc.from))
+			res_counter_uncharge(&mc.from->memsw,
+					     PAGE_SIZE * mc.moved_swap);
 
 		for (i = 0; i < mc.moved_swap; i++)
 			css_put(&mc.from->css);
@@ -5979,8 +6053,9 @@ static void __mem_cgroup_clear_mc(void)
 		 * we charged both to->res and to->memsw, so we should
 		 * uncharge to->res.
 		 */
-		res_counter_uncharge(&mc.to->res,
-				     PAGE_SIZE * mc.moved_swap);
+		if (!mem_cgroup_is_root(mc.to))
+			res_counter_uncharge(&mc.to->res,
+					     PAGE_SIZE * mc.moved_swap);
 		/* we've already done css_get(mc.to) */
 		mc.moved_swap = 0;
 	}
@@ -6345,7 +6420,8 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry)
 	rcu_read_lock();
 	memcg = mem_cgroup_lookup(id);
 	if (memcg) {
-		res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
+		if (!mem_cgroup_is_root(memcg))
+			res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
 		mem_cgroup_swap_statistics(memcg, false);
 		css_put(&memcg->css);
 	}
@@ -6509,12 +6585,15 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
 {
 	unsigned long flags;
 
-	if (nr_mem)
-		res_counter_uncharge(&memcg->res, nr_mem * PAGE_SIZE);
-	if (nr_memsw)
-		res_counter_uncharge(&memcg->memsw, nr_memsw * PAGE_SIZE);
-
-	memcg_oom_recover(memcg);
+	if (!mem_cgroup_is_root(memcg)) {
+		if (nr_mem)
+			res_counter_uncharge(&memcg->res,
+					     nr_mem * PAGE_SIZE);
+		if (nr_memsw)
+			res_counter_uncharge(&memcg->memsw,
+					     nr_memsw * PAGE_SIZE);
+		memcg_oom_recover(memcg);
+	}
 
 	local_irq_save(flags);
 	__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon);