diff options
author | Johannes Weiner <hannes@cmpxchg.org> | 2014-08-06 19:05:59 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-08-06 21:01:17 -0400 |
commit | 05b8430123359886ef6a4146fba384e30d771b3f (patch) | |
tree | 6d83d5c64448f2cbb8fe1df84b7d423dd24f823a | |
parent | 692e7c45d95ad1064b6911800e2cfec7fc0236db (diff) |

mm: memcontrol: use root_mem_cgroup res_counter

Due to an old optimization to keep expensive res_counter changes at a
minimum, the root_mem_cgroup res_counter is never charged; there is no
limit at that level anyway, and any statistics can be generated on
demand by summing up the counters of all other cgroups.

However, with per-cpu charge caches, res_counter operations do not even
show up in profiles anymore, so this optimization is no longer
necessary.

Remove it to simplify the code.

Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | mm/memcontrol.c | 152 |
1 files changed, 44 insertions, 108 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index e0ac636315f8..07908ea954b6 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -2570,9 +2570,8 @@ static int mem_cgroup_try_charge(struct mem_cgroup *memcg, | |||
2570 | unsigned long nr_reclaimed; | 2570 | unsigned long nr_reclaimed; |
2571 | unsigned long flags = 0; | 2571 | unsigned long flags = 0; |
2572 | unsigned long long size; | 2572 | unsigned long long size; |
2573 | int ret = 0; | ||
2573 | 2574 | ||
2574 | if (mem_cgroup_is_root(memcg)) | ||
2575 | goto done; | ||
2576 | retry: | 2575 | retry: |
2577 | if (consume_stock(memcg, nr_pages)) | 2576 | if (consume_stock(memcg, nr_pages)) |
2578 | goto done; | 2577 | goto done; |
@@ -2650,13 +2649,15 @@ nomem: | |||
2650 | if (!(gfp_mask & __GFP_NOFAIL)) | 2649 | if (!(gfp_mask & __GFP_NOFAIL)) |
2651 | return -ENOMEM; | 2650 | return -ENOMEM; |
2652 | bypass: | 2651 | bypass: |
2653 | return -EINTR; | 2652 | memcg = root_mem_cgroup; |
2653 | ret = -EINTR; | ||
2654 | goto retry; | ||
2654 | 2655 | ||
2655 | done_restock: | 2656 | done_restock: |
2656 | if (batch > nr_pages) | 2657 | if (batch > nr_pages) |
2657 | refill_stock(memcg, batch - nr_pages); | 2658 | refill_stock(memcg, batch - nr_pages); |
2658 | done: | 2659 | done: |
2659 | return 0; | 2660 | return ret; |
2660 | } | 2661 | } |
2661 | 2662 | ||
2662 | /** | 2663 | /** |
@@ -2695,13 +2696,11 @@ static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm, | |||
2695 | static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg, | 2696 | static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg, |
2696 | unsigned int nr_pages) | 2697 | unsigned int nr_pages) |
2697 | { | 2698 | { |
2698 | if (!mem_cgroup_is_root(memcg)) { | 2699 | unsigned long bytes = nr_pages * PAGE_SIZE; |
2699 | unsigned long bytes = nr_pages * PAGE_SIZE; | ||
2700 | 2700 | ||
2701 | res_counter_uncharge(&memcg->res, bytes); | 2701 | res_counter_uncharge(&memcg->res, bytes); |
2702 | if (do_swap_account) | 2702 | if (do_swap_account) |
2703 | res_counter_uncharge(&memcg->memsw, bytes); | 2703 | res_counter_uncharge(&memcg->memsw, bytes); |
2704 | } | ||
2705 | } | 2704 | } |
2706 | 2705 | ||
2707 | /* | 2706 | /* |
@@ -2713,9 +2712,6 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg, | |||
2713 | { | 2712 | { |
2714 | unsigned long bytes = nr_pages * PAGE_SIZE; | 2713 | unsigned long bytes = nr_pages * PAGE_SIZE; |
2715 | 2714 | ||
2716 | if (mem_cgroup_is_root(memcg)) | ||
2717 | return; | ||
2718 | |||
2719 | res_counter_uncharge_until(&memcg->res, memcg->res.parent, bytes); | 2715 | res_counter_uncharge_until(&memcg->res, memcg->res.parent, bytes); |
2720 | if (do_swap_account) | 2716 | if (do_swap_account) |
2721 | res_counter_uncharge_until(&memcg->memsw, | 2717 | res_counter_uncharge_until(&memcg->memsw, |
@@ -3943,7 +3939,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype, | |||
3943 | * replacement page, so leave it alone when phasing out the | 3939 | * replacement page, so leave it alone when phasing out the |
3944 | * page that is unused after the migration. | 3940 | * page that is unused after the migration. |
3945 | */ | 3941 | */ |
3946 | if (!end_migration && !mem_cgroup_is_root(memcg)) | 3942 | if (!end_migration) |
3947 | mem_cgroup_do_uncharge(memcg, nr_pages, ctype); | 3943 | mem_cgroup_do_uncharge(memcg, nr_pages, ctype); |
3948 | 3944 | ||
3949 | return memcg; | 3945 | return memcg; |
@@ -4076,8 +4072,7 @@ void mem_cgroup_uncharge_swap(swp_entry_t ent) | |||
4076 | * We uncharge this because swap is freed. This memcg can | 4072 | * We uncharge this because swap is freed. This memcg can |
4077 | * be obsolete one. We avoid calling css_tryget_online(). | 4073 | * be obsolete one. We avoid calling css_tryget_online(). |
4078 | */ | 4074 | */ |
4079 | if (!mem_cgroup_is_root(memcg)) | 4075 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE); |
4080 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE); | ||
4081 | mem_cgroup_swap_statistics(memcg, false); | 4076 | mem_cgroup_swap_statistics(memcg, false); |
4082 | css_put(&memcg->css); | 4077 | css_put(&memcg->css); |
4083 | } | 4078 | } |
@@ -4767,78 +4762,24 @@ out: | |||
4767 | return retval; | 4762 | return retval; |
4768 | } | 4763 | } |
4769 | 4764 | ||
4770 | |||
4771 | static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg, | ||
4772 | enum mem_cgroup_stat_index idx) | ||
4773 | { | ||
4774 | struct mem_cgroup *iter; | ||
4775 | long val = 0; | ||
4776 | |||
4777 | /* Per-cpu values can be negative, use a signed accumulator */ | ||
4778 | for_each_mem_cgroup_tree(iter, memcg) | ||
4779 | val += mem_cgroup_read_stat(iter, idx); | ||
4780 | |||
4781 | if (val < 0) /* race ? */ | ||
4782 | val = 0; | ||
4783 | return val; | ||
4784 | } | ||
4785 | |||
4786 | static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) | ||
4787 | { | ||
4788 | u64 val; | ||
4789 | |||
4790 | if (!mem_cgroup_is_root(memcg)) { | ||
4791 | if (!swap) | ||
4792 | return res_counter_read_u64(&memcg->res, RES_USAGE); | ||
4793 | else | ||
4794 | return res_counter_read_u64(&memcg->memsw, RES_USAGE); | ||
4795 | } | ||
4796 | |||
4797 | /* | ||
4798 | * Transparent hugepages are still accounted for in MEM_CGROUP_STAT_RSS | ||
4799 | * as well as in MEM_CGROUP_STAT_RSS_HUGE. | ||
4800 | */ | ||
4801 | val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE); | ||
4802 | val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS); | ||
4803 | |||
4804 | if (swap) | ||
4805 | val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAP); | ||
4806 | |||
4807 | return val << PAGE_SHIFT; | ||
4808 | } | ||
4809 | |||
4810 | static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css, | 4765 | static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css, |
4811 | struct cftype *cft) | 4766 | struct cftype *cft) |
4812 | { | 4767 | { |
4813 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); | 4768 | struct mem_cgroup *memcg = mem_cgroup_from_css(css); |
4814 | u64 val; | 4769 | enum res_type type = MEMFILE_TYPE(cft->private); |
4815 | int name; | 4770 | int name = MEMFILE_ATTR(cft->private); |
4816 | enum res_type type; | ||
4817 | |||
4818 | type = MEMFILE_TYPE(cft->private); | ||
4819 | name = MEMFILE_ATTR(cft->private); | ||
4820 | 4771 | ||
4821 | switch (type) { | 4772 | switch (type) { |
4822 | case _MEM: | 4773 | case _MEM: |
4823 | if (name == RES_USAGE) | 4774 | return res_counter_read_u64(&memcg->res, name); |
4824 | val = mem_cgroup_usage(memcg, false); | ||
4825 | else | ||
4826 | val = res_counter_read_u64(&memcg->res, name); | ||
4827 | break; | ||
4828 | case _MEMSWAP: | 4775 | case _MEMSWAP: |
4829 | if (name == RES_USAGE) | 4776 | return res_counter_read_u64(&memcg->memsw, name); |
4830 | val = mem_cgroup_usage(memcg, true); | ||
4831 | else | ||
4832 | val = res_counter_read_u64(&memcg->memsw, name); | ||
4833 | break; | ||
4834 | case _KMEM: | 4777 | case _KMEM: |
4835 | val = res_counter_read_u64(&memcg->kmem, name); | 4778 | return res_counter_read_u64(&memcg->kmem, name); |
4836 | break; | 4779 | break; |
4837 | default: | 4780 | default: |
4838 | BUG(); | 4781 | BUG(); |
4839 | } | 4782 | } |
4840 | |||
4841 | return val; | ||
4842 | } | 4783 | } |
4843 | 4784 | ||
4844 | #ifdef CONFIG_MEMCG_KMEM | 4785 | #ifdef CONFIG_MEMCG_KMEM |
@@ -5300,7 +5241,10 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap) | |||
5300 | if (!t) | 5241 | if (!t) |
5301 | goto unlock; | 5242 | goto unlock; |
5302 | 5243 | ||
5303 | usage = mem_cgroup_usage(memcg, swap); | 5244 | if (!swap) |
5245 | usage = res_counter_read_u64(&memcg->res, RES_USAGE); | ||
5246 | else | ||
5247 | usage = res_counter_read_u64(&memcg->memsw, RES_USAGE); | ||
5304 | 5248 | ||
5305 | /* | 5249 | /* |
5306 | * current_threshold points to threshold just below or equal to usage. | 5250 | * current_threshold points to threshold just below or equal to usage. |
@@ -5396,15 +5340,15 @@ static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg, | |||
5396 | 5340 | ||
5397 | mutex_lock(&memcg->thresholds_lock); | 5341 | mutex_lock(&memcg->thresholds_lock); |
5398 | 5342 | ||
5399 | if (type == _MEM) | 5343 | if (type == _MEM) { |
5400 | thresholds = &memcg->thresholds; | 5344 | thresholds = &memcg->thresholds; |
5401 | else if (type == _MEMSWAP) | 5345 | usage = res_counter_read_u64(&memcg->res, RES_USAGE); |
5346 | } else if (type == _MEMSWAP) { | ||
5402 | thresholds = &memcg->memsw_thresholds; | 5347 | thresholds = &memcg->memsw_thresholds; |
5403 | else | 5348 | usage = res_counter_read_u64(&memcg->memsw, RES_USAGE); |
5349 | } else | ||
5404 | BUG(); | 5350 | BUG(); |
5405 | 5351 | ||
5406 | usage = mem_cgroup_usage(memcg, type == _MEMSWAP); | ||
5407 | |||
5408 | /* Check if a threshold crossed before adding a new one */ | 5352 | /* Check if a threshold crossed before adding a new one */ |
5409 | if (thresholds->primary) | 5353 | if (thresholds->primary) |
5410 | __mem_cgroup_threshold(memcg, type == _MEMSWAP); | 5354 | __mem_cgroup_threshold(memcg, type == _MEMSWAP); |
@@ -5484,18 +5428,19 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg, | |||
5484 | int i, j, size; | 5428 | int i, j, size; |
5485 | 5429 | ||
5486 | mutex_lock(&memcg->thresholds_lock); | 5430 | mutex_lock(&memcg->thresholds_lock); |
5487 | if (type == _MEM) | 5431 | |
5432 | if (type == _MEM) { | ||
5488 | thresholds = &memcg->thresholds; | 5433 | thresholds = &memcg->thresholds; |
5489 | else if (type == _MEMSWAP) | 5434 | usage = res_counter_read_u64(&memcg->res, RES_USAGE); |
5435 | } else if (type == _MEMSWAP) { | ||
5490 | thresholds = &memcg->memsw_thresholds; | 5436 | thresholds = &memcg->memsw_thresholds; |
5491 | else | 5437 | usage = res_counter_read_u64(&memcg->memsw, RES_USAGE); |
5438 | } else | ||
5492 | BUG(); | 5439 | BUG(); |
5493 | 5440 | ||
5494 | if (!thresholds->primary) | 5441 | if (!thresholds->primary) |
5495 | goto unlock; | 5442 | goto unlock; |
5496 | 5443 | ||
5497 | usage = mem_cgroup_usage(memcg, type == _MEMSWAP); | ||
5498 | |||
5499 | /* Check if a threshold crossed before removing */ | 5444 | /* Check if a threshold crossed before removing */ |
5500 | __mem_cgroup_threshold(memcg, type == _MEMSWAP); | 5445 | __mem_cgroup_threshold(memcg, type == _MEMSWAP); |
5501 | 5446 | ||
@@ -6249,9 +6194,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css) | |||
6249 | * core guarantees its existence. | 6194 | * core guarantees its existence. |
6250 | */ | 6195 | */ |
6251 | } else { | 6196 | } else { |
6252 | res_counter_init(&memcg->res, NULL); | 6197 | res_counter_init(&memcg->res, &root_mem_cgroup->res); |
6253 | res_counter_init(&memcg->memsw, NULL); | 6198 | res_counter_init(&memcg->memsw, &root_mem_cgroup->memsw); |
6254 | res_counter_init(&memcg->kmem, NULL); | 6199 | res_counter_init(&memcg->kmem, &root_mem_cgroup->kmem); |
6255 | /* | 6200 | /* |
6256 | * Deeper hierarchy with use_hierarchy == false doesn't make | 6201 | * Deeper hierarchy with use_hierarchy == false doesn't make |
6257 | * much sense so let cgroup subsystem know about this | 6202 | * much sense so let cgroup subsystem know about this |
@@ -6387,13 +6332,7 @@ static void mem_cgroup_css_reset(struct cgroup_subsys_state *css) | |||
6387 | /* Handlers for move charge at task migration. */ | 6332 | /* Handlers for move charge at task migration. */ |
6388 | static int mem_cgroup_do_precharge(unsigned long count) | 6333 | static int mem_cgroup_do_precharge(unsigned long count) |
6389 | { | 6334 | { |
6390 | int ret = 0; | 6335 | int ret; |
6391 | |||
6392 | if (mem_cgroup_is_root(mc.to)) { | ||
6393 | mc.precharge += count; | ||
6394 | /* we don't need css_get for root */ | ||
6395 | return ret; | ||
6396 | } | ||
6397 | 6336 | ||
6398 | /* Try a single bulk charge without reclaim first */ | 6337 | /* Try a single bulk charge without reclaim first */ |
6399 | ret = mem_cgroup_try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count); | 6338 | ret = mem_cgroup_try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count); |
@@ -6700,21 +6639,18 @@ static void __mem_cgroup_clear_mc(void) | |||
6700 | /* we must fixup refcnts and charges */ | 6639 | /* we must fixup refcnts and charges */ |
6701 | if (mc.moved_swap) { | 6640 | if (mc.moved_swap) { |
6702 | /* uncharge swap account from the old cgroup */ | 6641 | /* uncharge swap account from the old cgroup */ |
6703 | if (!mem_cgroup_is_root(mc.from)) | 6642 | res_counter_uncharge(&mc.from->memsw, |
6704 | res_counter_uncharge(&mc.from->memsw, | 6643 | PAGE_SIZE * mc.moved_swap); |
6705 | PAGE_SIZE * mc.moved_swap); | ||
6706 | 6644 | ||
6707 | for (i = 0; i < mc.moved_swap; i++) | 6645 | for (i = 0; i < mc.moved_swap; i++) |
6708 | css_put(&mc.from->css); | 6646 | css_put(&mc.from->css); |
6709 | 6647 | ||
6710 | if (!mem_cgroup_is_root(mc.to)) { | 6648 | /* |
6711 | /* | 6649 | * we charged both to->res and to->memsw, so we should |
6712 | * we charged both to->res and to->memsw, so we should | 6650 | * uncharge to->res. |
6713 | * uncharge to->res. | 6651 | */ |
6714 | */ | 6652 | res_counter_uncharge(&mc.to->res, |
6715 | res_counter_uncharge(&mc.to->res, | 6653 | PAGE_SIZE * mc.moved_swap); |
6716 | PAGE_SIZE * mc.moved_swap); | ||
6717 | } | ||
6718 | /* we've already done css_get(mc.to) */ | 6654 | /* we've already done css_get(mc.to) */ |
6719 | mc.moved_swap = 0; | 6655 | mc.moved_swap = 0; |
6720 | } | 6656 | } |