diff options
| -rw-r--r-- | mm/memcontrol.c | 103 |
1 files changed, 78 insertions, 25 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index ec4dcf1b9562..085dc6d2f876 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
| @@ -2534,6 +2534,8 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask, | |||
| 2534 | unsigned long long size; | 2534 | unsigned long long size; |
| 2535 | int ret = 0; | 2535 | int ret = 0; |
| 2536 | 2536 | ||
| 2537 | if (mem_cgroup_is_root(memcg)) | ||
| 2538 | goto done; | ||
| 2537 | retry: | 2539 | retry: |
| 2538 | if (consume_stock(memcg, nr_pages)) | 2540 | if (consume_stock(memcg, nr_pages)) |
| 2539 | goto done; | 2541 | goto done; |
| @@ -2611,9 +2613,7 @@ nomem: | |||
| 2611 | if (!(gfp_mask & __GFP_NOFAIL)) | 2613 | if (!(gfp_mask & __GFP_NOFAIL)) |
| 2612 | return -ENOMEM; | 2614 | return -ENOMEM; |
| 2613 | bypass: | 2615 | bypass: |
| 2614 | memcg = root_mem_cgroup; | 2616 | return -EINTR; |
| 2615 | ret = -EINTR; | ||
| 2616 | goto retry; | ||
| 2617 | 2617 | ||
| 2618 | done_restock: | 2618 | done_restock: |
| 2619 | if (batch > nr_pages) | 2619 | if (batch > nr_pages) |
| @@ -2626,6 +2626,9 @@ static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages) | |||
| 2626 | { | 2626 | { |
| 2627 | unsigned long bytes = nr_pages * PAGE_SIZE; | 2627 | unsigned long bytes = nr_pages * PAGE_SIZE; |
| 2628 | 2628 | ||
| 2629 | if (mem_cgroup_is_root(memcg)) | ||
| 2630 | return; | ||
| 2631 | |||
| 2629 | res_counter_uncharge(&memcg->res, bytes); | 2632 | res_counter_uncharge(&memcg->res, bytes); |
| 2630 | if (do_swap_account) | 2633 | if (do_swap_account) |
| 2631 | res_counter_uncharge(&memcg->memsw, bytes); | 2634 | res_counter_uncharge(&memcg->memsw, bytes); |
| @@ -2640,6 +2643,9 @@ static void __mem_cgroup_cancel_local_charge(struct mem_cgroup *memcg, | |||
| 2640 | { | 2643 | { |
| 2641 | unsigned long bytes = nr_pages * PAGE_SIZE; | 2644 | unsigned long bytes = nr_pages * PAGE_SIZE; |
| 2642 | 2645 | ||
| 2646 | if (mem_cgroup_is_root(memcg)) | ||
| 2647 | return; | ||
| 2648 | |||
| 2643 | res_counter_uncharge_until(&memcg->res, memcg->res.parent, bytes); | 2649 | res_counter_uncharge_until(&memcg->res, memcg->res.parent, bytes); |
| 2644 | if (do_swap_account) | 2650 | if (do_swap_account) |
| 2645 | res_counter_uncharge_until(&memcg->memsw, | 2651 | res_counter_uncharge_until(&memcg->memsw, |
| @@ -4093,6 +4099,46 @@ out: | |||
| 4093 | return retval; | 4099 | return retval; |
| 4094 | } | 4100 | } |
| 4095 | 4101 | ||
| 4102 | static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *memcg, | ||
| 4103 | enum mem_cgroup_stat_index idx) | ||
| 4104 | { | ||
| 4105 | struct mem_cgroup *iter; | ||
| 4106 | long val = 0; | ||
| 4107 | |||
| 4108 | /* Sum statistic idx over every cgroup that for_each_mem_cgroup_tree() visits starting from memcg. Per-cpu values can be negative, so use a signed accumulator. */ | ||
| 4109 | for_each_mem_cgroup_tree(iter, memcg) | ||
| 4110 | val += mem_cgroup_read_stat(iter, idx); | ||
| 4111 | |||
| 4112 | if (val < 0) /* presumably a transient race between counter updates; clamp to 0 so the unsigned return value does not wrap */ | ||
| 4113 | val = 0; | ||
| 4114 | return val; | ||
| 4115 | } | ||
| 4116 | |||
| 4117 | static inline u64 mem_cgroup_usage(struct mem_cgroup *memcg, bool swap) | ||
| 4118 | { | ||
| 4119 | u64 val; | ||
| 4120 | |||
| 4121 | if (!mem_cgroup_is_root(memcg)) { /* non-root: report RES_USAGE straight from the res_counter selected by swap */ | ||
| 4122 | if (!swap) | ||
| 4123 | return res_counter_read_u64(&memcg->res, RES_USAGE); | ||
| 4124 | else | ||
| 4125 | return res_counter_read_u64(&memcg->memsw, RES_USAGE); | ||
| 4126 | } | ||
| 4127 | |||
| 4128 | /* | ||
| 4129 | * Root cgroup: build the usage from the recursive CACHE and RSS statistics (plus SWAP when swap is set); the sum is shifted left by PAGE_SHIFT before it is returned. Transparent hugepages are still accounted for in MEM_CGROUP_STAT_RSS | ||
| 4130 | * as well as in MEM_CGROUP_STAT_RSS_HUGE, so MEM_CGROUP_STAT_RSS_HUGE is not added separately. | ||
| 4131 | */ | ||
| 4132 | val = mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_CACHE); | ||
| 4133 | val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_RSS); | ||
| 4134 | |||
| 4135 | if (swap) | ||
| 4136 | val += mem_cgroup_recursive_stat(memcg, MEM_CGROUP_STAT_SWAP); | ||
| 4137 | |||
| 4138 | return val << PAGE_SHIFT; | ||
| 4139 | } | ||
| 4140 | |||
| 4141 | |||
| 4096 | static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css, | 4142 | static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css, |
| 4097 | struct cftype *cft) | 4143 | struct cftype *cft) |
| 4098 | { | 4144 | { |
| @@ -4102,8 +4148,12 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css, | |||
| 4102 | 4148 | ||
| 4103 | switch (type) { | 4149 | switch (type) { |
| 4104 | case _MEM: | 4150 | case _MEM: |
| 4151 | if (name == RES_USAGE) | ||
| 4152 | return mem_cgroup_usage(memcg, false); | ||
| 4105 | return res_counter_read_u64(&memcg->res, name); | 4153 | return res_counter_read_u64(&memcg->res, name); |
| 4106 | case _MEMSWAP: | 4154 | case _MEMSWAP: |
| 4155 | if (name == RES_USAGE) | ||
| 4156 | return mem_cgroup_usage(memcg, true); | ||
| 4107 | return res_counter_read_u64(&memcg->memsw, name); | 4157 | return res_counter_read_u64(&memcg->memsw, name); |
| 4108 | case _KMEM: | 4158 | case _KMEM: |
| 4109 | return res_counter_read_u64(&memcg->kmem, name); | 4159 | return res_counter_read_u64(&memcg->kmem, name); |
| @@ -4572,10 +4622,7 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap) | |||
| 4572 | if (!t) | 4622 | if (!t) |
| 4573 | goto unlock; | 4623 | goto unlock; |
| 4574 | 4624 | ||
| 4575 | if (!swap) | 4625 | usage = mem_cgroup_usage(memcg, swap); |
| 4576 | usage = res_counter_read_u64(&memcg->res, RES_USAGE); | ||
| 4577 | else | ||
| 4578 | usage = res_counter_read_u64(&memcg->memsw, RES_USAGE); | ||
| 4579 | 4626 | ||
| 4580 | /* | 4627 | /* |
| 4581 | * current_threshold points to threshold just below or equal to usage. | 4628 | * current_threshold points to threshold just below or equal to usage. |
| @@ -4673,10 +4720,10 @@ static int __mem_cgroup_usage_register_event(struct mem_cgroup *memcg, | |||
| 4673 | 4720 | ||
| 4674 | if (type == _MEM) { | 4721 | if (type == _MEM) { |
| 4675 | thresholds = &memcg->thresholds; | 4722 | thresholds = &memcg->thresholds; |
| 4676 | usage = res_counter_read_u64(&memcg->res, RES_USAGE); | 4723 | usage = mem_cgroup_usage(memcg, false); |
| 4677 | } else if (type == _MEMSWAP) { | 4724 | } else if (type == _MEMSWAP) { |
| 4678 | thresholds = &memcg->memsw_thresholds; | 4725 | thresholds = &memcg->memsw_thresholds; |
| 4679 | usage = res_counter_read_u64(&memcg->memsw, RES_USAGE); | 4726 | usage = mem_cgroup_usage(memcg, true); |
| 4680 | } else | 4727 | } else |
| 4681 | BUG(); | 4728 | BUG(); |
| 4682 | 4729 | ||
| @@ -4762,10 +4809,10 @@ static void __mem_cgroup_usage_unregister_event(struct mem_cgroup *memcg, | |||
| 4762 | 4809 | ||
| 4763 | if (type == _MEM) { | 4810 | if (type == _MEM) { |
| 4764 | thresholds = &memcg->thresholds; | 4811 | thresholds = &memcg->thresholds; |
| 4765 | usage = res_counter_read_u64(&memcg->res, RES_USAGE); | 4812 | usage = mem_cgroup_usage(memcg, false); |
| 4766 | } else if (type == _MEMSWAP) { | 4813 | } else if (type == _MEMSWAP) { |
| 4767 | thresholds = &memcg->memsw_thresholds; | 4814 | thresholds = &memcg->memsw_thresholds; |
| 4768 | usage = res_counter_read_u64(&memcg->memsw, RES_USAGE); | 4815 | usage = mem_cgroup_usage(memcg, true); |
| 4769 | } else | 4816 | } else |
| 4770 | BUG(); | 4817 | BUG(); |
| 4771 | 4818 | ||
| @@ -5525,9 +5572,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css) | |||
| 5525 | * core guarantees its existence. | 5572 | * core guarantees its existence. |
| 5526 | */ | 5573 | */ |
| 5527 | } else { | 5574 | } else { |
| 5528 | res_counter_init(&memcg->res, &root_mem_cgroup->res); | 5575 | res_counter_init(&memcg->res, NULL); |
| 5529 | res_counter_init(&memcg->memsw, &root_mem_cgroup->memsw); | 5576 | res_counter_init(&memcg->memsw, NULL); |
| 5530 | res_counter_init(&memcg->kmem, &root_mem_cgroup->kmem); | 5577 | res_counter_init(&memcg->kmem, NULL); |
| 5531 | /* | 5578 | /* |
| 5532 | * Deeper hierarchy with use_hierarchy == false doesn't make | 5579 | * Deeper hierarchy with use_hierarchy == false doesn't make |
| 5533 | * much sense so let cgroup subsystem know about this | 5580 | * much sense so let cgroup subsystem know about this |
| @@ -5969,8 +6016,9 @@ static void __mem_cgroup_clear_mc(void) | |||
| 5969 | /* we must fixup refcnts and charges */ | 6016 | /* we must fixup refcnts and charges */ |
| 5970 | if (mc.moved_swap) { | 6017 | if (mc.moved_swap) { |
| 5971 | /* uncharge swap account from the old cgroup */ | 6018 | /* uncharge swap account from the old cgroup */ |
| 5972 | res_counter_uncharge(&mc.from->memsw, | 6019 | if (!mem_cgroup_is_root(mc.from)) |
| 5973 | PAGE_SIZE * mc.moved_swap); | 6020 | res_counter_uncharge(&mc.from->memsw, |
| 6021 | PAGE_SIZE * mc.moved_swap); | ||
| 5974 | 6022 | ||
| 5975 | for (i = 0; i < mc.moved_swap; i++) | 6023 | for (i = 0; i < mc.moved_swap; i++) |
| 5976 | css_put(&mc.from->css); | 6024 | css_put(&mc.from->css); |
| @@ -5979,8 +6027,9 @@ static void __mem_cgroup_clear_mc(void) | |||
| 5979 | * we charged both to->res and to->memsw, so we should | 6027 | * we charged both to->res and to->memsw, so we should |
| 5980 | * uncharge to->res. | 6028 | * uncharge to->res. |
| 5981 | */ | 6029 | */ |
| 5982 | res_counter_uncharge(&mc.to->res, | 6030 | if (!mem_cgroup_is_root(mc.to)) |
| 5983 | PAGE_SIZE * mc.moved_swap); | 6031 | res_counter_uncharge(&mc.to->res, |
| 6032 | PAGE_SIZE * mc.moved_swap); | ||
| 5984 | /* we've already done css_get(mc.to) */ | 6033 | /* we've already done css_get(mc.to) */ |
| 5985 | mc.moved_swap = 0; | 6034 | mc.moved_swap = 0; |
| 5986 | } | 6035 | } |
| @@ -6345,7 +6394,8 @@ void mem_cgroup_uncharge_swap(swp_entry_t entry) | |||
| 6345 | rcu_read_lock(); | 6394 | rcu_read_lock(); |
| 6346 | memcg = mem_cgroup_lookup(id); | 6395 | memcg = mem_cgroup_lookup(id); |
| 6347 | if (memcg) { | 6396 | if (memcg) { |
| 6348 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE); | 6397 | if (!mem_cgroup_is_root(memcg)) |
| 6398 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE); | ||
| 6349 | mem_cgroup_swap_statistics(memcg, false); | 6399 | mem_cgroup_swap_statistics(memcg, false); |
| 6350 | css_put(&memcg->css); | 6400 | css_put(&memcg->css); |
| 6351 | } | 6401 | } |
| @@ -6509,12 +6559,15 @@ static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout, | |||
| 6509 | { | 6559 | { |
| 6510 | unsigned long flags; | 6560 | unsigned long flags; |
| 6511 | 6561 | ||
| 6512 | if (nr_mem) | 6562 | if (!mem_cgroup_is_root(memcg)) { |
| 6513 | res_counter_uncharge(&memcg->res, nr_mem * PAGE_SIZE); | 6563 | if (nr_mem) |
| 6514 | if (nr_memsw) | 6564 | res_counter_uncharge(&memcg->res, |
| 6515 | res_counter_uncharge(&memcg->memsw, nr_memsw * PAGE_SIZE); | 6565 | nr_mem * PAGE_SIZE); |
| 6516 | 6566 | if (nr_memsw) | |
| 6517 | memcg_oom_recover(memcg); | 6567 | res_counter_uncharge(&memcg->memsw, |
| 6568 | nr_memsw * PAGE_SIZE); | ||
| 6569 | memcg_oom_recover(memcg); | ||
| 6570 | } | ||
| 6518 | 6571 | ||
| 6519 | local_irq_save(flags); | 6572 | local_irq_save(flags); |
| 6520 | __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon); | 6573 | __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS], nr_anon); |
