diff options
author | Li Zefan <lizefan@huawei.com> | 2013-07-08 19:00:33 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-07-09 13:33:24 -0400 |
commit | 10d5ebf40ff09db03b97cb177f24b9c7c8b4bb52 (patch) | |
tree | 659b3431157e052d6ab77c7c6c2951e6ab68d82f /mm/memcontrol.c | |
parent | 20f05310ba62d5816fb339d08effe78683137197 (diff) |
memcg: use css_get/put when charging/uncharging kmem
Use css_get/put instead of mem_cgroup_get/put.
We can't do a simple replacement, because here mem_cgroup_put() is
called during mem_cgroup_css_free(), while mem_cgroup_css_free() won't
be called until css refcnt goes down to 0.
Instead we increment css refcnt in mem_cgroup_css_offline(), and then
check if there are still kmem charges. If not, css refcnt will be
decremented immediately, otherwise the refcnt will be released after the
last kmem allocation is uncharged.
[akpm@linux-foundation.org: tweak comment]
Signed-off-by: Li Zefan <lizefan@huawei.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Acked-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Reviewed-by: Tejun Heo <tj@kernel.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Glauber Costa <glommer@openvz.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 80 |
1 files changed, 54 insertions, 26 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 80175ded718d..bdc9582585af 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -406,6 +406,11 @@ static void memcg_kmem_clear_activated(struct mem_cgroup *memcg) | |||
406 | 406 | ||
407 | static void memcg_kmem_mark_dead(struct mem_cgroup *memcg) | 407 | static void memcg_kmem_mark_dead(struct mem_cgroup *memcg) |
408 | { | 408 | { |
409 | /* | ||
410 | * Our caller must use css_get() first, because memcg_uncharge_kmem() | ||
411 | * will call css_put() if it sees the memcg is dead. | ||
412 | */ | ||
413 | smp_wmb(); | ||
409 | if (test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags)) | 414 | if (test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags)) |
410 | set_bit(KMEM_ACCOUNTED_DEAD, &memcg->kmem_account_flags); | 415 | set_bit(KMEM_ACCOUNTED_DEAD, &memcg->kmem_account_flags); |
411 | } | 416 | } |
@@ -3050,8 +3055,16 @@ static void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size) | |||
3050 | if (res_counter_uncharge(&memcg->kmem, size)) | 3055 | if (res_counter_uncharge(&memcg->kmem, size)) |
3051 | return; | 3056 | return; |
3052 | 3057 | ||
3058 | /* | ||
3059 | * Releases a reference taken in kmem_cgroup_css_offline in case | ||
3060 | * this last uncharge is racing with the offlining code or it is | ||
3061 | * outliving the memcg existence. | ||
3062 | * | ||
3063 | * The memory barrier imposed by test&clear is paired with the | ||
3064 | * explicit one in memcg_kmem_mark_dead(). | ||
3065 | */ | ||
3053 | if (memcg_kmem_test_and_clear_dead(memcg)) | 3066 | if (memcg_kmem_test_and_clear_dead(memcg)) |
3054 | mem_cgroup_put(memcg); | 3067 | css_put(&memcg->css); |
3055 | } | 3068 | } |
3056 | 3069 | ||
3057 | void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep) | 3070 | void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep) |
@@ -5183,14 +5196,6 @@ static int memcg_update_kmem_limit(struct cgroup *cont, u64 val) | |||
5183 | * starts accounting before all call sites are patched | 5196 | * starts accounting before all call sites are patched |
5184 | */ | 5197 | */ |
5185 | memcg_kmem_set_active(memcg); | 5198 | memcg_kmem_set_active(memcg); |
5186 | |||
5187 | /* | ||
5188 | * kmem charges can outlive the cgroup. In the case of slab | ||
5189 | * pages, for instance, a page contain objects from various | ||
5190 | * processes, so it is unfeasible to migrate them away. We | ||
5191 | * need to reference count the memcg because of that. | ||
5192 | */ | ||
5193 | mem_cgroup_get(memcg); | ||
5194 | } else | 5199 | } else |
5195 | ret = res_counter_set_limit(&memcg->kmem, val); | 5200 | ret = res_counter_set_limit(&memcg->kmem, val); |
5196 | out: | 5201 | out: |
@@ -5223,12 +5228,10 @@ static int memcg_propagate_kmem(struct mem_cgroup *memcg) | |||
5223 | goto out; | 5228 | goto out; |
5224 | 5229 | ||
5225 | /* | 5230 | /* |
5226 | * destroy(), called if we fail, will issue static_key_slow_inc() and | 5231 | * __mem_cgroup_free() will issue static_key_slow_dec() because this |
5227 | * mem_cgroup_put() if kmem is enabled. We have to either call them | 5232 | * memcg is active already. If the later initialization fails then the |
5228 | * unconditionally, or clear the KMEM_ACTIVE flag. I personally find | 5233 | * cgroup core triggers the cleanup so we do not have to do it here. |
5229 | * this more consistent, since it always leads to the same destroy path | ||
5230 | */ | 5234 | */ |
5231 | mem_cgroup_get(memcg); | ||
5232 | static_key_slow_inc(&memcg_kmem_enabled_key); | 5235 | static_key_slow_inc(&memcg_kmem_enabled_key); |
5233 | 5236 | ||
5234 | mutex_lock(&set_limit_mutex); | 5237 | mutex_lock(&set_limit_mutex); |
@@ -5913,23 +5916,43 @@ static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) | |||
5913 | return mem_cgroup_sockets_init(memcg, ss); | 5916 | return mem_cgroup_sockets_init(memcg, ss); |
5914 | } | 5917 | } |
5915 | 5918 | ||
5916 | static void kmem_cgroup_destroy(struct mem_cgroup *memcg) | 5919 | static void memcg_destroy_kmem(struct mem_cgroup *memcg) |
5917 | { | 5920 | { |
5918 | mem_cgroup_sockets_destroy(memcg); | 5921 | mem_cgroup_sockets_destroy(memcg); |
5922 | } | ||
5923 | |||
5924 | static void kmem_cgroup_css_offline(struct mem_cgroup *memcg) | ||
5925 | { | ||
5926 | if (!memcg_kmem_is_active(memcg)) | ||
5927 | return; | ||
5928 | |||
5929 | /* | ||
5930 | * kmem charges can outlive the cgroup. In the case of slab | ||
5931 | * pages, for instance, a page contains objects from various | ||
5932 | * processes. As we prevent from taking a reference for every | ||
5933 | * such allocation we have to be careful when doing uncharge | ||
5934 | * (see memcg_uncharge_kmem) and here during offlining. | ||
5935 | * | ||
5936 | * The idea is that only the _last_ uncharge which sees | ||
5937 | * the dead memcg will drop the last reference. An additional | ||
5938 | * reference is taken here before the group is marked dead | ||
5939 | * which is then paired with css_put during uncharge resp. here. | ||
5940 | * | ||
5941 | * Although this might sound strange as this path is called from | ||
5942 | * css_offline() when the reference might have dropped down to 0 | ||
5943 | * and shouldn't be incremented anymore (css_tryget would fail) | ||
5944 | * we do not have other options because of the kmem allocations | ||
5945 | * lifetime. | ||
5946 | */ | ||
5947 | css_get(&memcg->css); | ||
5919 | 5948 | ||
5920 | memcg_kmem_mark_dead(memcg); | 5949 | memcg_kmem_mark_dead(memcg); |
5921 | 5950 | ||
5922 | if (res_counter_read_u64(&memcg->kmem, RES_USAGE) != 0) | 5951 | if (res_counter_read_u64(&memcg->kmem, RES_USAGE) != 0) |
5923 | return; | 5952 | return; |
5924 | 5953 | ||
5925 | /* | ||
5926 | * Charges already down to 0, undo mem_cgroup_get() done in the charge | ||
5927 | * path here, being careful not to race with memcg_uncharge_kmem: it is | ||
5928 | * possible that the charges went down to 0 between mark_dead and the | ||
5929 | * res_counter read, so in that case, we don't need the put | ||
5930 | */ | ||
5931 | if (memcg_kmem_test_and_clear_dead(memcg)) | 5954 | if (memcg_kmem_test_and_clear_dead(memcg)) |
5932 | mem_cgroup_put(memcg); | 5955 | css_put(&memcg->css); |
5933 | } | 5956 | } |
5934 | #else | 5957 | #else |
5935 | static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) | 5958 | static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) |
@@ -5937,7 +5960,11 @@ static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) | |||
5937 | return 0; | 5960 | return 0; |
5938 | } | 5961 | } |
5939 | 5962 | ||
5940 | static void kmem_cgroup_destroy(struct mem_cgroup *memcg) | 5963 | static void memcg_destroy_kmem(struct mem_cgroup *memcg) |
5964 | { | ||
5965 | } | ||
5966 | |||
5967 | static void kmem_cgroup_css_offline(struct mem_cgroup *memcg) | ||
5941 | { | 5968 | { |
5942 | } | 5969 | } |
5943 | #endif | 5970 | #endif |
@@ -6370,6 +6397,8 @@ static void mem_cgroup_css_offline(struct cgroup *cont) | |||
6370 | { | 6397 | { |
6371 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | 6398 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); |
6372 | 6399 | ||
6400 | kmem_cgroup_css_offline(memcg); | ||
6401 | |||
6373 | mem_cgroup_invalidate_reclaim_iterators(memcg); | 6402 | mem_cgroup_invalidate_reclaim_iterators(memcg); |
6374 | mem_cgroup_reparent_charges(memcg); | 6403 | mem_cgroup_reparent_charges(memcg); |
6375 | mem_cgroup_destroy_all_caches(memcg); | 6404 | mem_cgroup_destroy_all_caches(memcg); |
@@ -6379,9 +6408,8 @@ static void mem_cgroup_css_free(struct cgroup *cont) | |||
6379 | { | 6408 | { |
6380 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); | 6409 | struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); |
6381 | 6410 | ||
6382 | kmem_cgroup_destroy(memcg); | 6411 | memcg_destroy_kmem(memcg); |
6383 | 6412 | __mem_cgroup_free(memcg); | |
6384 | mem_cgroup_put(memcg); | ||
6385 | } | 6413 | } |
6386 | 6414 | ||
6387 | #ifdef CONFIG_MMU | 6415 | #ifdef CONFIG_MMU |