author    Vladimir Davydov <vdavydov@parallels.com>  2014-01-23 18:53:09 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>  2014-01-23 19:36:51 -0500
commit    d6441637709ba302905f1076f2afcb6d4ea3a901 (patch)
tree      043dab40e692dc25d92d40c19218e0386336d274 /mm/memcontrol.c
parent    6de64beb3435ab8f2ac1428dd7dddad5dc679c4b (diff)
memcg: rework memcg_update_kmem_limit synchronization
Currently we take both the memcg_create_mutex and the set_limit_mutex
when we enable kmem accounting for a memory cgroup, which makes kmem
activation events serialize with both memcg creations and other memcg
limit updates (memory.limit, memory.memsw.limit). However, there is no
point in such strict synchronization rules there.

First, the set_limit_mutex was introduced to keep the memory.limit and
memory.memsw.limit values in sync. Since memory.kmem.limit can be set
independently of them, it is better to introduce a separate mutex to
synchronize against concurrent kmem limit updates.

Second, we take the memcg_create_mutex in order to make sure all
children of this memcg will be kmem-active as well. To achieve that,
however, it is enough to hold this mutex only while checking
memcg_has_children(). This guarantees that if a child is added after we
checked that the memcg has no children, the newly added cgroup will see
its parent kmem-active (provided the activation succeeded) and will
activate kmem accounting for itself.

This patch simplifies the locking rules of memcg_update_kmem_limit()
according to these considerations.

[vdavydov@parallels.com: fix uninitialized var warning]
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Balbir Singh <bsingharora@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
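For orientation, the post-patch locking rules can be summarized in a short
sketch. This is condensed from the diff below and is not a compilable
excerpt: the kmemcg id allocation, root-cache array resizing, the
kmem_account stop/resume pair, and the error unwinding that live in
__memcg_activate_kmem() are elided so that the two locking changes stand
out.

	/* Condensed sketch of the post-patch locking rules (details elided). */
	static int memcg_update_kmem_limit(struct mem_cgroup *memcg,
					   unsigned long long val)
	{
		/*
		 * Already active: updating the kmem limit is a plain resize
		 * and only needs to synchronize with other kmem limit
		 * updates, not with memcg creation or memory.limit writes.
		 */
		if (memcg_kmem_is_active(memcg))
			return res_counter_set_limit(&memcg->kmem, val);

		/*
		 * The first write activates kmem accounting; activations are
		 * serialized by the new dedicated mutex instead of
		 * set_limit_mutex.
		 */
		mutex_lock(&activate_kmem_mutex);

		/*
		 * memcg_create_mutex is held only across the children check:
		 * a child created after this check observes its parent
		 * kmem-active and activates itself via
		 * memcg_propagate_kmem().
		 */
		mutex_lock(&memcg_create_mutex);
		if (cgroup_task_count(memcg->css.cgroup) ||
		    memcg_has_children(memcg)) {
			mutex_unlock(&memcg_create_mutex);
			mutex_unlock(&activate_kmem_mutex);
			return -EBUSY;
		}
		mutex_unlock(&memcg_create_mutex);

		/* ... allocate kmemcg_id, grow the root caches' memcg_params,
		 * set the limit, flip the active bit (see the diff below) ... */

		mutex_unlock(&activate_kmem_mutex);
		return 0;
	}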
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--	mm/memcontrol.c	196
1 file changed, 105 insertions(+), 91 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 706f7bc16db2..c8715056e1ef 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2977,6 +2977,8 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 static DEFINE_MUTEX(set_limit_mutex);
 
 #ifdef CONFIG_MEMCG_KMEM
+static DEFINE_MUTEX(activate_kmem_mutex);
+
 static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg)
 {
 	return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg) &&
@@ -3089,34 +3091,6 @@ int memcg_cache_id(struct mem_cgroup *memcg)
 	return memcg ? memcg->kmemcg_id : -1;
 }
 
-/*
- * This ends up being protected by the set_limit mutex, during normal
- * operation, because that is its main call site.
- *
- * But when we create a new cache, we can call this as well if its parent
- * is kmem-limited. That will have to hold set_limit_mutex as well.
- */
-static int memcg_update_cache_sizes(struct mem_cgroup *memcg)
-{
-	int num, ret;
-
-	num = ida_simple_get(&kmem_limited_groups,
-			     0, MEMCG_CACHES_MAX_SIZE, GFP_KERNEL);
-	if (num < 0)
-		return num;
-
-	ret = memcg_update_all_caches(num+1);
-	if (ret) {
-		ida_simple_remove(&kmem_limited_groups, num);
-		return ret;
-	}
-
-	memcg->kmemcg_id = num;
-	INIT_LIST_HEAD(&memcg->memcg_slab_caches);
-	mutex_init(&memcg->slab_caches_mutex);
-	return 0;
-}
-
 static size_t memcg_caches_array_size(int num_groups)
 {
 	ssize_t size;
@@ -3459,9 +3433,10 @@ void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 	 *
 	 * Still, we don't want anyone else freeing memcg_caches under our
 	 * noses, which can happen if a new memcg comes to life. As usual,
-	 * we'll take the set_limit_mutex to protect ourselves against this.
+	 * we'll take the activate_kmem_mutex to protect ourselves against
+	 * this.
 	 */
-	mutex_lock(&set_limit_mutex);
+	mutex_lock(&activate_kmem_mutex);
 	for_each_memcg_cache_index(i) {
 		c = cache_from_memcg_idx(s, i);
 		if (!c)
@@ -3484,7 +3459,7 @@ void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 		cancel_work_sync(&c->memcg_params->destroy);
 		kmem_cache_destroy(c);
 	}
-	mutex_unlock(&set_limit_mutex);
+	mutex_unlock(&activate_kmem_mutex);
 }
 
 struct create_work {
@@ -5148,11 +5123,23 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
 	return val;
 }
 
-static int memcg_update_kmem_limit(struct cgroup_subsys_state *css, u64 val)
-{
-	int ret = -EINVAL;
 #ifdef CONFIG_MEMCG_KMEM
-	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+/* should be called with activate_kmem_mutex held */
+static int __memcg_activate_kmem(struct mem_cgroup *memcg,
+				 unsigned long long limit)
+{
+	int err = 0;
+	int memcg_id;
+
+	if (memcg_kmem_is_active(memcg))
+		return 0;
+
+	/*
+	 * We are going to allocate memory for data shared by all memory
+	 * cgroups so let's stop accounting here.
+	 */
+	memcg_stop_kmem_account();
+
 	/*
 	 * For simplicity, we won't allow this to be disabled. It also can't
 	 * be changed if the cgroup has children already, or if tasks had
@@ -5166,72 +5153,101 @@ static int memcg_update_kmem_limit(struct cgroup_subsys_state *css, u64 val)
 	 * of course permitted.
 	 */
 	mutex_lock(&memcg_create_mutex);
-	mutex_lock(&set_limit_mutex);
-	if (!memcg->kmem_account_flags && val != RES_COUNTER_MAX) {
-		if (cgroup_task_count(css->cgroup) || memcg_has_children(memcg)) {
-			ret = -EBUSY;
-			goto out;
-		}
-		ret = res_counter_set_limit(&memcg->kmem, val);
-		VM_BUG_ON(ret);
+	if (cgroup_task_count(memcg->css.cgroup) || memcg_has_children(memcg))
+		err = -EBUSY;
+	mutex_unlock(&memcg_create_mutex);
+	if (err)
+		goto out;
 
-		ret = memcg_update_cache_sizes(memcg);
-		if (ret) {
-			res_counter_set_limit(&memcg->kmem, RES_COUNTER_MAX);
-			goto out;
-		}
-		static_key_slow_inc(&memcg_kmem_enabled_key);
-		/*
-		 * setting the active bit after the inc will guarantee no one
-		 * starts accounting before all call sites are patched
-		 */
-		memcg_kmem_set_active(memcg);
-	} else
-		ret = res_counter_set_limit(&memcg->kmem, val);
+	memcg_id = ida_simple_get(&kmem_limited_groups,
+				  0, MEMCG_CACHES_MAX_SIZE, GFP_KERNEL);
+	if (memcg_id < 0) {
+		err = memcg_id;
+		goto out;
+	}
+
+	/*
+	 * Make sure we have enough space for this cgroup in each root cache's
+	 * memcg_params.
+	 */
+	err = memcg_update_all_caches(memcg_id + 1);
+	if (err)
+		goto out_rmid;
+
+	memcg->kmemcg_id = memcg_id;
+	INIT_LIST_HEAD(&memcg->memcg_slab_caches);
+	mutex_init(&memcg->slab_caches_mutex);
+
+	/*
+	 * We couldn't have accounted to this cgroup, because it hasn't got the
+	 * active bit set yet, so this should succeed.
+	 */
+	err = res_counter_set_limit(&memcg->kmem, limit);
+	VM_BUG_ON(err);
+
+	static_key_slow_inc(&memcg_kmem_enabled_key);
+	/*
+	 * Setting the active bit after enabling static branching will
+	 * guarantee no one starts accounting before all call sites are
+	 * patched.
+	 */
+	memcg_kmem_set_active(memcg);
 out:
-	mutex_unlock(&set_limit_mutex);
-	mutex_unlock(&memcg_create_mutex);
-#endif
+	memcg_resume_kmem_account();
+	return err;
+
+out_rmid:
+	ida_simple_remove(&kmem_limited_groups, memcg_id);
+	goto out;
+}
+
+static int memcg_activate_kmem(struct mem_cgroup *memcg,
+			       unsigned long long limit)
+{
+	int ret;
+
+	mutex_lock(&activate_kmem_mutex);
+	ret = __memcg_activate_kmem(memcg, limit);
+	mutex_unlock(&activate_kmem_mutex);
+	return ret;
+}
+
+static int memcg_update_kmem_limit(struct mem_cgroup *memcg,
+				   unsigned long long val)
+{
+	int ret;
+
+	if (!memcg_kmem_is_active(memcg))
+		ret = memcg_activate_kmem(memcg, val);
+	else
+		ret = res_counter_set_limit(&memcg->kmem, val);
 	return ret;
 }
 
-#ifdef CONFIG_MEMCG_KMEM
 static int memcg_propagate_kmem(struct mem_cgroup *memcg)
 {
 	int ret = 0;
 	struct mem_cgroup *parent = parent_mem_cgroup(memcg);
-	if (!parent)
-		goto out;
 
-	memcg->kmem_account_flags = parent->kmem_account_flags;
-	/*
-	 * When that happen, we need to disable the static branch only on those
-	 * memcgs that enabled it. To achieve this, we would be forced to
-	 * complicate the code by keeping track of which memcgs were the ones
-	 * that actually enabled limits, and which ones got it from its
-	 * parents.
-	 *
-	 * It is a lot simpler just to do static_key_slow_inc() on every child
-	 * that is accounted.
-	 */
-	if (!memcg_kmem_is_active(memcg))
-		goto out;
+	if (!parent)
+		return 0;
 
+	mutex_lock(&activate_kmem_mutex);
 	/*
-	 * __mem_cgroup_free() will issue static_key_slow_dec() because this
-	 * memcg is active already. If the later initialization fails then the
-	 * cgroup core triggers the cleanup so we do not have to do it here.
+	 * If the parent cgroup is not kmem-active now, it cannot be activated
+	 * after this point, because it has at least one child already.
 	 */
-	static_key_slow_inc(&memcg_kmem_enabled_key);
-
-	mutex_lock(&set_limit_mutex);
-	memcg_stop_kmem_account();
-	ret = memcg_update_cache_sizes(memcg);
-	memcg_resume_kmem_account();
-	mutex_unlock(&set_limit_mutex);
-out:
+	if (memcg_kmem_is_active(parent))
+		ret = __memcg_activate_kmem(memcg, RES_COUNTER_MAX);
+	mutex_unlock(&activate_kmem_mutex);
 	return ret;
 }
+#else
+static int memcg_update_kmem_limit(struct mem_cgroup *memcg,
+				   unsigned long long val)
+{
+	return -EINVAL;
+}
 #endif /* CONFIG_MEMCG_KMEM */
 
 /*
@@ -5265,7 +5281,7 @@ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
 	else if (type == _MEMSWAP)
 		ret = mem_cgroup_resize_memsw_limit(memcg, val);
 	else if (type == _KMEM)
-		ret = memcg_update_kmem_limit(css, val);
+		ret = memcg_update_kmem_limit(memcg, val);
 	else
 		return -EINVAL;
 	break;
@@ -6499,7 +6515,6 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css));
-	int error = 0;
 
 	if (css->cgroup->id > MEM_CGROUP_ID_MAX)
 		return -ENOSPC;
@@ -6534,10 +6549,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	if (parent != root_mem_cgroup)
 		mem_cgroup_subsys.broken_hierarchy = true;
 	}
-
-	error = memcg_init_kmem(memcg, &mem_cgroup_subsys);
 	mutex_unlock(&memcg_create_mutex);
-	return error;
+
+	return memcg_init_kmem(memcg, &mem_cgroup_subsys);
 }
 
 /*