 mm/memcontrol.c | 196 ++++++++++++++++++++++++++++++-------------------------
 1 file changed, 105 insertions(+), 91 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 706f7bc16db2..c8715056e1ef 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2977,6 +2977,8 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 static DEFINE_MUTEX(set_limit_mutex);
 
 #ifdef CONFIG_MEMCG_KMEM
+static DEFINE_MUTEX(activate_kmem_mutex);
+
 static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg)
 {
 	return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg) &&
@@ -3089,34 +3091,6 @@ int memcg_cache_id(struct mem_cgroup *memcg)
 	return memcg ? memcg->kmemcg_id : -1;
 }
 
-/*
- * This ends up being protected by the set_limit mutex, during normal
- * operation, because that is its main call site.
- *
- * But when we create a new cache, we can call this as well if its parent
- * is kmem-limited. That will have to hold set_limit_mutex as well.
- */
-static int memcg_update_cache_sizes(struct mem_cgroup *memcg)
-{
-	int num, ret;
-
-	num = ida_simple_get(&kmem_limited_groups,
-			     0, MEMCG_CACHES_MAX_SIZE, GFP_KERNEL);
-	if (num < 0)
-		return num;
-
-	ret = memcg_update_all_caches(num+1);
-	if (ret) {
-		ida_simple_remove(&kmem_limited_groups, num);
-		return ret;
-	}
-
-	memcg->kmemcg_id = num;
-	INIT_LIST_HEAD(&memcg->memcg_slab_caches);
-	mutex_init(&memcg->slab_caches_mutex);
-	return 0;
-}
-
 static size_t memcg_caches_array_size(int num_groups)
 {
 	ssize_t size;
@@ -3459,9 +3433,10 @@ void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 	 *
 	 * Still, we don't want anyone else freeing memcg_caches under our
 	 * noses, which can happen if a new memcg comes to life. As usual,
-	 * we'll take the set_limit_mutex to protect ourselves against this.
+	 * we'll take the activate_kmem_mutex to protect ourselves against
+	 * this.
 	 */
-	mutex_lock(&set_limit_mutex);
+	mutex_lock(&activate_kmem_mutex);
 	for_each_memcg_cache_index(i) {
 		c = cache_from_memcg_idx(s, i);
 		if (!c)
@@ -3484,7 +3459,7 @@ void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 		cancel_work_sync(&c->memcg_params->destroy);
 		kmem_cache_destroy(c);
 	}
-	mutex_unlock(&set_limit_mutex);
+	mutex_unlock(&activate_kmem_mutex);
 }
 
 struct create_work {
@@ -5148,11 +5123,23 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
 	return val;
 }
 
-static int memcg_update_kmem_limit(struct cgroup_subsys_state *css, u64 val)
-{
-	int ret = -EINVAL;
 #ifdef CONFIG_MEMCG_KMEM
-	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+/* should be called with activate_kmem_mutex held */
+static int __memcg_activate_kmem(struct mem_cgroup *memcg,
+				 unsigned long long limit)
+{
+	int err = 0;
+	int memcg_id;
+
+	if (memcg_kmem_is_active(memcg))
+		return 0;
+
+	/*
+	 * We are going to allocate memory for data shared by all memory
+	 * cgroups so let's stop accounting here.
+	 */
+	memcg_stop_kmem_account();
+
 	/*
 	 * For simplicity, we won't allow this to be disabled. It also can't
 	 * be changed if the cgroup has children already, or if tasks had
@@ -5166,72 +5153,101 @@ static int memcg_update_kmem_limit(struct cgroup_subsys_state *css, u64 val)
 	 * of course permitted.
 	 */
 	mutex_lock(&memcg_create_mutex);
-	mutex_lock(&set_limit_mutex);
-	if (!memcg->kmem_account_flags && val != RES_COUNTER_MAX) {
-		if (cgroup_task_count(css->cgroup) || memcg_has_children(memcg)) {
-			ret = -EBUSY;
-			goto out;
-		}
-		ret = res_counter_set_limit(&memcg->kmem, val);
-		VM_BUG_ON(ret);
+	if (cgroup_task_count(memcg->css.cgroup) || memcg_has_children(memcg))
+		err = -EBUSY;
+	mutex_unlock(&memcg_create_mutex);
+	if (err)
+		goto out;
 
-		ret = memcg_update_cache_sizes(memcg);
-		if (ret) {
-			res_counter_set_limit(&memcg->kmem, RES_COUNTER_MAX);
-			goto out;
-		}
-		static_key_slow_inc(&memcg_kmem_enabled_key);
-		/*
-		 * setting the active bit after the inc will guarantee no one
-		 * starts accounting before all call sites are patched
-		 */
-		memcg_kmem_set_active(memcg);
-	} else
-		ret = res_counter_set_limit(&memcg->kmem, val);
+	memcg_id = ida_simple_get(&kmem_limited_groups,
+				  0, MEMCG_CACHES_MAX_SIZE, GFP_KERNEL);
+	if (memcg_id < 0) {
+		err = memcg_id;
+		goto out;
+	}
+
+	/*
+	 * Make sure we have enough space for this cgroup in each root cache's
+	 * memcg_params.
+	 */
+	err = memcg_update_all_caches(memcg_id + 1);
+	if (err)
+		goto out_rmid;
+
+	memcg->kmemcg_id = memcg_id;
+	INIT_LIST_HEAD(&memcg->memcg_slab_caches);
+	mutex_init(&memcg->slab_caches_mutex);
+
+	/*
+	 * We couldn't have accounted to this cgroup, because it hasn't got the
+	 * active bit set yet, so this should succeed.
+	 */
+	err = res_counter_set_limit(&memcg->kmem, limit);
+	VM_BUG_ON(err);
+
+	static_key_slow_inc(&memcg_kmem_enabled_key);
+	/*
+	 * Setting the active bit after enabling static branching will
+	 * guarantee no one starts accounting before all call sites are
+	 * patched.
+	 */
+	memcg_kmem_set_active(memcg);
 out:
-	mutex_unlock(&set_limit_mutex);
-	mutex_unlock(&memcg_create_mutex);
-#endif
+	memcg_resume_kmem_account();
+	return err;
+
+out_rmid:
+	ida_simple_remove(&kmem_limited_groups, memcg_id);
+	goto out;
+}
+
+static int memcg_activate_kmem(struct mem_cgroup *memcg,
+			       unsigned long long limit)
+{
+	int ret;
+
+	mutex_lock(&activate_kmem_mutex);
+	ret = __memcg_activate_kmem(memcg, limit);
+	mutex_unlock(&activate_kmem_mutex);
+	return ret;
+}
+
+static int memcg_update_kmem_limit(struct mem_cgroup *memcg,
+				   unsigned long long val)
+{
+	int ret;
+
+	if (!memcg_kmem_is_active(memcg))
+		ret = memcg_activate_kmem(memcg, val);
+	else
+		ret = res_counter_set_limit(&memcg->kmem, val);
 	return ret;
 }
 
-#ifdef CONFIG_MEMCG_KMEM
 static int memcg_propagate_kmem(struct mem_cgroup *memcg)
 {
 	int ret = 0;
 	struct mem_cgroup *parent = parent_mem_cgroup(memcg);
-	if (!parent)
-		goto out;
 
-	memcg->kmem_account_flags = parent->kmem_account_flags;
-	/*
-	 * When that happen, we need to disable the static branch only on those
-	 * memcgs that enabled it. To achieve this, we would be forced to
-	 * complicate the code by keeping track of which memcgs were the ones
-	 * that actually enabled limits, and which ones got it from its
-	 * parents.
-	 *
-	 * It is a lot simpler just to do static_key_slow_inc() on every child
-	 * that is accounted.
-	 */
-	if (!memcg_kmem_is_active(memcg))
-		goto out;
+	if (!parent)
+		return 0;
 
+	mutex_lock(&activate_kmem_mutex);
 	/*
-	 * __mem_cgroup_free() will issue static_key_slow_dec() because this
-	 * memcg is active already. If the later initialization fails then the
-	 * cgroup core triggers the cleanup so we do not have to do it here.
+	 * If the parent cgroup is not kmem-active now, it cannot be activated
+	 * after this point, because it has at least one child already.
 	 */
-	static_key_slow_inc(&memcg_kmem_enabled_key);
-
-	mutex_lock(&set_limit_mutex);
-	memcg_stop_kmem_account();
-	ret = memcg_update_cache_sizes(memcg);
-	memcg_resume_kmem_account();
-	mutex_unlock(&set_limit_mutex);
-out:
+	if (memcg_kmem_is_active(parent))
+		ret = __memcg_activate_kmem(memcg, RES_COUNTER_MAX);
+	mutex_unlock(&activate_kmem_mutex);
 	return ret;
 }
+#else
+static int memcg_update_kmem_limit(struct mem_cgroup *memcg,
+				   unsigned long long val)
+{
+	return -EINVAL;
+}
 #endif /* CONFIG_MEMCG_KMEM */
 
 /*
@@ -5265,7 +5281,7 @@ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
 	else if (type == _MEMSWAP)
 		ret = mem_cgroup_resize_memsw_limit(memcg, val);
 	else if (type == _KMEM)
-		ret = memcg_update_kmem_limit(css, val);
+		ret = memcg_update_kmem_limit(memcg, val);
 	else
 		return -EINVAL;
 	break;
@@ -6499,7 +6515,6 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css));
-	int error = 0;
 
 	if (css->cgroup->id > MEM_CGROUP_ID_MAX)
 		return -ENOSPC;
@@ -6534,10 +6549,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 		if (parent != root_mem_cgroup)
 			mem_cgroup_subsys.broken_hierarchy = true;
 	}
-
-	error = memcg_init_kmem(memcg, &mem_cgroup_subsys);
 	mutex_unlock(&memcg_create_mutex);
-	return error;
+
+	return memcg_init_kmem(memcg, &mem_cgroup_subsys);
 }
 
 /*