 mm/memcontrol.c | 196 ++++++++++++++++++++++++++++++--------------------------
 1 file changed, 105 insertions(+), 91 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 706f7bc16db2..c8715056e1ef 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2977,6 +2977,8 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 static DEFINE_MUTEX(set_limit_mutex);
 
 #ifdef CONFIG_MEMCG_KMEM
+static DEFINE_MUTEX(activate_kmem_mutex);
+
 static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg)
 {
 	return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg) &&
@@ -3089,34 +3091,6 @@ int memcg_cache_id(struct mem_cgroup *memcg)
 	return memcg ? memcg->kmemcg_id : -1;
 }
 
-/*
- * This ends up being protected by the set_limit mutex, during normal
- * operation, because that is its main call site.
- *
- * But when we create a new cache, we can call this as well if its parent
- * is kmem-limited. That will have to hold set_limit_mutex as well.
- */
-static int memcg_update_cache_sizes(struct mem_cgroup *memcg)
-{
-	int num, ret;
-
-	num = ida_simple_get(&kmem_limited_groups,
-			     0, MEMCG_CACHES_MAX_SIZE, GFP_KERNEL);
-	if (num < 0)
-		return num;
-
-	ret = memcg_update_all_caches(num+1);
-	if (ret) {
-		ida_simple_remove(&kmem_limited_groups, num);
-		return ret;
-	}
-
-	memcg->kmemcg_id = num;
-	INIT_LIST_HEAD(&memcg->memcg_slab_caches);
-	mutex_init(&memcg->slab_caches_mutex);
-	return 0;
-}
-
 static size_t memcg_caches_array_size(int num_groups)
 {
 	ssize_t size;
@@ -3459,9 +3433,10 @@ void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 	 *
 	 * Still, we don't want anyone else freeing memcg_caches under our
 	 * noses, which can happen if a new memcg comes to life. As usual,
-	 * we'll take the set_limit_mutex to protect ourselves against this.
+	 * we'll take the activate_kmem_mutex to protect ourselves against
+	 * this.
 	 */
-	mutex_lock(&set_limit_mutex);
+	mutex_lock(&activate_kmem_mutex);
 	for_each_memcg_cache_index(i) {
 		c = cache_from_memcg_idx(s, i);
 		if (!c)
@@ -3484,7 +3459,7 @@ void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 		cancel_work_sync(&c->memcg_params->destroy);
 		kmem_cache_destroy(c);
 	}
-	mutex_unlock(&set_limit_mutex);
+	mutex_unlock(&activate_kmem_mutex);
 }
 
 struct create_work {
@@ -5148,11 +5123,23 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
 	return val;
 }
 
-static int memcg_update_kmem_limit(struct cgroup_subsys_state *css, u64 val)
-{
-	int ret = -EINVAL;
 #ifdef CONFIG_MEMCG_KMEM
-	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+/* should be called with activate_kmem_mutex held */
+static int __memcg_activate_kmem(struct mem_cgroup *memcg,
+				 unsigned long long limit)
+{
+	int err = 0;
+	int memcg_id;
+
+	if (memcg_kmem_is_active(memcg))
+		return 0;
+
+	/*
+	 * We are going to allocate memory for data shared by all memory
+	 * cgroups so let's stop accounting here.
+	 */
+	memcg_stop_kmem_account();
+
 	/*
 	 * For simplicity, we won't allow this to be disabled. It also can't
 	 * be changed if the cgroup has children already, or if tasks had
@@ -5166,72 +5153,101 @@ static int memcg_update_kmem_limit(struct cgroup_subsys_state *css, u64 val)
 	 * of course permitted.
 	 */
 	mutex_lock(&memcg_create_mutex);
-	mutex_lock(&set_limit_mutex);
-	if (!memcg->kmem_account_flags && val != RES_COUNTER_MAX) {
-		if (cgroup_task_count(css->cgroup) || memcg_has_children(memcg)) {
-			ret = -EBUSY;
-			goto out;
-		}
-		ret = res_counter_set_limit(&memcg->kmem, val);
-		VM_BUG_ON(ret);
+	if (cgroup_task_count(memcg->css.cgroup) || memcg_has_children(memcg))
+		err = -EBUSY;
+	mutex_unlock(&memcg_create_mutex);
+	if (err)
+		goto out;
 
-		ret = memcg_update_cache_sizes(memcg);
-		if (ret) {
-			res_counter_set_limit(&memcg->kmem, RES_COUNTER_MAX);
-			goto out;
-		}
-		static_key_slow_inc(&memcg_kmem_enabled_key);
-		/*
-		 * setting the active bit after the inc will guarantee no one
-		 * starts accounting before all call sites are patched
-		 */
-		memcg_kmem_set_active(memcg);
-	} else
-		ret = res_counter_set_limit(&memcg->kmem, val);
+	memcg_id = ida_simple_get(&kmem_limited_groups,
+				  0, MEMCG_CACHES_MAX_SIZE, GFP_KERNEL);
+	if (memcg_id < 0) {
+		err = memcg_id;
+		goto out;
+	}
+
+	/*
+	 * Make sure we have enough space for this cgroup in each root cache's
+	 * memcg_params.
+	 */
+	err = memcg_update_all_caches(memcg_id + 1);
+	if (err)
+		goto out_rmid;
+
+	memcg->kmemcg_id = memcg_id;
+	INIT_LIST_HEAD(&memcg->memcg_slab_caches);
+	mutex_init(&memcg->slab_caches_mutex);
+
+	/*
+	 * We couldn't have accounted to this cgroup, because it hasn't got the
+	 * active bit set yet, so this should succeed.
+	 */
+	err = res_counter_set_limit(&memcg->kmem, limit);
+	VM_BUG_ON(err);
+
+	static_key_slow_inc(&memcg_kmem_enabled_key);
+	/*
+	 * Setting the active bit after enabling static branching will
+	 * guarantee no one starts accounting before all call sites are
+	 * patched.
+	 */
+	memcg_kmem_set_active(memcg);
 out:
-	mutex_unlock(&set_limit_mutex);
-	mutex_unlock(&memcg_create_mutex);
-#endif
+	memcg_resume_kmem_account();
+	return err;
+
+out_rmid:
+	ida_simple_remove(&kmem_limited_groups, memcg_id);
+	goto out;
+}
+
+static int memcg_activate_kmem(struct mem_cgroup *memcg,
+			       unsigned long long limit)
+{
+	int ret;
+
+	mutex_lock(&activate_kmem_mutex);
+	ret = __memcg_activate_kmem(memcg, limit);
+	mutex_unlock(&activate_kmem_mutex);
+	return ret;
+}
+
+static int memcg_update_kmem_limit(struct mem_cgroup *memcg,
+				   unsigned long long val)
+{
+	int ret;
+
+	if (!memcg_kmem_is_active(memcg))
+		ret = memcg_activate_kmem(memcg, val);
+	else
+		ret = res_counter_set_limit(&memcg->kmem, val);
 	return ret;
 }
 
-#ifdef CONFIG_MEMCG_KMEM
 static int memcg_propagate_kmem(struct mem_cgroup *memcg)
 {
 	int ret = 0;
 	struct mem_cgroup *parent = parent_mem_cgroup(memcg);
-	if (!parent)
-		goto out;
 
-	memcg->kmem_account_flags = parent->kmem_account_flags;
-	/*
-	 * When that happen, we need to disable the static branch only on those
-	 * memcgs that enabled it. To achieve this, we would be forced to
-	 * complicate the code by keeping track of which memcgs were the ones
-	 * that actually enabled limits, and which ones got it from its
-	 * parents.
-	 *
-	 * It is a lot simpler just to do static_key_slow_inc() on every child
-	 * that is accounted.
-	 */
-	if (!memcg_kmem_is_active(memcg))
-		goto out;
+	if (!parent)
+		return 0;
 
+	mutex_lock(&activate_kmem_mutex);
 	/*
-	 * __mem_cgroup_free() will issue static_key_slow_dec() because this
-	 * memcg is active already. If the later initialization fails then the
-	 * cgroup core triggers the cleanup so we do not have to do it here.
+	 * If the parent cgroup is not kmem-active now, it cannot be activated
+	 * after this point, because it has at least one child already.
 	 */
-	static_key_slow_inc(&memcg_kmem_enabled_key);
-
-	mutex_lock(&set_limit_mutex);
-	memcg_stop_kmem_account();
-	ret = memcg_update_cache_sizes(memcg);
-	memcg_resume_kmem_account();
-	mutex_unlock(&set_limit_mutex);
-out:
+	if (memcg_kmem_is_active(parent))
+		ret = __memcg_activate_kmem(memcg, RES_COUNTER_MAX);
+	mutex_unlock(&activate_kmem_mutex);
 	return ret;
 }
+#else
+static int memcg_update_kmem_limit(struct mem_cgroup *memcg,
+				   unsigned long long val)
+{
+	return -EINVAL;
+}
 #endif /* CONFIG_MEMCG_KMEM */
 
 /*
@@ -5265,7 +5281,7 @@ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
 	else if (type == _MEMSWAP)
 		ret = mem_cgroup_resize_memsw_limit(memcg, val);
 	else if (type == _KMEM)
-		ret = memcg_update_kmem_limit(css, val);
+		ret = memcg_update_kmem_limit(memcg, val);
 	else
 		return -EINVAL;
 	break;
@@ -6499,7 +6515,6 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css));
-	int error = 0;
 
 	if (css->cgroup->id > MEM_CGROUP_ID_MAX)
 		return -ENOSPC;
@@ -6534,10 +6549,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 		if (parent != root_mem_cgroup)
 			mem_cgroup_subsys.broken_hierarchy = true;
 	}
-
-	error = memcg_init_kmem(memcg, &mem_cgroup_subsys);
 	mutex_unlock(&memcg_create_mutex);
-	return error;
+
+	return memcg_init_kmem(memcg, &mem_cgroup_subsys);
 }
 
 /*
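
Note: the locking scheme this patch converges on is a classic lazy one-shot
activation. An unlocked fast-path check (memcg_kmem_is_active()) guards a
one-time setup step that is serialized by a dedicated mutex
(activate_kmem_mutex), and the "active" flag is flipped only after every setup
step has finished, so a caller that observes the flag set may safely skip the
lock. The standalone user-space sketch below models that ordering with
pthreads; all names in it are hypothetical and it illustrates the pattern only,
it is not kernel code.

/* activate_once.c - minimal user-space model of the activate-once scheme.
 * Hypothetical names; an illustration of the pattern, not kernel code. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_mutex_t activate_mutex = PTHREAD_MUTEX_INITIALIZER;

struct group {
	atomic_bool active;	/* plays the role of memcg_kmem_is_active() */
	long limit;		/* plays the role of the kmem res_counter limit */
};

/* One-time setup; must be called with activate_mutex held. */
static int __group_activate(struct group *g, long limit)
{
	if (atomic_load(&g->active))
		return 0;	/* lost the race to another activator; done */

	g->limit = limit;	/* all setup happens before the flag flips... */
	atomic_store(&g->active, 1); /* ...so fast-path readers see it done */
	return 0;
}

static int group_update_limit(struct group *g, long limit)
{
	int ret = 0;

	if (!atomic_load(&g->active)) {	/* slow path: first write activates */
		pthread_mutex_lock(&activate_mutex);
		ret = __group_activate(g, limit);
		pthread_mutex_unlock(&activate_mutex);
	} else {			/* fast path: only update the limit
					 * (in the kernel, res_counter has its
					 * own internal locking for this) */
		g->limit = limit;
	}
	return ret;
}

int main(void)
{
	struct group g = { .limit = 0 };

	group_update_limit(&g, 4096);	/* first call performs activation */
	group_update_limit(&g, 8192);	/* later calls just set the limit */
	printf("active=%d limit=%ld\n", (int)atomic_load(&g.active), g.limit);
	return 0;
}

Because the default atomics above are sequentially consistent, the flag store
cannot be reordered before the setup store, which is the same guarantee the
patch spells out in its "setting the active bit after enabling static
branching" comment.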