 mm/memcontrol.c | 196 ++++++++++++++++++++++++++++++--------------------------
 1 file changed, 105 insertions(+), 91 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 706f7bc16db2..c8715056e1ef 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2977,6 +2977,8 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 static DEFINE_MUTEX(set_limit_mutex);
 
 #ifdef CONFIG_MEMCG_KMEM
+static DEFINE_MUTEX(activate_kmem_mutex);
+
 static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg)
 {
 	return !mem_cgroup_disabled() && !mem_cgroup_is_root(memcg) &&
@@ -3089,34 +3091,6 @@ int memcg_cache_id(struct mem_cgroup *memcg)
 	return memcg ? memcg->kmemcg_id : -1;
 }
 
-/*
- * This ends up being protected by the set_limit mutex, during normal
- * operation, because that is its main call site.
- *
- * But when we create a new cache, we can call this as well if its parent
- * is kmem-limited. That will have to hold set_limit_mutex as well.
- */
-static int memcg_update_cache_sizes(struct mem_cgroup *memcg)
-{
-	int num, ret;
-
-	num = ida_simple_get(&kmem_limited_groups,
-			     0, MEMCG_CACHES_MAX_SIZE, GFP_KERNEL);
-	if (num < 0)
-		return num;
-
-	ret = memcg_update_all_caches(num+1);
-	if (ret) {
-		ida_simple_remove(&kmem_limited_groups, num);
-		return ret;
-	}
-
-	memcg->kmemcg_id = num;
-	INIT_LIST_HEAD(&memcg->memcg_slab_caches);
-	mutex_init(&memcg->slab_caches_mutex);
-	return 0;
-}
-
 static size_t memcg_caches_array_size(int num_groups)
 {
 	ssize_t size;
@@ -3459,9 +3433,10 @@ void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 	 *
 	 * Still, we don't want anyone else freeing memcg_caches under our
 	 * noses, which can happen if a new memcg comes to life. As usual,
-	 * we'll take the set_limit_mutex to protect ourselves against this.
+	 * we'll take the activate_kmem_mutex to protect ourselves against
+	 * this.
 	 */
-	mutex_lock(&set_limit_mutex);
+	mutex_lock(&activate_kmem_mutex);
 	for_each_memcg_cache_index(i) {
 		c = cache_from_memcg_idx(s, i);
 		if (!c)
@@ -3484,7 +3459,7 @@ void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 		cancel_work_sync(&c->memcg_params->destroy);
 		kmem_cache_destroy(c);
 	}
-	mutex_unlock(&set_limit_mutex);
+	mutex_unlock(&activate_kmem_mutex);
 }
 
 struct create_work {
@@ -5148,11 +5123,23 @@ static u64 mem_cgroup_read_u64(struct cgroup_subsys_state *css,
 	return val;
 }
 
-static int memcg_update_kmem_limit(struct cgroup_subsys_state *css, u64 val)
-{
-	int ret = -EINVAL;
 #ifdef CONFIG_MEMCG_KMEM
-	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+/* should be called with activate_kmem_mutex held */
+static int __memcg_activate_kmem(struct mem_cgroup *memcg,
+				 unsigned long long limit)
+{
+	int err = 0;
+	int memcg_id;
+
+	if (memcg_kmem_is_active(memcg))
+		return 0;
+
+	/*
+	 * We are going to allocate memory for data shared by all memory
+	 * cgroups so let's stop accounting here.
+	 */
+	memcg_stop_kmem_account();
+
 	/*
 	 * For simplicity, we won't allow this to be disabled. It also can't
 	 * be changed if the cgroup has children already, or if tasks had
@@ -5166,72 +5153,101 @@ static int memcg_update_kmem_limit(struct cgroup_subsys_state *css, u64 val)
 	 * of course permitted.
 	 */
 	mutex_lock(&memcg_create_mutex);
-	mutex_lock(&set_limit_mutex);
-	if (!memcg->kmem_account_flags && val != RES_COUNTER_MAX) {
-		if (cgroup_task_count(css->cgroup) || memcg_has_children(memcg)) {
-			ret = -EBUSY;
-			goto out;
-		}
-		ret = res_counter_set_limit(&memcg->kmem, val);
-		VM_BUG_ON(ret);
+	if (cgroup_task_count(memcg->css.cgroup) || memcg_has_children(memcg))
+		err = -EBUSY;
+	mutex_unlock(&memcg_create_mutex);
+	if (err)
+		goto out;
 
-		ret = memcg_update_cache_sizes(memcg);
-		if (ret) {
-			res_counter_set_limit(&memcg->kmem, RES_COUNTER_MAX);
-			goto out;
-		}
-		static_key_slow_inc(&memcg_kmem_enabled_key);
-		/*
-		 * setting the active bit after the inc will guarantee no one
-		 * starts accounting before all call sites are patched
-		 */
-		memcg_kmem_set_active(memcg);
-	} else
-		ret = res_counter_set_limit(&memcg->kmem, val);
+	memcg_id = ida_simple_get(&kmem_limited_groups,
+				  0, MEMCG_CACHES_MAX_SIZE, GFP_KERNEL);
+	if (memcg_id < 0) {
+		err = memcg_id;
+		goto out;
+	}
+
+	/*
+	 * Make sure we have enough space for this cgroup in each root cache's
+	 * memcg_params.
+	 */
+	err = memcg_update_all_caches(memcg_id + 1);
+	if (err)
+		goto out_rmid;
+
+	memcg->kmemcg_id = memcg_id;
+	INIT_LIST_HEAD(&memcg->memcg_slab_caches);
+	mutex_init(&memcg->slab_caches_mutex);
+
+	/*
+	 * We couldn't have accounted to this cgroup, because it hasn't got the
+	 * active bit set yet, so this should succeed.
+	 */
+	err = res_counter_set_limit(&memcg->kmem, limit);
+	VM_BUG_ON(err);
+
+	static_key_slow_inc(&memcg_kmem_enabled_key);
+	/*
+	 * Setting the active bit after enabling static branching will
+	 * guarantee no one starts accounting before all call sites are
+	 * patched.
+	 */
+	memcg_kmem_set_active(memcg);
 out:
-	mutex_unlock(&set_limit_mutex);
-	mutex_unlock(&memcg_create_mutex);
-#endif
+	memcg_resume_kmem_account();
+	return err;
+
+out_rmid:
+	ida_simple_remove(&kmem_limited_groups, memcg_id);
+	goto out;
+}
+
+static int memcg_activate_kmem(struct mem_cgroup *memcg,
+			       unsigned long long limit)
+{
+	int ret;
+
+	mutex_lock(&activate_kmem_mutex);
+	ret = __memcg_activate_kmem(memcg, limit);
+	mutex_unlock(&activate_kmem_mutex);
+	return ret;
+}
+
+static int memcg_update_kmem_limit(struct mem_cgroup *memcg,
+				   unsigned long long val)
+{
+	int ret;
+
+	if (!memcg_kmem_is_active(memcg))
+		ret = memcg_activate_kmem(memcg, val);
+	else
+		ret = res_counter_set_limit(&memcg->kmem, val);
 	return ret;
 }
 
-#ifdef CONFIG_MEMCG_KMEM
 static int memcg_propagate_kmem(struct mem_cgroup *memcg)
 {
 	int ret = 0;
 	struct mem_cgroup *parent = parent_mem_cgroup(memcg);
-	if (!parent)
-		goto out;
 
-	memcg->kmem_account_flags = parent->kmem_account_flags;
-	/*
-	 * When that happen, we need to disable the static branch only on those
-	 * memcgs that enabled it. To achieve this, we would be forced to
-	 * complicate the code by keeping track of which memcgs were the ones
-	 * that actually enabled limits, and which ones got it from its
-	 * parents.
-	 *
-	 * It is a lot simpler just to do static_key_slow_inc() on every child
-	 * that is accounted.
-	 */
-	if (!memcg_kmem_is_active(memcg))
-		goto out;
+	if (!parent)
+		return 0;
 
+	mutex_lock(&activate_kmem_mutex);
 	/*
-	 * __mem_cgroup_free() will issue static_key_slow_dec() because this
-	 * memcg is active already. If the later initialization fails then the
-	 * cgroup core triggers the cleanup so we do not have to do it here.
+	 * If the parent cgroup is not kmem-active now, it cannot be activated
+	 * after this point, because it has at least one child already.
 	 */
-	static_key_slow_inc(&memcg_kmem_enabled_key);
-
-	mutex_lock(&set_limit_mutex);
-	memcg_stop_kmem_account();
-	ret = memcg_update_cache_sizes(memcg);
-	memcg_resume_kmem_account();
-	mutex_unlock(&set_limit_mutex);
-out:
+	if (memcg_kmem_is_active(parent))
+		ret = __memcg_activate_kmem(memcg, RES_COUNTER_MAX);
+	mutex_unlock(&activate_kmem_mutex);
 	return ret;
 }
+#else
+static int memcg_update_kmem_limit(struct mem_cgroup *memcg,
+				   unsigned long long val)
+{
+	return -EINVAL;
+}
 #endif /* CONFIG_MEMCG_KMEM */
 
 /*
@@ -5265,7 +5281,7 @@ static int mem_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
 	else if (type == _MEMSWAP)
 		ret = mem_cgroup_resize_memsw_limit(memcg, val);
 	else if (type == _KMEM)
-		ret = memcg_update_kmem_limit(css, val);
+		ret = memcg_update_kmem_limit(memcg, val);
 	else
 		return -EINVAL;
 	break;
@@ -6499,7 +6515,6 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	struct mem_cgroup *parent = mem_cgroup_from_css(css_parent(css));
-	int error = 0;
 
 	if (css->cgroup->id > MEM_CGROUP_ID_MAX)
 		return -ENOSPC;
@@ -6534,10 +6549,9 @@ mem_cgroup_css_online(struct cgroup_subsys_state *css)
 		if (parent != root_mem_cgroup)
 			mem_cgroup_subsys.broken_hierarchy = true;
 	}
-
-	error = memcg_init_kmem(memcg, &mem_cgroup_subsys);
 	mutex_unlock(&memcg_create_mutex);
-	return error;
+
+	return memcg_init_kmem(memcg, &mem_cgroup_subsys);
 }
 
 /*
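
Note: the locking scheme this patch converges on is a classic lazy one-shot
activation. An unlocked fast-path check (memcg_kmem_is_active()) guards a
one-time setup step that is serialized by a dedicated mutex
(activate_kmem_mutex), and the "active" flag is flipped only after every setup
step has finished, so a caller that observes the flag set may safely skip the
lock. The standalone user-space sketch below models that ordering with
pthreads; all names in it are hypothetical and it illustrates the pattern only,
it is not kernel code.

/* activate_once.c - minimal user-space model of the activate-once scheme.
 * Hypothetical names; an illustration of the pattern, not kernel code. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

static pthread_mutex_t activate_mutex = PTHREAD_MUTEX_INITIALIZER;

struct group {
	atomic_bool active;	/* plays the role of memcg_kmem_is_active() */
	long limit;		/* plays the role of the kmem res_counter limit */
};

/* One-time setup; must be called with activate_mutex held. */
static int __group_activate(struct group *g, long limit)
{
	if (atomic_load(&g->active))
		return 0;	/* lost the race to another activator; done */

	g->limit = limit;	/* all setup happens before the flag flips... */
	atomic_store(&g->active, 1); /* ...so fast-path readers see it done */
	return 0;
}

static int group_update_limit(struct group *g, long limit)
{
	int ret = 0;

	if (!atomic_load(&g->active)) {	/* slow path: first write activates */
		pthread_mutex_lock(&activate_mutex);
		ret = __group_activate(g, limit);
		pthread_mutex_unlock(&activate_mutex);
	} else {			/* fast path: only update the limit
					 * (in the kernel, res_counter has its
					 * own internal locking for this) */
		g->limit = limit;
	}
	return ret;
}

int main(void)
{
	struct group g = { .limit = 0 };

	group_update_limit(&g, 4096);	/* first call performs activation */
	group_update_limit(&g, 8192);	/* later calls just set the limit */
	printf("active=%d limit=%ld\n", (int)atomic_load(&g.active), g.limit);
	return 0;
}

Because the default atomics above are sequentially consistent, the flag store
cannot be reordered before the setup store, which is the same guarantee the
patch spells out in its "setting the active bit after enabling static
branching" comment.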