 include/linux/memcontrol.h |   2
 mm/memcontrol.c            | 207
 mm/slab_common.c           |  28
 3 files changed, 221 insertions(+), 16 deletions(-)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 0b69a0470007..45085e14e023 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -447,6 +447,8 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s);
 void memcg_release_cache(struct kmem_cache *cachep);
 void memcg_cache_list_add(struct mem_cgroup *memcg, struct kmem_cache *cachep);
 
+int memcg_update_cache_size(struct kmem_cache *s, int num_groups);
+void memcg_update_array_size(int num_groups);
 /**
  * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed.
  * @gfp: the gfp allocation flags.
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 3eafe6cf6ca4..db38b60e5f87 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -378,6 +378,11 @@ static void memcg_kmem_set_activated(struct mem_cgroup *memcg)
 	set_bit(KMEM_ACCOUNTED_ACTIVATED, &memcg->kmem_account_flags);
 }
 
+static void memcg_kmem_clear_activated(struct mem_cgroup *memcg)
+{
+	clear_bit(KMEM_ACCOUNTED_ACTIVATED, &memcg->kmem_account_flags);
+}
+
 static void memcg_kmem_mark_dead(struct mem_cgroup *memcg)
 {
 	if (test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags))
@@ -549,12 +554,48 @@ static void disarm_sock_keys(struct mem_cgroup *memcg)
 #endif
 
 #ifdef CONFIG_MEMCG_KMEM
+/*
+ * This will be the memcg's index in each cache's ->memcg_params->memcg_caches.
+ * There are two main reasons for not using the css_id for this:
+ *  1) this works better in sparse environments, where we have a lot of memcgs,
+ *     but only a few kmem-limited. Or also, if we have, for instance, 200
+ *     memcgs, and none but the 200th is kmem-limited, we'd have to have a
+ *     200 entry array for that.
+ *
+ *  2) In order not to violate the cgroup API, we would like to do all memory
+ *     allocation in ->create(). At that point, we haven't yet allocated the
+ *     css_id. Having a separate index prevents us from messing with the cgroup
+ *     core for this
+ *
+ * The current size of the caches array is stored in
+ * memcg_limited_groups_array_size. It will double each time we have to
+ * increase it.
+ */
+static DEFINE_IDA(kmem_limited_groups);
+static int memcg_limited_groups_array_size;
+/*
+ * MIN_SIZE is different than 1, because we would like to avoid going through
+ * the alloc/free process all the time. In a small machine, 4 kmem-limited
+ * cgroups is a reasonable guess. In the future, it could be a parameter or
+ * tunable, but that is strictly not necessary.
+ *
+ * MAX_SIZE should be as large as the number of css_ids. Ideally, we could get
+ * this constant directly from cgroup, but it is understandable that this is
+ * better kept as an internal representation in cgroup.c. In any case, the
+ * css_id space is not getting any smaller, and we don't have to necessarily
+ * increase ours as well if it increases.
+ */
+#define MEMCG_CACHES_MIN_SIZE 4
+#define MEMCG_CACHES_MAX_SIZE 65535
+
 struct static_key memcg_kmem_enabled_key;
 
 static void disarm_kmem_keys(struct mem_cgroup *memcg)
 {
-	if (memcg_kmem_is_active(memcg))
+	if (memcg_kmem_is_active(memcg)) {
 		static_key_slow_dec(&memcg_kmem_enabled_key);
+		ida_simple_remove(&kmem_limited_groups, memcg->kmemcg_id);
+	}
 	/*
 	 * This check can't live in kmem destruction function,
 	 * since the charges will outlive the cgroup
@@ -2813,6 +2854,120 @@ int memcg_cache_id(struct mem_cgroup *memcg)
 	return memcg ? memcg->kmemcg_id : -1;
 }
 
+/*
+ * This ends up being protected by the set_limit mutex, during normal
+ * operation, because that is its main call site.
+ *
+ * But when we create a new cache, we can call this as well if its parent
+ * is kmem-limited. That will have to hold set_limit_mutex as well.
+ */
+int memcg_update_cache_sizes(struct mem_cgroup *memcg)
+{
+	int num, ret;
+
+	num = ida_simple_get(&kmem_limited_groups,
+				0, MEMCG_CACHES_MAX_SIZE, GFP_KERNEL);
+	if (num < 0)
+		return num;
+	/*
+	 * After this point, kmem_accounted (that we test atomically in
+	 * the beginning of this conditional), is no longer 0. This
+	 * guarantees only one process will set the following boolean
+	 * to true. We don't need test_and_set because we're protected
+	 * by the set_limit_mutex anyway.
+	 */
+	memcg_kmem_set_activated(memcg);
+
+	ret = memcg_update_all_caches(num+1);
+	if (ret) {
+		ida_simple_remove(&kmem_limited_groups, num);
+		memcg_kmem_clear_activated(memcg);
+		return ret;
+	}
+
+	memcg->kmemcg_id = num;
+	INIT_LIST_HEAD(&memcg->memcg_slab_caches);
+	mutex_init(&memcg->slab_caches_mutex);
+	return 0;
+}
+
+static size_t memcg_caches_array_size(int num_groups)
+{
+	ssize_t size;
+	if (num_groups <= 0)
+		return 0;
+
+	size = 2 * num_groups;
+	if (size < MEMCG_CACHES_MIN_SIZE)
+		size = MEMCG_CACHES_MIN_SIZE;
+	else if (size > MEMCG_CACHES_MAX_SIZE)
+		size = MEMCG_CACHES_MAX_SIZE;
+
+	return size;
+}
+
+/*
+ * We should update the current array size iff all caches updates succeed. This
+ * can only be done from the slab side. The slab mutex needs to be held when
+ * calling this.
+ */
+void memcg_update_array_size(int num)
+{
+	if (num > memcg_limited_groups_array_size)
+		memcg_limited_groups_array_size = memcg_caches_array_size(num);
+}
+
+int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
+{
+	struct memcg_cache_params *cur_params = s->memcg_params;
+
+	VM_BUG_ON(s->memcg_params && !s->memcg_params->is_root_cache);
+
+	if (num_groups > memcg_limited_groups_array_size) {
+		int i;
+		ssize_t size = memcg_caches_array_size(num_groups);
+
+		size *= sizeof(void *);
+		size += sizeof(struct memcg_cache_params);
+
+		s->memcg_params = kzalloc(size, GFP_KERNEL);
+		if (!s->memcg_params) {
+			s->memcg_params = cur_params;
+			return -ENOMEM;
+		}
+
+		s->memcg_params->is_root_cache = true;
+
+		/*
+		 * There is the chance it will be bigger than
+		 * memcg_limited_groups_array_size, if we failed an allocation
+		 * in a cache, in which case all caches updated before it, will
+		 * have a bigger array.
+		 *
+		 * But if that is the case, the data after
+		 * memcg_limited_groups_array_size is certainly unused
+		 */
+		for (i = 0; i < memcg_limited_groups_array_size; i++) {
+			if (!cur_params->memcg_caches[i])
+				continue;
+			s->memcg_params->memcg_caches[i] =
+					cur_params->memcg_caches[i];
+		}
+
+		/*
+		 * Ideally, we would wait until all caches succeed, and only
+		 * then free the old one. But this is not worth the extra
+		 * pointer per-cache we'd have to have for this.
+		 *
+		 * It is not a big deal if some caches are left with a size
+		 * bigger than the others. And all updates will reset this
+		 * anyway.
+		 */
+		kfree(cur_params);
+	}
+	return 0;
+}
+
 int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s)
 {
 	size_t size = sizeof(struct memcg_cache_params);
@@ -2820,6 +2975,9 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s)
 	if (!memcg_kmem_enabled())
 		return 0;
 
+	if (!memcg)
+		size += memcg_limited_groups_array_size * sizeof(void *);
+
 	s->memcg_params = kzalloc(size, GFP_KERNEL);
 	if (!s->memcg_params)
 		return -ENOMEM;
@@ -4326,14 +4484,11 @@ static int memcg_update_kmem_limit(struct cgroup *cont, u64 val)
 		ret = res_counter_set_limit(&memcg->kmem, val);
 		VM_BUG_ON(ret);
 
-		/*
-		 * After this point, kmem_accounted (that we test atomically in
-		 * the beginning of this conditional), is no longer 0. This
-		 * guarantees only one process will set the following boolean
-		 * to true. We don't need test_and_set because we're protected
-		 * by the set_limit_mutex anyway.
-		 */
-		memcg_kmem_set_activated(memcg);
+		ret = memcg_update_cache_sizes(memcg);
+		if (ret) {
+			res_counter_set_limit(&memcg->kmem, RESOURCE_MAX);
+			goto out;
+		}
 		must_inc_static_branch = true;
 		/*
 		 * kmem charges can outlive the cgroup. In the case of slab
@@ -4372,11 +4527,13 @@ out:
 	return ret;
 }
 
-static void memcg_propagate_kmem(struct mem_cgroup *memcg)
+static int memcg_propagate_kmem(struct mem_cgroup *memcg)
 {
+	int ret = 0;
 	struct mem_cgroup *parent = parent_mem_cgroup(memcg);
 	if (!parent)
-		return;
+		goto out;
+
 	memcg->kmem_account_flags = parent->kmem_account_flags;
 #ifdef CONFIG_MEMCG_KMEM
 	/*
@@ -4389,11 +4546,24 @@ static void memcg_propagate_kmem(struct mem_cgroup *memcg)
 	 * It is a lot simpler just to do static_key_slow_inc() on every child
 	 * that is accounted.
 	 */
-	if (memcg_kmem_is_active(memcg)) {
-		mem_cgroup_get(memcg);
-		static_key_slow_inc(&memcg_kmem_enabled_key);
-	}
+	if (!memcg_kmem_is_active(memcg))
+		goto out;
+
+	/*
+	 * destroy(), called if we fail, will issue static_key_slow_inc() and
+	 * mem_cgroup_put() if kmem is enabled. We have to either call them
+	 * unconditionally, or clear the KMEM_ACTIVE flag. I personally find
+	 * this more consistent, since it always leads to the same destroy path
+	 */
+	mem_cgroup_get(memcg);
+	static_key_slow_inc(&memcg_kmem_enabled_key);
+
+	mutex_lock(&set_limit_mutex);
+	ret = memcg_update_cache_sizes(memcg);
+	mutex_unlock(&set_limit_mutex);
 #endif
+out:
+	return ret;
 }
 
 /*
@@ -5075,8 +5245,12 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp,
 #ifdef CONFIG_MEMCG_KMEM
 static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 {
+	int ret;
+
 	memcg->kmemcg_id = -1;
-	memcg_propagate_kmem(memcg);
+	ret = memcg_propagate_kmem(memcg);
+	if (ret)
+		return ret;
 
 	return mem_cgroup_sockets_init(memcg, ss);
 };
@@ -5479,6 +5653,7 @@ mem_cgroup_css_alloc(struct cgroup *cont)
 		res_counter_init(&memcg->res, &parent->res);
 		res_counter_init(&memcg->memsw, &parent->memsw);
 		res_counter_init(&memcg->kmem, &parent->kmem);
+
 		/*
 		 * We increment refcnt of the parent to ensure that we can
 		 * safely access it on res_counter_charge/uncharge.
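
The memcontrol.c side of the patch boils down to a growth policy for the per-memcg caches array: the array at least doubles whenever it must grow, is clamped between MEMCG_CACHES_MIN_SIZE and MEMCG_CACHES_MAX_SIZE, and a failed reallocation leaves the previous array in place. The user-space sketch below mirrors that policy with plain calloc()/memcpy(); struct cache_array, caches_array_size() and grow_cache_array() are illustrative stand-ins, not kernel API.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define CACHES_MIN_SIZE 4
#define CACHES_MAX_SIZE 65535

/* Doubling-with-clamp policy, mirroring memcg_caches_array_size(). */
static size_t caches_array_size(int num_groups)
{
	size_t size;

	if (num_groups <= 0)
		return 0;

	size = 2 * (size_t)num_groups;
	if (size < CACHES_MIN_SIZE)
		size = CACHES_MIN_SIZE;
	else if (size > CACHES_MAX_SIZE)
		size = CACHES_MAX_SIZE;
	return size;
}

struct cache_array {
	size_t size;	/* slots currently allocated */
	void **entries;	/* one slot per kmem-limited group */
};

/* Grow the array so that num_groups entries fit; on allocation failure
 * the old array is kept, just as memcg_update_cache_size() falls back
 * to cur_params. */
static int grow_cache_array(struct cache_array *a, int num_groups)
{
	size_t new_size;
	void **new_entries;

	if ((size_t)num_groups <= a->size)
		return 0;	/* still fits, nothing to do */

	new_size = caches_array_size(num_groups);
	new_entries = calloc(new_size, sizeof(void *));
	if (!new_entries)
		return -1;	/* keep the old array */

	if (a->entries) {
		memcpy(new_entries, a->entries, a->size * sizeof(void *));
		free(a->entries);
	}
	a->entries = new_entries;
	a->size = new_size;
	return 0;
}

int main(void)
{
	struct cache_array a = { 0, NULL };

	grow_cache_array(&a, 1);	/* clamped up to 4 slots */
	printf("after 1 group:  %zu slots\n", a.size);
	grow_cache_array(&a, 6);	/* doubled to 12 slots */
	printf("after 6 groups: %zu slots\n", a.size);
	free(a.entries);
	return 0;
}
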
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 3031badcc577..1c424b6511bf 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -81,6 +81,34 @@ static inline int kmem_cache_sanity_check(struct mem_cgroup *memcg,
 }
 #endif
 
+#ifdef CONFIG_MEMCG_KMEM
+int memcg_update_all_caches(int num_memcgs)
+{
+	struct kmem_cache *s;
+	int ret = 0;
+	mutex_lock(&slab_mutex);
+
+	list_for_each_entry(s, &slab_caches, list) {
+		if (!is_root_cache(s))
+			continue;
+
+		ret = memcg_update_cache_size(s, num_memcgs);
+		/*
+		 * See comment in memcontrol.c, memcg_update_cache_size:
+		 * Instead of freeing the memory, we'll just leave the caches
+		 * up to this point in an updated state.
+		 */
+		if (ret)
+			goto out;
+	}
+
+	memcg_update_array_size(num_memcgs);
+out:
+	mutex_unlock(&slab_mutex);
+	return ret;
+}
+#endif
+
 /*
  * Figure out what the alignment of the objects will be given a set of
  * flags, a user specified alignment and the size of the objects.
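
Putting the two files together, the intended ordering is: memcg_update_cache_sizes() reserves an index from the IDA and marks the group activated, memcg_update_all_caches() grows every root cache under slab_mutex, and only if all of them succeed is the shared array size published; on failure the index and the activated bit are rolled back. The toy user-space sketch below shows that ordering and the rollback path; ida_get()/ida_put(), grow_one_cache(), update_all_caches() and activate_group() are invented stand-ins for the kernel primitives, and the failure flag exists only to demonstrate the error path.

#include <stdio.h>

#define MAX_IDS 16

static unsigned char id_used[MAX_IDS];	/* toy stand-in for the IDA */
static int published_array_size = 4;	/* analog of memcg_limited_groups_array_size */
static int fail_next_grow;		/* set to exercise the error path */

static int ida_get(void)
{
	int i;

	for (i = 0; i < MAX_IDS; i++) {
		if (!id_used[i]) {
			id_used[i] = 1;
			return i;
		}
	}
	return -1;
}

static void ida_put(int id)
{
	id_used[id] = 0;
}

/* Stand-in for memcg_update_cache_size() on one root cache. */
static int grow_one_cache(int num_groups)
{
	if (fail_next_grow || num_groups > MAX_IDS)
		return -1;
	return 0;
}

/* Stand-in for memcg_update_all_caches(): the shared size is published
 * only when every cache grew; caches updated before a failure simply
 * keep their larger, partly unused arrays.  The kernel additionally
 * doubles and clamps the published size (see the previous sketch). */
static int update_all_caches(int num_groups)
{
	int i, ret = 0;

	for (i = 0; i < 3; i++) {
		ret = grow_one_cache(num_groups);
		if (ret)
			goto out;
	}
	if (num_groups > published_array_size)
		published_array_size = num_groups;
out:
	return ret;
}

struct group {
	int active;
	int id;
};

/* Stand-in for memcg_update_cache_sizes(): reserve an id, mark the group
 * active, grow all caches, and undo both steps if the growth fails. */
static int activate_group(struct group *g)
{
	int id, ret;

	id = ida_get();
	if (id < 0)
		return id;

	g->active = 1;
	ret = update_all_caches(id + 1);
	if (ret) {
		ida_put(id);
		g->active = 0;
		return ret;
	}
	g->id = id;
	return 0;
}

int main(void)
{
	struct group g1 = { 0, -1 }, g2 = { 0, -1 };

	printf("activate g1: %d (published size %d)\n",
	       activate_group(&g1), published_array_size);

	fail_next_grow = 1;	/* force the rollback path for g2 */
	printf("activate g2: %d (published size %d)\n",
	       activate_group(&g2), published_array_size);
	return 0;
}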