-rw-r--r--  include/linux/sched.h  |  1
-rw-r--r--  mm/memcontrol.c        | 57
2 files changed, 55 insertions(+), 3 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 9914c662ed7b..f712465b05c5 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1597,6 +1597,7 @@ struct task_struct {
 		unsigned long nr_pages;	/* uncharged usage */
 		unsigned long memsw_nr_pages; /* uncharged mem+swap usage */
 	} memcg_batch;
+	unsigned int memcg_kmem_skip_account;
 #endif
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 	atomic_t ptrace_bp_refcnt;
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index efd26620a60b..65302a083d2f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3025,6 +3025,37 @@ out:
 	kfree(s->memcg_params);
 }
 
+/*
+ * During the creation of a new cache, we need to disable our accounting
+ * mechanism altogether. This is true even if we are not creating, but rather
+ * just enqueueing new caches to be created.
+ *
+ * This is because that process will trigger allocations; some visible, like
+ * explicit kmallocs to auxiliary data structures, name strings and internal
+ * cache structures; some well concealed, like INIT_WORK() that can allocate
+ * objects during debug.
+ *
+ * If any allocation happens during memcg_kmem_get_cache, we will recurse back
+ * to it. This may not be a bounded recursion: since the first cache creation
+ * failed to complete (waiting on the allocation), we'll just try to create the
+ * cache again, failing at the same point.
+ *
+ * memcg_kmem_get_cache is prepared to abort after seeing a positive count of
+ * memcg_kmem_skip_account. So we enclose anything that might allocate memory
+ * inside the following two functions.
+ */
+static inline void memcg_stop_kmem_account(void)
+{
+	VM_BUG_ON(!current->mm);
+	current->memcg_kmem_skip_account++;
+}
+
+static inline void memcg_resume_kmem_account(void)
+{
+	VM_BUG_ON(!current->mm);
+	current->memcg_kmem_skip_account--;
+}
+
 static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *s)
 {
 	char *name;
@@ -3084,7 +3115,6 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
 		goto out;
 
 	new_cachep = kmem_cache_dup(memcg, cachep);
-
 	if (new_cachep == NULL) {
 		new_cachep = cachep;
 		goto out;
@@ -3125,8 +3155,8 @@ static void memcg_create_cache_work_func(struct work_struct *w)
  * Enqueue the creation of a per-memcg kmem_cache.
  * Called with rcu_read_lock.
  */
-static void memcg_create_cache_enqueue(struct mem_cgroup *memcg,
-				       struct kmem_cache *cachep)
+static void __memcg_create_cache_enqueue(struct mem_cgroup *memcg,
+					 struct kmem_cache *cachep)
 {
 	struct create_work *cw;
 
@@ -3147,6 +3177,24 @@ static void memcg_create_cache_enqueue(struct mem_cgroup *memcg,
 	schedule_work(&cw->work);
 }
 
+static void memcg_create_cache_enqueue(struct mem_cgroup *memcg,
+				       struct kmem_cache *cachep)
+{
+	/*
+	 * We need to stop accounting when we kmalloc, because if the
+	 * corresponding kmalloc cache is not yet created, the first allocation
+	 * in __memcg_create_cache_enqueue will recurse.
+	 *
+	 * However, it is better to enclose the whole function. Depending on
+	 * the debugging options enabled, INIT_WORK(), for instance, can
+	 * trigger an allocation. This, too, will make us recurse. Because at
+	 * this point we can't allow ourselves back into memcg_kmem_get_cache,
+	 * the safest choice is to do it like this, wrapping the whole function.
+	 */
+	memcg_stop_kmem_account();
+	__memcg_create_cache_enqueue(memcg, cachep);
+	memcg_resume_kmem_account();
+}
 /*
  * Return the kmem_cache we're supposed to use for a slab allocation.
  * We try to use the current memcg's version of the cache.
@@ -3169,6 +3217,9 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
 	VM_BUG_ON(!cachep->memcg_params);
 	VM_BUG_ON(!cachep->memcg_params->is_root_cache);
 
+	if (!current->mm || current->memcg_kmem_skip_account)
+		return cachep;
+
 	rcu_read_lock();
 	memcg = mem_cgroup_from_task(rcu_dereference(current->mm->owner));
 	rcu_read_unlock();
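
For readers outside the kernel tree, the mechanism this patch adds is a plain reentrancy guard: an allocation hook that may itself need to allocate is fenced off with a per-task nesting counter, and the hook bails out whenever the counter is positive. Below is a minimal, self-contained userspace sketch of that pattern, not kernel code; every identifier in it (skip_account, stop_account, resume_account, accounting_alloc, create_tracked_cache) is a hypothetical stand-in for, respectively, current->memcg_kmem_skip_account, memcg_stop_kmem_account(), memcg_resume_kmem_account(), __memcg_kmem_get_cache() and memcg_create_cache_enqueue().

/*
 * Hypothetical sketch of the reentrancy guard above; names do not
 * correspond to any real kernel or libc API beyond malloc/free.
 */
#include <stdio.h>
#include <stdlib.h>

/* Stand-in for the per-task counter added to struct task_struct. */
static _Thread_local unsigned int skip_account;

static void stop_account(void)   { skip_account++; }
static void resume_account(void) { skip_account--; }

static void create_tracked_cache(void);

/*
 * Stand-in for the allocation hook: if the tracked cache is missing,
 * ask for it to be created -- unless we are already inside that very
 * creation path, in which case we fall through to a plain allocation.
 */
static void *accounting_alloc(size_t size)
{
	if (!skip_account)
		create_tracked_cache();
	return malloc(size);
}

/*
 * Stand-in for the cache-creation path: it must allocate auxiliary data
 * itself, so without the counter the accounting_alloc() call below
 * would re-enter create_tracked_cache() without bound.
 */
static void create_tracked_cache(void)
{
	stop_account();
	void *meta = accounting_alloc(128);	/* name string, work item, ... */
	printf("metadata at %p, skip_account = %u\n", meta, skip_account);
	free(meta);
	resume_account();
}

int main(void)
{
	free(accounting_alloc(64));
	return 0;
}

A counter rather than a boolean flag lets guarded sections nest safely. _Thread_local plays the role of the per-task field here: in the kernel the counter lives in struct task_struct, so it automatically follows whichever task is current.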