-rw-r--r--   include/linux/memcontrol.h |  1
-rw-r--r--   include/linux/slab.h       |  2
-rw-r--r--   mm/memcontrol.c            | 63
-rw-r--r--   mm/slab.h                  |  7
4 files changed, 4 insertions, 69 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5155d09e749d..087a45314181 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -509,7 +509,6 @@ __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
 int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size);
 void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size);
 
-void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
 int __kmem_cache_destroy_memcg_children(struct kmem_cache *s);
 
 /**
diff --git a/include/linux/slab.h b/include/linux/slab.h
index a6aab2c0dfc5..905541dd3778 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -524,7 +524,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
  * @memcg: pointer to the memcg this cache belongs to
  * @list: list_head for the list of all caches in this memcg
  * @root_cache: pointer to the global, root cache, this cache was derived from
- * @dead: set to true after the memcg dies; the cache may still be around.
  * @nr_pages: number of pages that belongs to this cache.
  * @destroy: worker to be called whenever we are ready, or believe we may be
  *           ready, to destroy this cache.
@@ -540,7 +539,6 @@ struct memcg_cache_params {
 			struct mem_cgroup *memcg;
 			struct list_head list;
 			struct kmem_cache *root_cache;
-			bool dead;
 			atomic_t nr_pages;
 			struct work_struct destroy;
 		};
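For reference, a sketch of the per-memcg part of struct memcg_cache_params as it reads after this hunk, reconstructed only from the diff context above; fields and the surrounding layout of the real definition that do not appear in the hunk are omitted, and the member comments are paraphrased from the kerneldoc in the previous hunk:

/* Sketch reconstructed from the hunk above; not the full definition. */
struct memcg_cache_params {
	/* ... fields not shown in this hunk ... */
	struct mem_cgroup *memcg;	/* memcg this cache belongs to */
	struct list_head list;		/* list of all caches in this memcg */
	struct kmem_cache *root_cache;	/* root cache this one was derived from */
	atomic_t nr_pages;		/* pages that belong to this cache */
	struct work_struct destroy;	/* deferred destruction worker */
};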
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9f4ff49c6add..6b1c45ced733 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3277,60 +3277,11 @@ static void kmem_cache_destroy_work_func(struct work_struct *w)
 
 	cachep = memcg_params_to_cache(p);
 
-	/*
-	 * If we get down to 0 after shrink, we could delete right away.
-	 * However, memcg_release_pages() already puts us back in the workqueue
-	 * in that case. If we proceed deleting, we'll get a dangling
-	 * reference, and removing the object from the workqueue in that case
-	 * is unnecessary complication. We are not a fast path.
-	 *
-	 * Note that this case is fundamentally different from racing with
-	 * shrink_slab(): if memcg_cgroup_destroy_cache() is called in
-	 * kmem_cache_shrink, not only we would be reinserting a dead cache
-	 * into the queue, but doing so from inside the worker racing to
-	 * destroy it.
-	 *
-	 * So if we aren't down to zero, we'll just schedule a worker and try
-	 * again
-	 */
-	if (atomic_read(&cachep->memcg_params->nr_pages) != 0)
-		kmem_cache_shrink(cachep);
-	else
+	kmem_cache_shrink(cachep);
+	if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
 		kmem_cache_destroy(cachep);
 }
 
-void mem_cgroup_destroy_cache(struct kmem_cache *cachep)
-{
-	if (!cachep->memcg_params->dead)
-		return;
-
-	/*
-	 * There are many ways in which we can get here.
-	 *
-	 * We can get to a memory-pressure situation while the delayed work is
-	 * still pending to run. The vmscan shrinkers can then release all
-	 * cache memory and get us to destruction. If this is the case, we'll
-	 * be executed twice, which is a bug (the second time will execute over
-	 * bogus data). In this case, cancelling the work should be fine.
-	 *
-	 * But we can also get here from the worker itself, if
-	 * kmem_cache_shrink is enough to shake all the remaining objects and
-	 * get the page count to 0. In this case, we'll deadlock if we try to
-	 * cancel the work (the worker runs with an internal lock held, which
-	 * is the same lock we would hold for cancel_work_sync().)
-	 *
-	 * Since we can't possibly know who got us here, just refrain from
-	 * running if there is already work pending
-	 */
-	if (work_pending(&cachep->memcg_params->destroy))
-		return;
-	/*
-	 * We have to defer the actual destroying to a workqueue, because
-	 * we might currently be in a context that cannot sleep.
-	 */
-	schedule_work(&cachep->memcg_params->destroy);
-}
-
 int __kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 {
 	struct kmem_cache *c;
@@ -3356,16 +3307,7 @@ int __kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 		 * We will now manually delete the caches, so to avoid races
 		 * we need to cancel all pending destruction workers and
 		 * proceed with destruction ourselves.
-		 *
-		 * kmem_cache_destroy() will call kmem_cache_shrink internally,
-		 * and that could spawn the workers again: it is likely that
-		 * the cache still have active pages until this very moment.
-		 * This would lead us back to mem_cgroup_destroy_cache.
-		 *
-		 * But that will not execute at all if the "dead" flag is not
-		 * set, so flip it down to guarantee we are in control.
 		 */
-		c->memcg_params->dead = false;
 		cancel_work_sync(&c->memcg_params->destroy);
 		kmem_cache_destroy(c);
 
@@ -3387,7 +3329,6 @@ static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
 	mutex_lock(&memcg->slab_caches_mutex);
 	list_for_each_entry(params, &memcg->memcg_slab_caches, list) {
 		cachep = memcg_params_to_cache(params);
-		cachep->memcg_params->dead = true;
 		schedule_work(&cachep->memcg_params->destroy);
 	}
 	mutex_unlock(&memcg->slab_caches_mutex);
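Reconstructed from the first hunk of this file, the destruction worker after the patch reduces to an unconditional shrink followed by a destroy-only-if-empty check. The local declarations and the container_of() line are assumptions filled in around the diff context (only the memcg_params_to_cache() call and the shrink/destroy lines are visible above):

/* Sketch of kmem_cache_destroy_work_func() as it reads after this patch.
 * The declarations and container_of() are assumed; the shrink/destroy
 * logic is taken directly from the added lines of the hunk above. */
static void kmem_cache_destroy_work_func(struct work_struct *w)
{
	struct kmem_cache *cachep;
	struct memcg_cache_params *p;

	p = container_of(w, struct memcg_cache_params, destroy);
	cachep = memcg_params_to_cache(p);

	/* Try to free as much as possible, then destroy only if empty. */
	kmem_cache_shrink(cachep);
	if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
		kmem_cache_destroy(cachep);
}

If pages remain after the shrink the worker simply returns; with the dead flag and mem_cgroup_destroy_cache() gone, nothing reschedules it from memcg_release_pages() any more.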
diff --git a/mm/slab.h b/mm/slab.h
index d85d59803d5f..b59447ac4533 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -129,11 +129,8 @@ static inline void memcg_bind_pages(struct kmem_cache *s, int order)
 
 static inline void memcg_release_pages(struct kmem_cache *s, int order)
 {
-	if (is_root_cache(s))
-		return;
-
-	if (atomic_sub_and_test((1 << order), &s->memcg_params->nr_pages))
-		mem_cgroup_destroy_cache(s);
+	if (!is_root_cache(s))
+		atomic_sub(1 << order, &s->memcg_params->nr_pages);
 }
 
 static inline bool slab_equal_or_root(struct kmem_cache *s,
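Putting the hunk together, memcg_release_pages() after the patch only drops the page counter for child caches and no longer triggers destruction itself; scheduling the destroy worker is left entirely to mem_cgroup_destroy_all_caches(). A minimal sketch of the resulting helper, taken directly from the added lines:

/* Sketch of memcg_release_pages() after this patch: the last page going
 * away only decrements nr_pages, it no longer schedules destruction. */
static inline void memcg_release_pages(struct kmem_cache *s, int order)
{
	if (!is_root_cache(s))
		atomic_sub(1 << order, &s->memcg_params->nr_pages);
}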