author     Vladimir Davydov <vdavydov@parallels.com>       2015-02-10 17:11:47 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2015-02-10 17:30:34 -0500
commit     d5b3cf7139b8770af4ed8bb36a1ab9d290ac39e9 (patch)
tree       d96432c889fcb6f3790f41651a13a489e6af8f5c /mm
parent     3e0350a36414a73c5c2d1e354f8c0ab4ace1296d (diff)
memcg: zap memcg_slab_caches and memcg_slab_mutex
mem_cgroup->memcg_slab_caches is a list of kmem caches corresponding to the
given cgroup. Currently, it is only used on css free in order to destroy all
caches corresponding to the memory cgroup being freed. The list is protected
by memcg_slab_mutex. The mutex is also used to protect
kmem_cache->memcg_params->memcg_caches arrays and synchronizes
kmem_cache_destroy vs memcg_unregister_all_caches.

However, we can perfectly get on without these two. To destroy all caches
corresponding to a memory cgroup, we can walk over the global list of kmem
caches, slab_caches, and we can do all the synchronization stuff using the
slab_mutex instead of the memcg_slab_mutex. This patch therefore gets rid of
the memcg_slab_caches and memcg_slab_mutex.

Apart from this nice cleanup, it also:

 - assures that rcu_barrier() is called at most once when a root cache is
   destroyed or a memory cgroup is freed, no matter how many caches have the
   SLAB_DESTROY_BY_RCU flag set;

 - fixes the race between kmem_cache_destroy and kmem_cache_create that
   exists, because memcg_cleanup_cache_params, which is called from
   kmem_cache_destroy after checking that kmem_cache->refcount=0, releases
   the slab_mutex, which gives kmem_cache_create a chance to make an alias
   to a cache doomed to be destroyed.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Acked-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
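For orientation, here is a minimal userspace sketch (plain C with pthreads, not kernel code) of the scheme the message describes: one global cache list protected by one global mutex, with destruction deferred to a private release list so the expensive barrier is paid at most once. All names are invented for illustration only: cache_list/cache_mutex stand in for slab_caches/slab_mutex, expensive_barrier() for rcu_barrier(), and destroy_group_caches() roughly for the memcg_destroy_kmem_caches()/do_kmem_cache_shutdown()/do_kmem_cache_release() split introduced below.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct cache {
        char name[32];
        int group_id;           /* -1 marks a "root" cache */
        bool destroy_by_rcu;    /* stands in for SLAB_DESTROY_BY_RCU */
        struct cache *next;
};

static pthread_mutex_t cache_mutex = PTHREAD_MUTEX_INITIALIZER; /* ~ slab_mutex */
static struct cache *cache_list;        /* ~ the global slab_caches list */

static void expensive_barrier(void)     /* ~ rcu_barrier() */
{
        puts("barrier: waiting for all pending readers");
}

static void add_cache(const char *name, int group, bool rcu)
{
        struct cache *c = calloc(1, sizeof(*c));

        snprintf(c->name, sizeof(c->name), "%s", name);
        c->group_id = group;
        c->destroy_by_rcu = rcu;
        pthread_mutex_lock(&cache_mutex);
        c->next = cache_list;
        cache_list = c;
        pthread_mutex_unlock(&cache_mutex);
}

/*
 * Destroy every cache owned by @group_id: walk the one global list under the
 * one global mutex, move victims onto a private release list, then pay the
 * expensive barrier at most once before freeing them.
 */
static void destroy_group_caches(int group_id)
{
        struct cache *release = NULL, *c, **pp;
        bool need_barrier = false;

        pthread_mutex_lock(&cache_mutex);
        for (pp = &cache_list; (c = *pp) != NULL; ) {
                if (c->group_id != group_id) {
                        pp = &c->next;
                        continue;
                }
                *pp = c->next;          /* unlink from the global list */
                c->next = release;      /* park on the private release list */
                release = c;
                if (c->destroy_by_rcu)
                        need_barrier = true;
        }
        pthread_mutex_unlock(&cache_mutex);

        if (need_barrier)               /* one barrier, however many caches matched */
                expensive_barrier();

        while ((c = release) != NULL) {
                release = c->next;
                printf("released %s\n", c->name);
                free(c);
        }
}

int main(void)
{
        add_cache("dentry", -1, true);                  /* root cache, stays */
        add_cache("dentry(2:foo)", 2, true);            /* per-group copies, go away */
        add_cache("radix_tree_node(2:foo)", 2, true);
        destroy_group_caches(2);                        /* prints the barrier line once */
        return 0;
}

The point of the split is visible in destroy_group_caches(): victims are collected under the lock, but the barrier and the actual freeing happen after the lock is dropped, once per batch rather than once per cache.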
Diffstat (limited to 'mm')
-rw-r--r--   mm/memcontrol.c   | 156
-rw-r--r--   mm/slab_common.c  | 142
2 files changed, 118 insertions(+), 180 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index baf7eb27e3ae..f3f8a4f52a0c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -343,9 +343,6 @@ struct mem_cgroup {
         struct cg_proto tcp_mem;
 #endif
 #if defined(CONFIG_MEMCG_KMEM)
-        /* analogous to slab_common's slab_caches list, but per-memcg;
-         * protected by memcg_slab_mutex */
-        struct list_head memcg_slab_caches;
         /* Index in the kmem_cache->memcg_params->memcg_caches array */
         int kmemcg_id;
 #endif
@@ -2476,25 +2473,6 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
 }
 
 #ifdef CONFIG_MEMCG_KMEM
-/*
- * The memcg_slab_mutex is held whenever a per memcg kmem cache is created or
- * destroyed. It protects memcg_caches arrays and memcg_slab_caches lists.
- */
-static DEFINE_MUTEX(memcg_slab_mutex);
-
-/*
- * This is a bit cumbersome, but it is rarely used and avoids a backpointer
- * in the memcg_cache_params struct.
- */
-static struct kmem_cache *memcg_params_to_cache(struct memcg_cache_params *p)
-{
-        struct kmem_cache *cachep;
-
-        VM_BUG_ON(p->is_root_cache);
-        cachep = p->root_cache;
-        return cache_from_memcg_idx(cachep, memcg_cache_id(p->memcg));
-}
-
 int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
                       unsigned long nr_pages)
 {
@@ -2578,10 +2556,7 @@ static int memcg_alloc_cache_id(void)
         else if (size > MEMCG_CACHES_MAX_SIZE)
                 size = MEMCG_CACHES_MAX_SIZE;
 
-        mutex_lock(&memcg_slab_mutex);
         err = memcg_update_all_caches(size);
-        mutex_unlock(&memcg_slab_mutex);
-
         if (err) {
                 ida_simple_remove(&kmem_limited_groups, id);
                 return err;
@@ -2604,120 +2579,20 @@ void memcg_update_array_size(int num)
         memcg_limited_groups_array_size = num;
 }
 
-static void memcg_register_cache(struct mem_cgroup *memcg,
-                                 struct kmem_cache *root_cache)
-{
-        struct kmem_cache *cachep;
-        int id;
-
-        lockdep_assert_held(&memcg_slab_mutex);
-
-        id = memcg_cache_id(memcg);
-
-        /*
-         * Since per-memcg caches are created asynchronously on first
-         * allocation (see memcg_kmem_get_cache()), several threads can try to
-         * create the same cache, but only one of them may succeed.
-         */
-        if (cache_from_memcg_idx(root_cache, id))
-                return;
-
-        cachep = memcg_create_kmem_cache(memcg, root_cache);
-        /*
-         * If we could not create a memcg cache, do not complain, because
-         * that's not critical at all as we can always proceed with the root
-         * cache.
-         */
-        if (!cachep)
-                return;
-
-        list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches);
-
-        /*
-         * Since readers won't lock (see cache_from_memcg_idx()), we need a
-         * barrier here to ensure nobody will see the kmem_cache partially
-         * initialized.
-         */
-        smp_wmb();
-
-        BUG_ON(root_cache->memcg_params->memcg_caches[id]);
-        root_cache->memcg_params->memcg_caches[id] = cachep;
-}
-
-static void memcg_unregister_cache(struct kmem_cache *cachep)
-{
-        struct kmem_cache *root_cache;
-        struct mem_cgroup *memcg;
-        int id;
-
-        lockdep_assert_held(&memcg_slab_mutex);
-
-        BUG_ON(is_root_cache(cachep));
-
-        root_cache = cachep->memcg_params->root_cache;
-        memcg = cachep->memcg_params->memcg;
-        id = memcg_cache_id(memcg);
-
-        BUG_ON(root_cache->memcg_params->memcg_caches[id] != cachep);
-        root_cache->memcg_params->memcg_caches[id] = NULL;
-
-        list_del(&cachep->memcg_params->list);
-
-        kmem_cache_destroy(cachep);
-}
-
-int __memcg_cleanup_cache_params(struct kmem_cache *s)
-{
-        struct kmem_cache *c;
-        int i, failed = 0;
-
-        mutex_lock(&memcg_slab_mutex);
-        for_each_memcg_cache_index(i) {
-                c = cache_from_memcg_idx(s, i);
-                if (!c)
-                        continue;
-
-                memcg_unregister_cache(c);
-
-                if (cache_from_memcg_idx(s, i))
-                        failed++;
-        }
-        mutex_unlock(&memcg_slab_mutex);
-        return failed;
-}
-
-static void memcg_unregister_all_caches(struct mem_cgroup *memcg)
-{
-        struct kmem_cache *cachep;
-        struct memcg_cache_params *params, *tmp;
-
-        if (!memcg_kmem_is_active(memcg))
-                return;
-
-        mutex_lock(&memcg_slab_mutex);
-        list_for_each_entry_safe(params, tmp, &memcg->memcg_slab_caches, list) {
-                cachep = memcg_params_to_cache(params);
-                memcg_unregister_cache(cachep);
-        }
-        mutex_unlock(&memcg_slab_mutex);
-}
-
-struct memcg_register_cache_work {
+struct memcg_kmem_cache_create_work {
         struct mem_cgroup *memcg;
         struct kmem_cache *cachep;
         struct work_struct work;
 };
 
-static void memcg_register_cache_func(struct work_struct *w)
+static void memcg_kmem_cache_create_func(struct work_struct *w)
 {
-        struct memcg_register_cache_work *cw =
-                container_of(w, struct memcg_register_cache_work, work);
+        struct memcg_kmem_cache_create_work *cw =
+                container_of(w, struct memcg_kmem_cache_create_work, work);
         struct mem_cgroup *memcg = cw->memcg;
         struct kmem_cache *cachep = cw->cachep;
 
-        mutex_lock(&memcg_slab_mutex);
-        memcg_register_cache(memcg, cachep);
-        mutex_unlock(&memcg_slab_mutex);
+        memcg_create_kmem_cache(memcg, cachep);
 
         css_put(&memcg->css);
         kfree(cw);
@@ -2726,10 +2601,10 @@ static void memcg_register_cache_func(struct work_struct *w)
 /*
  * Enqueue the creation of a per-memcg kmem_cache.
  */
-static void __memcg_schedule_register_cache(struct mem_cgroup *memcg,
+static void __memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
                                             struct kmem_cache *cachep)
 {
-        struct memcg_register_cache_work *cw;
+        struct memcg_kmem_cache_create_work *cw;
 
         cw = kmalloc(sizeof(*cw), GFP_NOWAIT);
         if (!cw)
@@ -2739,18 +2614,18 @@ static void __memcg_schedule_register_cache(struct mem_cgroup *memcg,
 
         cw->memcg = memcg;
         cw->cachep = cachep;
+        INIT_WORK(&cw->work, memcg_kmem_cache_create_func);
 
-        INIT_WORK(&cw->work, memcg_register_cache_func);
         schedule_work(&cw->work);
 }
 
-static void memcg_schedule_register_cache(struct mem_cgroup *memcg,
+static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
                                           struct kmem_cache *cachep)
 {
         /*
          * We need to stop accounting when we kmalloc, because if the
          * corresponding kmalloc cache is not yet created, the first allocation
-         * in __memcg_schedule_register_cache will recurse.
+         * in __memcg_schedule_kmem_cache_create will recurse.
          *
          * However, it is better to enclose the whole function. Depending on
          * the debugging options enabled, INIT_WORK(), for instance, can
@@ -2759,7 +2634,7 @@ static void memcg_schedule_register_cache(struct mem_cgroup *memcg,
          * the safest choice is to do it like this, wrapping the whole function.
          */
         current->memcg_kmem_skip_account = 1;
-        __memcg_schedule_register_cache(memcg, cachep);
+        __memcg_schedule_kmem_cache_create(memcg, cachep);
         current->memcg_kmem_skip_account = 0;
 }
 
@@ -2807,7 +2682,7 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep)
          * could happen with the slab_mutex held. So it's better to
          * defer everything.
          */
-        memcg_schedule_register_cache(memcg, cachep);
+        memcg_schedule_kmem_cache_create(memcg, cachep);
 out:
         css_put(&memcg->css);
         return cachep;
@@ -4136,7 +4011,7 @@ static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 
 static void memcg_destroy_kmem(struct mem_cgroup *memcg)
 {
-        memcg_unregister_all_caches(memcg);
+        memcg_destroy_kmem_caches(memcg);
         mem_cgroup_sockets_destroy(memcg);
 }
 #else
@@ -4664,7 +4539,6 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
         spin_lock_init(&memcg->event_list_lock);
 #ifdef CONFIG_MEMCG_KMEM
         memcg->kmemcg_id = -1;
-        INIT_LIST_HEAD(&memcg->memcg_slab_caches);
 #endif
 
         return &memcg->css;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 1b782a2d3b3d..6e1e4cf65836 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -425,6 +425,49 @@ out_unlock:
 }
 EXPORT_SYMBOL(kmem_cache_create);
 
+static int do_kmem_cache_shutdown(struct kmem_cache *s,
+                struct list_head *release, bool *need_rcu_barrier)
+{
+        if (__kmem_cache_shutdown(s) != 0) {
+                printk(KERN_ERR "kmem_cache_destroy %s: "
+                       "Slab cache still has objects\n", s->name);
+                dump_stack();
+                return -EBUSY;
+        }
+
+        if (s->flags & SLAB_DESTROY_BY_RCU)
+                *need_rcu_barrier = true;
+
+#ifdef CONFIG_MEMCG_KMEM
+        if (!is_root_cache(s)) {
+                struct kmem_cache *root_cache = s->memcg_params->root_cache;
+                int memcg_id = memcg_cache_id(s->memcg_params->memcg);
+
+                BUG_ON(root_cache->memcg_params->memcg_caches[memcg_id] != s);
+                root_cache->memcg_params->memcg_caches[memcg_id] = NULL;
+        }
+#endif
+        list_move(&s->list, release);
+        return 0;
+}
+
+static void do_kmem_cache_release(struct list_head *release,
+                                  bool need_rcu_barrier)
+{
+        struct kmem_cache *s, *s2;
+
+        if (need_rcu_barrier)
+                rcu_barrier();
+
+        list_for_each_entry_safe(s, s2, release, list) {
+#ifdef SLAB_SUPPORTS_SYSFS
+                sysfs_slab_remove(s);
+#else
+                slab_kmem_cache_release(s);
+#endif
+        }
+}
+
 #ifdef CONFIG_MEMCG_KMEM
 /*
  * memcg_create_kmem_cache - Create a cache for a memory cgroup.
@@ -435,10 +478,11 @@ EXPORT_SYMBOL(kmem_cache_create);
  * requests going from @memcg to @root_cache. The new cache inherits properties
  * from its parent.
  */
-struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
+void memcg_create_kmem_cache(struct mem_cgroup *memcg,
                              struct kmem_cache *root_cache)
 {
         static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */
+        int memcg_id = memcg_cache_id(memcg);
         struct kmem_cache *s = NULL;
         char *cache_name;
 
@@ -447,6 +491,14 @@ struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
 
         mutex_lock(&slab_mutex);
 
+        /*
+         * Since per-memcg caches are created asynchronously on first
+         * allocation (see memcg_kmem_get_cache()), several threads can try to
+         * create the same cache, but only one of them may succeed.
+         */
+        if (cache_from_memcg_idx(root_cache, memcg_id))
+                goto out_unlock;
+
         cgroup_name(mem_cgroup_css(memcg)->cgroup,
                     memcg_name_buf, sizeof(memcg_name_buf));
         cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name,
@@ -458,49 +510,73 @@ struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
                          root_cache->size, root_cache->align,
                          root_cache->flags, root_cache->ctor,
                          memcg, root_cache);
+        /*
+         * If we could not create a memcg cache, do not complain, because
+         * that's not critical at all as we can always proceed with the root
+         * cache.
+         */
         if (IS_ERR(s)) {
                 kfree(cache_name);
-                s = NULL;
+                goto out_unlock;
         }
 
+        /*
+         * Since readers won't lock (see cache_from_memcg_idx()), we need a
+         * barrier here to ensure nobody will see the kmem_cache partially
+         * initialized.
+         */
+        smp_wmb();
+        root_cache->memcg_params->memcg_caches[memcg_id] = s;
+
 out_unlock:
         mutex_unlock(&slab_mutex);
 
         put_online_mems();
         put_online_cpus();
-
-        return s;
 }
 
-static int memcg_cleanup_cache_params(struct kmem_cache *s)
+void memcg_destroy_kmem_caches(struct mem_cgroup *memcg)
 {
-        int rc;
+        LIST_HEAD(release);
+        bool need_rcu_barrier = false;
+        struct kmem_cache *s, *s2;
 
-        if (!s->memcg_params ||
-            !s->memcg_params->is_root_cache)
-                return 0;
+        get_online_cpus();
+        get_online_mems();
 
-        mutex_unlock(&slab_mutex);
-        rc = __memcg_cleanup_cache_params(s);
         mutex_lock(&slab_mutex);
+        list_for_each_entry_safe(s, s2, &slab_caches, list) {
+                if (is_root_cache(s) || s->memcg_params->memcg != memcg)
+                        continue;
+                /*
+                 * The cgroup is about to be freed and therefore has no charges
+                 * left. Hence, all its caches must be empty by now.
+                 */
+                BUG_ON(do_kmem_cache_shutdown(s, &release, &need_rcu_barrier));
+        }
+        mutex_unlock(&slab_mutex);
 
-        return rc;
-}
-#else
-static int memcg_cleanup_cache_params(struct kmem_cache *s)
-{
-        return 0;
+        put_online_mems();
+        put_online_cpus();
+
+        do_kmem_cache_release(&release, need_rcu_barrier);
 }
 #endif /* CONFIG_MEMCG_KMEM */
 
 void slab_kmem_cache_release(struct kmem_cache *s)
 {
+        memcg_free_cache_params(s);
         kfree(s->name);
         kmem_cache_free(kmem_cache, s);
 }
 
 void kmem_cache_destroy(struct kmem_cache *s)
 {
+        int i;
+        LIST_HEAD(release);
+        bool need_rcu_barrier = false;
+        bool busy = false;
+
         get_online_cpus();
         get_online_mems();
 
@@ -510,35 +586,23 @@ void kmem_cache_destroy(struct kmem_cache *s)
         if (s->refcount)
                 goto out_unlock;
 
-        if (memcg_cleanup_cache_params(s) != 0)
-                goto out_unlock;
+        for_each_memcg_cache_index(i) {
+                struct kmem_cache *c = cache_from_memcg_idx(s, i);
 
-        if (__kmem_cache_shutdown(s) != 0) {
-                printk(KERN_ERR "kmem_cache_destroy %s: "
-                       "Slab cache still has objects\n", s->name);
-                dump_stack();
-                goto out_unlock;
+                if (c && do_kmem_cache_shutdown(c, &release, &need_rcu_barrier))
+                        busy = true;
         }
 
-        list_del(&s->list);
-
-        mutex_unlock(&slab_mutex);
-        if (s->flags & SLAB_DESTROY_BY_RCU)
-                rcu_barrier();
-
-        memcg_free_cache_params(s);
-#ifdef SLAB_SUPPORTS_SYSFS
-        sysfs_slab_remove(s);
-#else
-        slab_kmem_cache_release(s);
-#endif
-        goto out;
+        if (!busy)
+                do_kmem_cache_shutdown(s, &release, &need_rcu_barrier);
 
 out_unlock:
         mutex_unlock(&slab_mutex);
-out:
+
         put_online_mems();
         put_online_cpus();
+
+        do_kmem_cache_release(&release, need_rcu_barrier);
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
 