Diffstat (limited to 'mm')
-rw-r--r--  mm/memcontrol.c | 217
1 files changed, 217 insertions, 0 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index db38b60e5f87..efd26620a60b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -588,7 +588,14 @@ static int memcg_limited_groups_array_size;
 #define MEMCG_CACHES_MIN_SIZE 4
 #define MEMCG_CACHES_MAX_SIZE 65535
 
+/*
+ * A lot of the calls to the cache allocation functions are expected to be
+ * inlined by the compiler. Since the calls to memcg_kmem_get_cache are
+ * conditional on this static branch, we have to allow modules that do
+ * kmem_cache_alloc and the like to see this symbol as well.
+ */
 struct static_key memcg_kmem_enabled_key;
+EXPORT_SYMBOL(memcg_kmem_enabled_key);
 
 static void disarm_kmem_keys(struct mem_cgroup *memcg)
 {
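The comment added in the hunk above explains why the key is exported: the checks that gate memcg_kmem_get_cache() are meant to be inlined into any module that calls kmem_cache_alloc() and friends. As a rough sketch, assuming the jump-label API of this kernel series (the wrapper below is illustrative, not quoted from the headers), such an inline helper looks like this:

#include <linux/jump_label.h>

extern struct static_key memcg_kmem_enabled_key;

/* Compiles to a patched-out branch until kmem accounting is first enabled. */
static inline bool memcg_kmem_enabled(void)
{
	return static_key_false(&memcg_kmem_enabled_key);
}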
@@ -2989,9 +2996,219 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s)
 
 void memcg_release_cache(struct kmem_cache *s)
 {
+	struct kmem_cache *root;
+	struct mem_cgroup *memcg;
+	int id;
+
+	/*
+	 * This happens, for instance, when a root cache goes away before we
+	 * add any memcg.
+	 */
+	if (!s->memcg_params)
+		return;
+
+	if (s->memcg_params->is_root_cache)
+		goto out;
+
+	memcg = s->memcg_params->memcg;
+	id = memcg_cache_id(memcg);
+
+	root = s->memcg_params->root_cache;
+	root->memcg_params->memcg_caches[id] = NULL;
+	mem_cgroup_put(memcg);
+
+	mutex_lock(&memcg->slab_caches_mutex);
+	list_del(&s->memcg_params->list);
+	mutex_unlock(&memcg->slab_caches_mutex);
+
+out:
 	kfree(s->memcg_params);
 }
 
+static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *s)
+{
+	char *name;
+	struct dentry *dentry;
+
+	rcu_read_lock();
+	dentry = rcu_dereference(memcg->css.cgroup->dentry);
+	rcu_read_unlock();
+
+	BUG_ON(dentry == NULL);
+
+	name = kasprintf(GFP_KERNEL, "%s(%d:%s)", s->name,
+			 memcg_cache_id(memcg), dentry->d_name.name);
+
+	return name;
+}
+
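For illustration only (the memcg id and cgroup directory name below are made up), the kasprintf() format above produces per-memcg cache names of the form:

	"dentry(3:foo)"	/* root cache "dentry", memcg id 3, cgroup dentry "foo" */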
+static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg,
+					 struct kmem_cache *s)
+{
+	char *name;
+	struct kmem_cache *new;
+
+	name = memcg_cache_name(memcg, s);
+	if (!name)
+		return NULL;
+
+	new = kmem_cache_create_memcg(memcg, name, s->object_size, s->align,
+				      (s->flags & ~SLAB_PANIC), s->ctor);
+
+	kfree(name);
+	return new;
+}
+
+/*
+ * This lock protects updaters, not readers. We want readers to be as fast as
+ * they can, and they will either see NULL or a valid cache value. Our model
+ * allows them to see NULL, in which case the root memcg will be selected.
+ *
+ * We need this lock because multiple allocations to the same cache from a non
+ * will span more than one worker. Only one of them can create the cache.
+ */
+static DEFINE_MUTEX(memcg_cache_mutex);
+static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
+						  struct kmem_cache *cachep)
+{
+	struct kmem_cache *new_cachep;
+	int idx;
+
+	BUG_ON(!memcg_can_account_kmem(memcg));
+
+	idx = memcg_cache_id(memcg);
+
+	mutex_lock(&memcg_cache_mutex);
+	new_cachep = cachep->memcg_params->memcg_caches[idx];
+	if (new_cachep)
+		goto out;
+
+	new_cachep = kmem_cache_dup(memcg, cachep);
+
+	if (new_cachep == NULL) {
+		new_cachep = cachep;
+		goto out;
+	}
+
+	mem_cgroup_get(memcg);
+	new_cachep->memcg_params->root_cache = cachep;
+
+	cachep->memcg_params->memcg_caches[idx] = new_cachep;
+	/*
+	 * The readers won't lock; make sure everybody sees the updated value,
+	 * so they won't put stuff in the queue again for no reason.
+	 */
+	wmb();
+out:
+	mutex_unlock(&memcg_cache_mutex);
+	return new_cachep;
+}
+
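The wmb() above is the writer half of a lockless pairing with the read_barrier_depends() in __memcg_kmem_get_cache() further down; readers never take memcg_cache_mutex and either see NULL (and fall back to the root cache) or a published pointer. A generic sketch of that publish/consume pattern, using hypothetical helpers rather than code from this patch:

/* Writer: make the new cache's initialization visible before publishing it. */
static void publish_memcg_cache(struct kmem_cache **slot, struct kmem_cache *new)
{
	wmb();			/* order initialization stores before the pointer store */
	*slot = new;
}

/* Lockless reader: order the pointer load before dereferencing through it. */
static struct kmem_cache *lookup_memcg_cache(struct kmem_cache **slot)
{
	struct kmem_cache *c = *slot;

	read_barrier_depends();	/* pairs with the writer's wmb() */
	return c;		/* NULL means "use the root cache" */
}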
+struct create_work {
+	struct mem_cgroup *memcg;
+	struct kmem_cache *cachep;
+	struct work_struct work;
+};
+
+static void memcg_create_cache_work_func(struct work_struct *w)
+{
+	struct create_work *cw;
+
+	cw = container_of(w, struct create_work, work);
+	memcg_create_kmem_cache(cw->memcg, cw->cachep);
+	/* Drop the reference gotten when we enqueued. */
+	css_put(&cw->memcg->css);
+	kfree(cw);
+}
+
+/*
+ * Enqueue the creation of a per-memcg kmem_cache.
+ * Called with rcu_read_lock.
+ */
+static void memcg_create_cache_enqueue(struct mem_cgroup *memcg,
+				       struct kmem_cache *cachep)
+{
+	struct create_work *cw;
+
+	cw = kmalloc(sizeof(struct create_work), GFP_NOWAIT);
+	if (cw == NULL)
+		return;
+
+	/* The corresponding put will be done in the workqueue. */
+	if (!css_tryget(&memcg->css)) {
+		kfree(cw);
+		return;
+	}
+
+	cw->memcg = memcg;
+	cw->cachep = cachep;
+
+	INIT_WORK(&cw->work, memcg_create_cache_work_func);
+	schedule_work(&cw->work);
+}
+
+/*
+ * Return the kmem_cache we're supposed to use for a slab allocation.
+ * We try to use the current memcg's version of the cache.
+ *
+ * If the cache does not exist yet and we are the first user of it,
+ * we either create it immediately, if possible, or create it asynchronously
+ * in a workqueue.
+ * In the latter case, we will let the current allocation go through with
+ * the original cache.
+ *
+ * Can't be called in interrupt context or from kernel threads.
+ * This function needs to be called with rcu_read_lock() held.
+ */
+struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
+					  gfp_t gfp)
+{
+	struct mem_cgroup *memcg;
+	int idx;
+
+	VM_BUG_ON(!cachep->memcg_params);
+	VM_BUG_ON(!cachep->memcg_params->is_root_cache);
+
+	rcu_read_lock();
+	memcg = mem_cgroup_from_task(rcu_dereference(current->mm->owner));
+	rcu_read_unlock();
+
+	if (!memcg_can_account_kmem(memcg))
+		return cachep;
+
+	idx = memcg_cache_id(memcg);
+
+	/*
+	 * Barrier to make sure we're always seeing the up-to-date value. The
+	 * code updating memcg_caches will issue a write barrier to match this.
+	 */
+	read_barrier_depends();
+	if (unlikely(cachep->memcg_params->memcg_caches[idx] == NULL)) {
+		/*
+		 * If we are in a safe context (can wait, and not in interrupt
+		 * context), we could be predictable and return right away.
+		 * This would guarantee that the allocation being performed
+		 * already belongs in the new cache.
+		 *
+		 * However, there are some clashes that can arise from locking.
+		 * For instance, because we acquire the slab_mutex while doing
+		 * kmem_cache_dup, no further allocation could happen with the
+		 * slab_mutex held.
+		 *
+		 * Also, because cache creation issues get_online_cpus(), this
+		 * creates a lock chain: memcg_slab_mutex -> cpu_hotplug_mutex,
+		 * which ends up reversed during cpu hotplug (cpuset allocates
+		 * a bunch of GFP_KERNEL memory during cpuup). Due to all that,
+		 * it is better to defer everything.
+		 */
+		memcg_create_cache_enqueue(memcg, cachep);
+		return cachep;
+	}
+
+	return cachep->memcg_params->memcg_caches[idx];
+}
+EXPORT_SYMBOL(__memcg_kmem_get_cache);
+
 /*
  * We need to verify if the allocation against current->mm->owner's memcg is
  * possible for the given order. But the page is not allocated yet, so we'll
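__memcg_kmem_get_cache() above is only the out-of-line slow path; per its comment it cannot run in interrupt context or from kernel threads, and the static key exported in the first hunk is what keeps the cost off the allocation fast path until kmem accounting is enabled. A hedged sketch of the kind of inline wrapper the slab hook is expected to sit behind (the real wrapper lives in the headers, outside this diff; the checks shown simply restate the constraints documented above, and memcg_kmem_enabled() is the helper sketched after the first hunk):

static __always_inline struct kmem_cache *
memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
{
	/* Static branch: zero cost until the first kmem-limited memcg exists. */
	if (!memcg_kmem_enabled())
		return cachep;

	/* The slow path must not run in irq context or from kernel threads. */
	if (in_interrupt() || !current->mm || (current->flags & PF_KTHREAD))
		return cachep;

	return __memcg_kmem_get_cache(cachep, gfp);
}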