Diffstat (limited to 'mm')
-rw-r--r--  mm/memcontrol.c | 217
1 files changed, 217 insertions, 0 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index db38b60e5f87..efd26620a60b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -588,7 +588,14 @@ static int memcg_limited_groups_array_size;
 #define MEMCG_CACHES_MIN_SIZE 4
 #define MEMCG_CACHES_MAX_SIZE 65535
 
+/*
+ * A lot of the calls to the cache allocation functions are expected to be
+ * inlined by the compiler. Since the calls to memcg_kmem_get_cache are
+ * conditional on this static branch, we have to allow modules that do
+ * kmem_cache_alloc and the like to see this symbol as well.
+ */
 struct static_key memcg_kmem_enabled_key;
+EXPORT_SYMBOL(memcg_kmem_enabled_key);
 
 static void disarm_kmem_keys(struct mem_cgroup *memcg)
 {
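The comment added in the hunk above explains why the key is exported: the checks that gate memcg_kmem_get_cache() are meant to be inlined into any module that calls kmem_cache_alloc() and friends. As a rough sketch, assuming the jump-label API of this kernel series (the wrapper below is illustrative, not quoted from the headers), such an inline helper looks like this:

#include <linux/jump_label.h>

extern struct static_key memcg_kmem_enabled_key;

/* Compiles to a patched-out branch until kmem accounting is first enabled. */
static inline bool memcg_kmem_enabled(void)
{
	return static_key_false(&memcg_kmem_enabled_key);
}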
@@ -2989,9 +2996,219 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s)
 
 void memcg_release_cache(struct kmem_cache *s)
 {
+	struct kmem_cache *root;
+	struct mem_cgroup *memcg;
+	int id;
+
+	/*
+	 * This happens, for instance, when a root cache goes away before we
+	 * add any memcg.
+	 */
+	if (!s->memcg_params)
+		return;
+
+	if (s->memcg_params->is_root_cache)
+		goto out;
+
+	memcg = s->memcg_params->memcg;
+	id = memcg_cache_id(memcg);
+
+	root = s->memcg_params->root_cache;
+	root->memcg_params->memcg_caches[id] = NULL;
+	mem_cgroup_put(memcg);
+
+	mutex_lock(&memcg->slab_caches_mutex);
+	list_del(&s->memcg_params->list);
+	mutex_unlock(&memcg->slab_caches_mutex);
+
+out:
 	kfree(s->memcg_params);
 }
 
+static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *s)
+{
+	char *name;
+	struct dentry *dentry;
+
+	rcu_read_lock();
+	dentry = rcu_dereference(memcg->css.cgroup->dentry);
+	rcu_read_unlock();
+
+	BUG_ON(dentry == NULL);
+
+	name = kasprintf(GFP_KERNEL, "%s(%d:%s)", s->name,
+			 memcg_cache_id(memcg), dentry->d_name.name);
+
+	return name;
+}
+
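For illustration only (the memcg id and cgroup directory name below are made up), the kasprintf() format above produces per-memcg cache names of the form:

	"dentry(3:foo)"	/* root cache "dentry", memcg id 3, cgroup dentry "foo" */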
+static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg,
+					 struct kmem_cache *s)
+{
+	char *name;
+	struct kmem_cache *new;
+
+	name = memcg_cache_name(memcg, s);
+	if (!name)
+		return NULL;
+
+	new = kmem_cache_create_memcg(memcg, name, s->object_size, s->align,
+				      (s->flags & ~SLAB_PANIC), s->ctor);
+
+	kfree(name);
+	return new;
+}
+
+/*
+ * This lock protects updaters, not readers. We want readers to be as fast as
+ * they can, and they will either see NULL or a valid cache value. Our model
+ * allows them to see NULL, in which case the root memcg will be selected.
+ *
+ * We need this lock because multiple allocations to the same cache from a non
+ * will span more than one worker. Only one of them can create the cache.
+ */
+static DEFINE_MUTEX(memcg_cache_mutex);
+static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
+						  struct kmem_cache *cachep)
+{
+	struct kmem_cache *new_cachep;
+	int idx;
+
+	BUG_ON(!memcg_can_account_kmem(memcg));
+
+	idx = memcg_cache_id(memcg);
+
+	mutex_lock(&memcg_cache_mutex);
+	new_cachep = cachep->memcg_params->memcg_caches[idx];
+	if (new_cachep)
+		goto out;
+
+	new_cachep = kmem_cache_dup(memcg, cachep);
+
+	if (new_cachep == NULL) {
+		new_cachep = cachep;
+		goto out;
+	}
+
+	mem_cgroup_get(memcg);
+	new_cachep->memcg_params->root_cache = cachep;
+
+	cachep->memcg_params->memcg_caches[idx] = new_cachep;
+	/*
+	 * The readers won't lock; make sure everybody sees the updated value,
+	 * so they won't put stuff in the queue again for no reason.
+	 */
+	wmb();
+out:
+	mutex_unlock(&memcg_cache_mutex);
+	return new_cachep;
+}
+
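The wmb() above is the writer half of a lockless pairing with the read_barrier_depends() in __memcg_kmem_get_cache() further down; readers never take memcg_cache_mutex and either see NULL (and fall back to the root cache) or a published pointer. A generic sketch of that publish/consume pattern, using hypothetical helpers rather than code from this patch:

/* Writer: make the new cache's initialization visible before publishing it. */
static void publish_memcg_cache(struct kmem_cache **slot, struct kmem_cache *new)
{
	wmb();			/* order initialization stores before the pointer store */
	*slot = new;
}

/* Lockless reader: order the pointer load before dereferencing through it. */
static struct kmem_cache *lookup_memcg_cache(struct kmem_cache **slot)
{
	struct kmem_cache *c = *slot;

	read_barrier_depends();	/* pairs with the writer's wmb() */
	return c;		/* NULL means "use the root cache" */
}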
+struct create_work {
+	struct mem_cgroup *memcg;
+	struct kmem_cache *cachep;
+	struct work_struct work;
+};
+
+static void memcg_create_cache_work_func(struct work_struct *w)
+{
+	struct create_work *cw;
+
+	cw = container_of(w, struct create_work, work);
+	memcg_create_kmem_cache(cw->memcg, cw->cachep);
+	/* Drop the reference gotten when we enqueued. */
+	css_put(&cw->memcg->css);
+	kfree(cw);
+}
+
+/*
+ * Enqueue the creation of a per-memcg kmem_cache.
+ * Called with rcu_read_lock.
+ */
+static void memcg_create_cache_enqueue(struct mem_cgroup *memcg,
+				       struct kmem_cache *cachep)
+{
+	struct create_work *cw;
+
+	cw = kmalloc(sizeof(struct create_work), GFP_NOWAIT);
+	if (cw == NULL)
+		return;
+
+	/* The corresponding put will be done in the workqueue. */
+	if (!css_tryget(&memcg->css)) {
+		kfree(cw);
+		return;
+	}
+
+	cw->memcg = memcg;
+	cw->cachep = cachep;
+
+	INIT_WORK(&cw->work, memcg_create_cache_work_func);
+	schedule_work(&cw->work);
+}
+
+/*
+ * Return the kmem_cache we're supposed to use for a slab allocation.
+ * We try to use the current memcg's version of the cache.
+ *
+ * If the cache does not exist yet and we are the first user of it,
+ * we either create it immediately, if possible, or create it asynchronously
+ * in a workqueue.
+ * In the latter case, we will let the current allocation go through with
+ * the original cache.
+ *
+ * Can't be called in interrupt context or from kernel threads.
+ * This function needs to be called with rcu_read_lock() held.
+ */
+struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
+					  gfp_t gfp)
+{
+	struct mem_cgroup *memcg;
+	int idx;
+
+	VM_BUG_ON(!cachep->memcg_params);
+	VM_BUG_ON(!cachep->memcg_params->is_root_cache);
+
+	rcu_read_lock();
+	memcg = mem_cgroup_from_task(rcu_dereference(current->mm->owner));
+	rcu_read_unlock();
+
+	if (!memcg_can_account_kmem(memcg))
+		return cachep;
+
+	idx = memcg_cache_id(memcg);
+
+	/*
+	 * Barrier to make sure we're always seeing the up-to-date value. The
+	 * code updating memcg_caches will issue a write barrier to match this.
+	 */
+	read_barrier_depends();
+	if (unlikely(cachep->memcg_params->memcg_caches[idx] == NULL)) {
+		/*
+		 * If we are in a safe context (can wait, and not in interrupt
+		 * context), we could be predictable and return right away.
+		 * This would guarantee that the allocation being performed
+		 * already belongs in the new cache.
+		 *
+		 * However, there are some clashes that can arise from locking.
+		 * For instance, because we acquire the slab_mutex while doing
+		 * kmem_cache_dup, no further allocation could happen with the
+		 * slab_mutex held.
+		 *
+		 * Also, because cache creation issues get_online_cpus(), this
+		 * creates a lock chain: memcg_slab_mutex -> cpu_hotplug_mutex,
+		 * which ends up reversed during cpu hotplug (cpuset allocates
+		 * a bunch of GFP_KERNEL memory during cpuup). Due to all that,
+		 * it is better to defer everything.
+		 */
+		memcg_create_cache_enqueue(memcg, cachep);
+		return cachep;
+	}
+
+	return cachep->memcg_params->memcg_caches[idx];
+}
+EXPORT_SYMBOL(__memcg_kmem_get_cache);
+
 /*
  * We need to verify if the allocation against current->mm->owner's memcg is
  * possible for the given order. But the page is not allocated yet, so we'll
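__memcg_kmem_get_cache() above is only the out-of-line slow path; per its comment it cannot run in interrupt context or from kernel threads, and the static key exported in the first hunk is what keeps the cost off the allocation fast path until kmem accounting is enabled. A hedged sketch of the kind of inline wrapper the slab hook is expected to sit behind (the real wrapper lives in the headers, outside this diff; the checks shown simply restate the constraints documented above, and memcg_kmem_enabled() is the helper sketched after the first hunk):

static __always_inline struct kmem_cache *
memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp)
{
	/* Static branch: zero cost until the first kmem-limited memcg exists. */
	if (!memcg_kmem_enabled())
		return cachep;

	/* The slow path must not run in irq context or from kernel threads. */
	if (in_interrupt() || !current->mm || (current->flags & PF_KTHREAD))
		return cachep;

	return __memcg_kmem_get_cache(cachep, gfp);
}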