Diffstat (limited to 'mm')
-rw-r--r--   mm/memcontrol.c | 217
1 file changed, 217 insertions, 0 deletions
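For orientation, this patch exports the memcg_kmem_enabled_key static key and adds __memcg_kmem_get_cache(), which a slab allocator consults to redirect an accounted allocation to the per-memcg copy of a cache. Below is a minimal sketch of how a caller might use it; the wrapper name pick_memcg_cache() and the exact call site are illustrative assumptions, not part of the patch:

/*
 * Illustrative sketch only -- not part of this patch.  A slab allocator
 * hot path would consult __memcg_kmem_get_cache() roughly like this;
 * the wrapper name is hypothetical.
 */
static inline struct kmem_cache *
pick_memcg_cache(struct kmem_cache *cachep, gfp_t gfp)
{
        /*
         * memcg_kmem_enabled() tests the static key exported by this
         * patch, so this stays a near no-op until some memcg actually
         * enables kmem accounting.
         */
        if (!memcg_kmem_enabled())
                return cachep;

        /*
         * May return the original (root) cache if the per-memcg copy
         * does not exist yet; its creation is then deferred to a
         * workqueue and this allocation proceeds in the root cache.
         */
        return __memcg_kmem_get_cache(cachep, gfp);
}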
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index db38b60e5f8..efd26620a60 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -588,7 +588,14 @@ static int memcg_limited_groups_array_size;
 #define MEMCG_CACHES_MIN_SIZE 4
 #define MEMCG_CACHES_MAX_SIZE 65535
 
+/*
+ * A lot of the calls to the cache allocation functions are expected to be
+ * inlined by the compiler. Since the calls to memcg_kmem_get_cache are
+ * conditional on this static branch, we'll have to allow modules that do
+ * kmem_cache_alloc and the like to see this symbol as well.
+ */
 struct static_key memcg_kmem_enabled_key;
+EXPORT_SYMBOL(memcg_kmem_enabled_key);
 
 static void disarm_kmem_keys(struct mem_cgroup *memcg)
 {
@@ -2989,9 +2996,219 @@ int memcg_register_cache(struct mem_cgroup *memcg, struct kmem_cache *s)
 
 void memcg_release_cache(struct kmem_cache *s)
 {
+        struct kmem_cache *root;
+        struct mem_cgroup *memcg;
+        int id;
+
+        /*
+         * This happens, for instance, when a root cache goes away before we
+         * add any memcg.
+         */
+        if (!s->memcg_params)
+                return;
+
+        if (s->memcg_params->is_root_cache)
+                goto out;
+
+        memcg = s->memcg_params->memcg;
+        id = memcg_cache_id(memcg);
+
+        root = s->memcg_params->root_cache;
+        root->memcg_params->memcg_caches[id] = NULL;
+        mem_cgroup_put(memcg);
+
+        mutex_lock(&memcg->slab_caches_mutex);
+        list_del(&s->memcg_params->list);
+        mutex_unlock(&memcg->slab_caches_mutex);
+
+out:
         kfree(s->memcg_params);
 }
 
+static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *s)
+{
+        char *name;
+        struct dentry *dentry;
+
+        rcu_read_lock();
+        dentry = rcu_dereference(memcg->css.cgroup->dentry);
+        rcu_read_unlock();
+
+        BUG_ON(dentry == NULL);
+
+        name = kasprintf(GFP_KERNEL, "%s(%d:%s)", s->name,
+                         memcg_cache_id(memcg), dentry->d_name.name);
+
+        return name;
+}
+
+static struct kmem_cache *kmem_cache_dup(struct mem_cgroup *memcg,
+                                         struct kmem_cache *s)
+{
+        char *name;
+        struct kmem_cache *new;
+
+        name = memcg_cache_name(memcg, s);
+        if (!name)
+                return NULL;
+
+        new = kmem_cache_create_memcg(memcg, name, s->object_size, s->align,
+                                      (s->flags & ~SLAB_PANIC), s->ctor);
+
+        kfree(name);
+        return new;
+}
+
+/*
+ * This lock protects updaters, not readers. We want readers to be as fast as
+ * they can, and they will either see NULL or a valid cache value. Our model
+ * allows them to see NULL, in which case the root memcg will be selected.
+ *
+ * We need this lock because multiple allocations to the same cache may span
+ * more than one worker. Only one of them can create the cache.
+ */
+static DEFINE_MUTEX(memcg_cache_mutex);
+static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
+                                                  struct kmem_cache *cachep)
+{
+        struct kmem_cache *new_cachep;
+        int idx;
+
+        BUG_ON(!memcg_can_account_kmem(memcg));
+
+        idx = memcg_cache_id(memcg);
+
+        mutex_lock(&memcg_cache_mutex);
+        new_cachep = cachep->memcg_params->memcg_caches[idx];
+        if (new_cachep)
+                goto out;
+
+        new_cachep = kmem_cache_dup(memcg, cachep);
+
+        if (new_cachep == NULL) {
+                new_cachep = cachep;
+                goto out;
+        }
+
+        mem_cgroup_get(memcg);
+        new_cachep->memcg_params->root_cache = cachep;
+
+        cachep->memcg_params->memcg_caches[idx] = new_cachep;
+        /*
+         * the readers won't lock, make sure everybody sees the updated value,
+         * so they won't put stuff in the queue again for no reason
+         */
+        wmb();
+out:
+        mutex_unlock(&memcg_cache_mutex);
+        return new_cachep;
+}
+
+struct create_work {
+        struct mem_cgroup *memcg;
+        struct kmem_cache *cachep;
+        struct work_struct work;
+};
+
+static void memcg_create_cache_work_func(struct work_struct *w)
+{
+        struct create_work *cw;
+
+        cw = container_of(w, struct create_work, work);
+        memcg_create_kmem_cache(cw->memcg, cw->cachep);
+        /* Drop the reference gotten when we enqueued. */
+        css_put(&cw->memcg->css);
+        kfree(cw);
+}
+
+/*
+ * Enqueue the creation of a per-memcg kmem_cache.
+ * Called with rcu_read_lock.
+ */
+static void memcg_create_cache_enqueue(struct mem_cgroup *memcg,
+                                       struct kmem_cache *cachep)
+{
+        struct create_work *cw;
+
+        cw = kmalloc(sizeof(struct create_work), GFP_NOWAIT);
+        if (cw == NULL)
+                return;
+
+        /* The corresponding put will be done in the workqueue. */
+        if (!css_tryget(&memcg->css)) {
+                kfree(cw);
+                return;
+        }
+
+        cw->memcg = memcg;
+        cw->cachep = cachep;
+
+        INIT_WORK(&cw->work, memcg_create_cache_work_func);
+        schedule_work(&cw->work);
+}
+
+/*
+ * Return the kmem_cache we're supposed to use for a slab allocation.
+ * We try to use the current memcg's version of the cache.
+ *
+ * If the cache does not exist yet and we are the first user of it,
+ * we either create it immediately, if possible, or create it asynchronously
+ * in a workqueue.
+ * In the latter case, we will let the current allocation go through with
+ * the original cache.
+ *
+ * Can't be called in interrupt context or from kernel threads.
+ * This function needs to be called with rcu_read_lock() held.
+ */
+struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
+                                          gfp_t gfp)
+{
+        struct mem_cgroup *memcg;
+        int idx;
+
+        VM_BUG_ON(!cachep->memcg_params);
+        VM_BUG_ON(!cachep->memcg_params->is_root_cache);
+
+        rcu_read_lock();
+        memcg = mem_cgroup_from_task(rcu_dereference(current->mm->owner));
+        rcu_read_unlock();
+
+        if (!memcg_can_account_kmem(memcg))
+                return cachep;
+
+        idx = memcg_cache_id(memcg);
+
+        /*
+         * Barrier to make sure we're always seeing the up-to-date value. The
+         * code updating memcg_caches will issue a write barrier to match this.
+         */
+        read_barrier_depends();
+        if (unlikely(cachep->memcg_params->memcg_caches[idx] == NULL)) {
+                /*
+                 * If we are in a safe context (can wait, and not in interrupt
+                 * context), we could be predictable and return right away.
+                 * This would guarantee that the allocation being performed
+                 * already belongs in the new cache.
+                 *
+                 * However, there are some clashes that can arise from locking.
+                 * For instance, because we acquire the slab_mutex while doing
+                 * kmem_cache_dup, no further allocation could happen
+                 * with the slab_mutex held.
+                 *
+                 * Also, because cache creation issues get_online_cpus(), this
+                 * creates a lock chain: memcg_slab_mutex -> cpu_hotplug_mutex,
+                 * that ends up reversed during cpu hotplug. (cpuset allocates
+                 * a bunch of GFP_KERNEL memory during cpuup). Due to all that,
+                 * it is better to defer everything.
+                 */
+                memcg_create_cache_enqueue(memcg, cachep);
+                return cachep;
+        }
+
+        return cachep->memcg_params->memcg_caches[idx];
+}
+EXPORT_SYMBOL(__memcg_kmem_get_cache);
+
 /*
  * We need to verify if the allocation against current->mm->owner's memcg is
  * possible for the given order. But the page is not allocated yet, so we'll