author    Vladimir Davydov <vdavydov@parallels.com>    2014-06-04 19:07:40 -0400
committer Linus Torvalds <torvalds@linux-foundation.org>    2014-06-04 19:54:01 -0400
commit    bd67314586a3d5725e60f2f6587b4cb0f659bb67 (patch)
tree      581a51f3f04f18bf3e8b906fa2d6f440ccd12055 /mm
parent    c67a8a685a6e9abbaf0235e084168f15a721ae39 (diff)
memcg, slab: simplify synchronization scheme
At present, we have the following mutexes protecting data related to per memcg kmem caches:

- slab_mutex. This one is held during the whole kmem cache creation and destruction paths. We also take it when updating per root cache memcg_caches arrays (see memcg_update_all_caches). As a result, taking it guarantees there will be no changes to any kmem cache (including per memcg). Why do we need something else then? The point is it is private to the slab implementation and has some internal dependencies on other mutexes (get_online_cpus). So we just don't want to rely upon it and prefer to introduce additional mutexes instead.

- activate_kmem_mutex. Initially it was added to synchronize initializing the kmem limit (memcg_activate_kmem). However, since we can grow per root cache memcg_caches arrays only on kmem limit initialization (see memcg_update_all_caches), we also employ it to protect against memcg_caches arrays relocation (e.g. see __kmem_cache_destroy_memcg_children).

- We have a convention not to take slab_mutex in memcontrol.c, but we want to walk over per memcg memcg_slab_caches lists there (e.g. for destroying all memcg caches on offline). So we have per memcg slab_caches_mutex's protecting those lists.

The mutexes are taken in the following order:

   activate_kmem_mutex -> slab_mutex -> memcg::slab_caches_mutex

Such a synchronization scheme has a number of flaws, for instance:

- We can't call kmem_cache_{destroy,shrink} while walking over a memcg::memcg_slab_caches list due to the locking order. As a result, in mem_cgroup_destroy_all_caches we schedule the memcg_cache_params::destroy work, which shrinks and destroys the cache.

- We don't have a mutex to synchronize per memcg caches destruction between memcg offline (mem_cgroup_destroy_all_caches) and root cache destruction (__kmem_cache_destroy_memcg_children). Currently we just don't bother about it.

This patch simplifies things by substituting the per memcg slab_caches_mutex's with the global memcg_slab_mutex. It will be held whenever a new per memcg cache is created or destroyed, so it protects per root cache memcg_caches arrays and per memcg memcg_slab_caches lists. The locking order is the following:

   activate_kmem_mutex -> memcg_slab_mutex -> slab_mutex

This allows us to call kmem_cache_{create,shrink,destroy} under the memcg_slab_mutex. As a result, we don't need the memcg_cache_params::destroy work any more - we can simply destroy caches while iterating over a per memcg slab caches list.

Also, using the global mutex simplifies synchronization between concurrent per memcg caches creation/destruction, e.g. mem_cgroup_destroy_all_caches vs __kmem_cache_destroy_memcg_children.

The downside is that we substitute the per-memcg slab_caches_mutex's with a hammer-like global mutex, but since we already take either the slab_mutex or the cgroup_mutex along with a memcg::slab_caches_mutex, it shouldn't hurt concurrency a lot.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Pekka Enberg <penberg@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
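To illustrate the pattern the changelog describes - destroying caches directly while walking the per memcg list under one global mutex, instead of deferring to a destroy work item - here is a minimal stand-alone user-space sketch. It is not kernel code: fake_cache, fake_cache_create and fake_destroy_all_caches are hypothetical stand-ins, and a pthread mutex plays the role of memcg_slab_mutex; the kernel itself uses list_for_each_entry_safe, as the hunks below show.

/*
 * User-space sketch (not kernel code): a single global mutex protects both
 * the per-"memcg" cache list and cache destruction, so entries can be freed
 * directly while walking the list.  Build with:  cc -pthread sketch.c
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

/* stand-in for the global memcg_slab_mutex introduced by this patch */
static pthread_mutex_t memcg_slab_mutex = PTHREAD_MUTEX_INITIALIZER;

struct fake_cache {
        char name[32];
        struct fake_cache *next;
};

/* per-"memcg" list of caches, protected by memcg_slab_mutex */
static struct fake_cache *memcg_slab_caches;

static void fake_cache_create(const char *name)
{
        struct fake_cache *c = malloc(sizeof(*c));

        if (!c)
                return;
        snprintf(c->name, sizeof(c->name), "%s", name);

        pthread_mutex_lock(&memcg_slab_mutex);
        c->next = memcg_slab_caches;
        memcg_slab_caches = c;
        pthread_mutex_unlock(&memcg_slab_mutex);
}

/*
 * Analogue of mem_cgroup_destroy_all_caches() after the patch: the list walk
 * and the destruction happen under the same mutex, so no deferred "destroy"
 * work item is needed; the next pointer is saved before each entry is freed,
 * which is what list_for_each_entry_safe() does in the kernel.
 */
static void fake_destroy_all_caches(void)
{
        struct fake_cache *c, *next;

        pthread_mutex_lock(&memcg_slab_mutex);
        for (c = memcg_slab_caches; c; c = next) {
                next = c->next;
                printf("destroying %s\n", c->name);
                free(c);
        }
        memcg_slab_caches = NULL;
        pthread_mutex_unlock(&memcg_slab_mutex);
}

int main(void)
{
        fake_cache_create("dentry");
        fake_cache_create("inode_cache");
        fake_destroy_all_caches();
        return 0;
}

The kernel-side counterpart is visible in the mem_cgroup_destroy_all_caches() hunk below, where list_for_each_entry_safe() plus memcg_slab_mutex replaces schedule_work(&cachep->memcg_params->destroy).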
Diffstat (limited to 'mm')
-rw-r--r--    mm/memcontrol.c    150
-rw-r--r--    mm/slab_common.c    23
2 files changed, 67 insertions, 106 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 86a2078805e5..6b448881422b 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -357,10 +357,9 @@ struct mem_cgroup {
         struct cg_proto tcp_mem;
 #endif
 #if defined(CONFIG_MEMCG_KMEM)
-        /* analogous to slab_common's slab_caches list. per-memcg */
+        /* analogous to slab_common's slab_caches list, but per-memcg;
+         * protected by memcg_slab_mutex */
         struct list_head memcg_slab_caches;
-        /* Not a spinlock, we can take a lot of time walking the list */
-        struct mutex slab_caches_mutex;
         /* Index in the kmem_cache->memcg_params->memcg_caches array */
         int kmemcg_id;
 #endif
@@ -2913,6 +2912,12 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 static DEFINE_MUTEX(set_limit_mutex);
 
 #ifdef CONFIG_MEMCG_KMEM
+/*
+ * The memcg_slab_mutex is held whenever a per memcg kmem cache is created or
+ * destroyed. It protects memcg_caches arrays and memcg_slab_caches lists.
+ */
+static DEFINE_MUTEX(memcg_slab_mutex);
+
 static DEFINE_MUTEX(activate_kmem_mutex);
 
 static inline bool memcg_can_account_kmem(struct mem_cgroup *memcg)
@@ -2945,10 +2950,10 @@ static int mem_cgroup_slabinfo_read(struct seq_file *m, void *v)
 
         print_slabinfo_header(m);
 
-        mutex_lock(&memcg->slab_caches_mutex);
+        mutex_lock(&memcg_slab_mutex);
         list_for_each_entry(params, &memcg->memcg_slab_caches, list)
                 cache_show(memcg_params_to_cache(params), m);
-        mutex_unlock(&memcg->slab_caches_mutex);
+        mutex_unlock(&memcg_slab_mutex);
 
         return 0;
 }
@@ -3050,8 +3055,6 @@ void memcg_update_array_size(int num)
         memcg_limited_groups_array_size = memcg_caches_array_size(num);
 }
 
-static void kmem_cache_destroy_work_func(struct work_struct *w);
-
 int memcg_update_cache_size(struct kmem_cache *s, int num_groups)
 {
         struct memcg_cache_params *cur_params = s->memcg_params;
@@ -3148,8 +3151,6 @@ int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s,
         if (memcg) {
                 s->memcg_params->memcg = memcg;
                 s->memcg_params->root_cache = root_cache;
-                INIT_WORK(&s->memcg_params->destroy,
-                                kmem_cache_destroy_work_func);
                 css_get(&memcg->css);
         } else
                 s->memcg_params->is_root_cache = true;
@@ -3166,24 +3167,34 @@ void memcg_free_cache_params(struct kmem_cache *s)
         kfree(s->memcg_params);
 }
 
-void memcg_register_cache(struct kmem_cache *s)
+static void memcg_kmem_create_cache(struct mem_cgroup *memcg,
+                                    struct kmem_cache *root_cache)
 {
-        struct kmem_cache *root;
-        struct mem_cgroup *memcg;
+        struct kmem_cache *cachep;
         int id;
 
-        if (is_root_cache(s))
+        lockdep_assert_held(&memcg_slab_mutex);
+
+        id = memcg_cache_id(memcg);
+
+        /*
+         * Since per-memcg caches are created asynchronously on first
+         * allocation (see memcg_kmem_get_cache()), several threads can try to
+         * create the same cache, but only one of them may succeed.
+         */
+        if (cache_from_memcg_idx(root_cache, id))
                 return;
 
+        cachep = kmem_cache_create_memcg(memcg, root_cache);
         /*
-         * Holding the slab_mutex assures nobody will touch the memcg_caches
-         * array while we are modifying it.
+         * If we could not create a memcg cache, do not complain, because
+         * that's not critical at all as we can always proceed with the root
+         * cache.
         */
-        lockdep_assert_held(&slab_mutex);
+        if (!cachep)
+                return;
 
-        root = s->memcg_params->root_cache;
-        memcg = s->memcg_params->memcg;
-        id = memcg_cache_id(memcg);
+        list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches);
 
         /*
          * Since readers won't lock (see cache_from_memcg_idx()), we need a
@@ -3192,49 +3203,30 @@ void memcg_register_cache(struct kmem_cache *s)
          */
         smp_wmb();
 
-        /*
-         * Initialize the pointer to this cache in its parent's memcg_params
-         * before adding it to the memcg_slab_caches list, otherwise we can
-         * fail to convert memcg_params_to_cache() while traversing the list.
-         */
-        VM_BUG_ON(root->memcg_params->memcg_caches[id]);
-        root->memcg_params->memcg_caches[id] = s;
-
-        mutex_lock(&memcg->slab_caches_mutex);
-        list_add(&s->memcg_params->list, &memcg->memcg_slab_caches);
-        mutex_unlock(&memcg->slab_caches_mutex);
+        BUG_ON(root_cache->memcg_params->memcg_caches[id]);
+        root_cache->memcg_params->memcg_caches[id] = cachep;
 }
 
-void memcg_unregister_cache(struct kmem_cache *s)
+static void memcg_kmem_destroy_cache(struct kmem_cache *cachep)
 {
-        struct kmem_cache *root;
+        struct kmem_cache *root_cache;
         struct mem_cgroup *memcg;
         int id;
 
-        if (is_root_cache(s))
-                return;
+        lockdep_assert_held(&memcg_slab_mutex);
 
-        /*
-         * Holding the slab_mutex assures nobody will touch the memcg_caches
-         * array while we are modifying it.
-         */
-        lockdep_assert_held(&slab_mutex);
+        BUG_ON(is_root_cache(cachep));
 
-        root = s->memcg_params->root_cache;
-        memcg = s->memcg_params->memcg;
+        root_cache = cachep->memcg_params->root_cache;
+        memcg = cachep->memcg_params->memcg;
         id = memcg_cache_id(memcg);
 
-        mutex_lock(&memcg->slab_caches_mutex);
-        list_del(&s->memcg_params->list);
-        mutex_unlock(&memcg->slab_caches_mutex);
+        BUG_ON(root_cache->memcg_params->memcg_caches[id] != cachep);
+        root_cache->memcg_params->memcg_caches[id] = NULL;
 
-        /*
-         * Clear the pointer to this cache in its parent's memcg_params only
-         * after removing it from the memcg_slab_caches list, otherwise we can
-         * fail to convert memcg_params_to_cache() while traversing the list.
-         */
-        VM_BUG_ON(root->memcg_params->memcg_caches[id] != s);
-        root->memcg_params->memcg_caches[id] = NULL;
+        list_del(&cachep->memcg_params->list);
+
+        kmem_cache_destroy(cachep);
 }
 
 /*
@@ -3268,70 +3260,42 @@ static inline void memcg_resume_kmem_account(void)
         current->memcg_kmem_skip_account--;
 }
 
-static void kmem_cache_destroy_work_func(struct work_struct *w)
-{
-        struct kmem_cache *cachep;
-        struct memcg_cache_params *p;
-
-        p = container_of(w, struct memcg_cache_params, destroy);
-
-        cachep = memcg_params_to_cache(p);
-
-        kmem_cache_shrink(cachep);
-        if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
-                kmem_cache_destroy(cachep);
-}
-
 int __kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 {
         struct kmem_cache *c;
         int i, failed = 0;
 
-        /*
-         * If the cache is being destroyed, we trust that there is no one else
-         * requesting objects from it. Even if there are, the sanity checks in
-         * kmem_cache_destroy should caught this ill-case.
-         *
-         * Still, we don't want anyone else freeing memcg_caches under our
-         * noses, which can happen if a new memcg comes to life. As usual,
-         * we'll take the activate_kmem_mutex to protect ourselves against
-         * this.
-         */
-        mutex_lock(&activate_kmem_mutex);
+        mutex_lock(&memcg_slab_mutex);
         for_each_memcg_cache_index(i) {
                 c = cache_from_memcg_idx(s, i);
                 if (!c)
                         continue;
 
-                /*
-                 * We will now manually delete the caches, so to avoid races
-                 * we need to cancel all pending destruction workers and
-                 * proceed with destruction ourselves.
-                 */
-                cancel_work_sync(&c->memcg_params->destroy);
-                kmem_cache_destroy(c);
+                memcg_kmem_destroy_cache(c);
 
                 if (cache_from_memcg_idx(s, i))
                         failed++;
         }
-        mutex_unlock(&activate_kmem_mutex);
+        mutex_unlock(&memcg_slab_mutex);
         return failed;
 }
 
 static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
 {
         struct kmem_cache *cachep;
-        struct memcg_cache_params *params;
+        struct memcg_cache_params *params, *tmp;
 
         if (!memcg_kmem_is_active(memcg))
                 return;
 
-        mutex_lock(&memcg->slab_caches_mutex);
-        list_for_each_entry(params, &memcg->memcg_slab_caches, list) {
+        mutex_lock(&memcg_slab_mutex);
+        list_for_each_entry_safe(params, tmp, &memcg->memcg_slab_caches, list) {
                 cachep = memcg_params_to_cache(params);
-                schedule_work(&cachep->memcg_params->destroy);
+                kmem_cache_shrink(cachep);
+                if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
+                        memcg_kmem_destroy_cache(cachep);
         }
-        mutex_unlock(&memcg->slab_caches_mutex);
+        mutex_unlock(&memcg_slab_mutex);
 }
 
 struct create_work {
@@ -3346,7 +3310,10 @@ static void memcg_create_cache_work_func(struct work_struct *w)
         struct mem_cgroup *memcg = cw->memcg;
         struct kmem_cache *cachep = cw->cachep;
 
-        kmem_cache_create_memcg(memcg, cachep);
+        mutex_lock(&memcg_slab_mutex);
+        memcg_kmem_create_cache(memcg, cachep);
+        mutex_unlock(&memcg_slab_mutex);
+
         css_put(&memcg->css);
         kfree(cw);
 }
@@ -5022,13 +4989,14 @@ static int __memcg_activate_kmem(struct mem_cgroup *memcg,
          * Make sure we have enough space for this cgroup in each root cache's
          * memcg_params.
          */
+        mutex_lock(&memcg_slab_mutex);
         err = memcg_update_all_caches(memcg_id + 1);
+        mutex_unlock(&memcg_slab_mutex);
         if (err)
                 goto out_rmid;
 
         memcg->kmemcg_id = memcg_id;
         INIT_LIST_HEAD(&memcg->memcg_slab_caches);
-        mutex_init(&memcg->slab_caches_mutex);
 
         /*
          * We couldn't have accounted to this cgroup, because it hasn't got the
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 2dd920dc3776..7e348cff814d 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -160,7 +160,6 @@ do_kmem_cache_create(char *name, size_t object_size, size_t size, size_t align,
 
         s->refcount = 1;
         list_add(&s->list, &slab_caches);
-        memcg_register_cache(s);
 out:
         if (err)
                 return ERR_PTR(err);
@@ -270,9 +269,10 @@ EXPORT_SYMBOL(kmem_cache_create);
  * requests going from @memcg to @root_cache. The new cache inherits properties
  * from its parent.
  */
-void kmem_cache_create_memcg(struct mem_cgroup *memcg, struct kmem_cache *root_cache)
+struct kmem_cache *kmem_cache_create_memcg(struct mem_cgroup *memcg,
+                                           struct kmem_cache *root_cache)
 {
-        struct kmem_cache *s;
+        struct kmem_cache *s = NULL;
         char *cache_name;
 
         get_online_cpus();
@@ -280,14 +280,6 @@ void kmem_cache_create_memcg(struct mem_cgroup *memcg, struct kmem_cache *root_c
 
         mutex_lock(&slab_mutex);
 
-        /*
-         * Since per-memcg caches are created asynchronously on first
-         * allocation (see memcg_kmem_get_cache()), several threads can try to
-         * create the same cache, but only one of them may succeed.
-         */
-        if (cache_from_memcg_idx(root_cache, memcg_cache_id(memcg)))
-                goto out_unlock;
-
         cache_name = memcg_create_cache_name(memcg, root_cache);
         if (!cache_name)
                 goto out_unlock;
@@ -296,14 +288,18 @@ void kmem_cache_create_memcg(struct mem_cgroup *memcg, struct kmem_cache *root_c
                                  root_cache->size, root_cache->align,
                                  root_cache->flags, root_cache->ctor,
                                  memcg, root_cache);
-        if (IS_ERR(s))
+        if (IS_ERR(s)) {
                 kfree(cache_name);
+                s = NULL;
+        }
 
 out_unlock:
         mutex_unlock(&slab_mutex);
 
         put_online_mems();
         put_online_cpus();
+
+        return s;
 }
 
 static int kmem_cache_destroy_memcg_children(struct kmem_cache *s)
@@ -348,11 +344,8 @@ void kmem_cache_destroy(struct kmem_cache *s)
                 goto out_unlock;
 
         list_del(&s->list);
-        memcg_unregister_cache(s);
-
         if (__kmem_cache_shutdown(s) != 0) {
                 list_add(&s->list, &slab_caches);
-                memcg_register_cache(s);
                 printk(KERN_ERR "kmem_cache_destroy %s: "
                        "Slab cache still has objects\n", s->name);
                 dump_stack();