-rw-r--r--   include/linux/memcontrol.h |  1
-rw-r--r--   include/linux/slab.h       |  2
-rw-r--r--   mm/memcontrol.c            | 63
-rw-r--r--   mm/slab.h                  |  7
4 files changed, 4 insertions, 69 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 5155d09e749d..087a45314181 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -509,7 +509,6 @@ __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp);
 int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size);
 void memcg_uncharge_kmem(struct mem_cgroup *memcg, u64 size);
 
-void mem_cgroup_destroy_cache(struct kmem_cache *cachep);
 int __kmem_cache_destroy_memcg_children(struct kmem_cache *s);
 
 /**
diff --git a/include/linux/slab.h b/include/linux/slab.h
index a6aab2c0dfc5..905541dd3778 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -524,7 +524,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node)
  * @memcg: pointer to the memcg this cache belongs to
  * @list: list_head for the list of all caches in this memcg
  * @root_cache: pointer to the global, root cache, this cache was derived from
- * @dead: set to true after the memcg dies; the cache may still be around.
  * @nr_pages: number of pages that belongs to this cache.
  * @destroy: worker to be called whenever we are ready, or believe we may be
  *           ready, to destroy this cache.
@@ -540,7 +539,6 @@ struct memcg_cache_params {
 			struct mem_cgroup *memcg;
 			struct list_head list;
 			struct kmem_cache *root_cache;
-			bool dead;
 			atomic_t nr_pages;
 			struct work_struct destroy;
 		};
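For reference, a sketch of the per-memcg part of struct memcg_cache_params as it reads after this hunk, reconstructed only from the diff context above; fields and the surrounding layout of the real definition that do not appear in the hunk are omitted, and the member comments are paraphrased from the kerneldoc in the previous hunk:

/* Sketch reconstructed from the hunk above; not the full definition. */
struct memcg_cache_params {
	/* ... fields not shown in this hunk ... */
	struct mem_cgroup *memcg;	/* memcg this cache belongs to */
	struct list_head list;		/* list of all caches in this memcg */
	struct kmem_cache *root_cache;	/* root cache this one was derived from */
	atomic_t nr_pages;		/* pages that belong to this cache */
	struct work_struct destroy;	/* deferred destruction worker */
};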
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 9f4ff49c6add..6b1c45ced733 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3277,60 +3277,11 @@ static void kmem_cache_destroy_work_func(struct work_struct *w)
 
 	cachep = memcg_params_to_cache(p);
 
-	/*
-	 * If we get down to 0 after shrink, we could delete right away.
-	 * However, memcg_release_pages() already puts us back in the workqueue
-	 * in that case. If we proceed deleting, we'll get a dangling
-	 * reference, and removing the object from the workqueue in that case
-	 * is unnecessary complication. We are not a fast path.
-	 *
-	 * Note that this case is fundamentally different from racing with
-	 * shrink_slab(): if memcg_cgroup_destroy_cache() is called in
-	 * kmem_cache_shrink, not only we would be reinserting a dead cache
-	 * into the queue, but doing so from inside the worker racing to
-	 * destroy it.
-	 *
-	 * So if we aren't down to zero, we'll just schedule a worker and try
-	 * again
-	 */
-	if (atomic_read(&cachep->memcg_params->nr_pages) != 0)
-		kmem_cache_shrink(cachep);
-	else
+	kmem_cache_shrink(cachep);
+	if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
 		kmem_cache_destroy(cachep);
 }
 
-void mem_cgroup_destroy_cache(struct kmem_cache *cachep)
-{
-	if (!cachep->memcg_params->dead)
-		return;
-
-	/*
-	 * There are many ways in which we can get here.
-	 *
-	 * We can get to a memory-pressure situation while the delayed work is
-	 * still pending to run. The vmscan shrinkers can then release all
-	 * cache memory and get us to destruction. If this is the case, we'll
-	 * be executed twice, which is a bug (the second time will execute over
-	 * bogus data). In this case, cancelling the work should be fine.
-	 *
-	 * But we can also get here from the worker itself, if
-	 * kmem_cache_shrink is enough to shake all the remaining objects and
-	 * get the page count to 0. In this case, we'll deadlock if we try to
-	 * cancel the work (the worker runs with an internal lock held, which
-	 * is the same lock we would hold for cancel_work_sync().)
-	 *
-	 * Since we can't possibly know who got us here, just refrain from
-	 * running if there is already work pending
-	 */
-	if (work_pending(&cachep->memcg_params->destroy))
-		return;
-	/*
-	 * We have to defer the actual destroying to a workqueue, because
-	 * we might currently be in a context that cannot sleep.
-	 */
-	schedule_work(&cachep->memcg_params->destroy);
-}
-
 int __kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 {
 	struct kmem_cache *c;
@@ -3356,16 +3307,7 @@ int __kmem_cache_destroy_memcg_children(struct kmem_cache *s)
 		 * We will now manually delete the caches, so to avoid races
 		 * we need to cancel all pending destruction workers and
 		 * proceed with destruction ourselves.
-		 *
-		 * kmem_cache_destroy() will call kmem_cache_shrink internally,
-		 * and that could spawn the workers again: it is likely that
-		 * the cache still have active pages until this very moment.
-		 * This would lead us back to mem_cgroup_destroy_cache.
-		 *
-		 * But that will not execute at all if the "dead" flag is not
-		 * set, so flip it down to guarantee we are in control.
 		 */
-		c->memcg_params->dead = false;
 		cancel_work_sync(&c->memcg_params->destroy);
 		kmem_cache_destroy(c);
 
@@ -3387,7 +3329,6 @@ static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
 	mutex_lock(&memcg->slab_caches_mutex);
 	list_for_each_entry(params, &memcg->memcg_slab_caches, list) {
 		cachep = memcg_params_to_cache(params);
-		cachep->memcg_params->dead = true;
 		schedule_work(&cachep->memcg_params->destroy);
 	}
 	mutex_unlock(&memcg->slab_caches_mutex);
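Reconstructed from the first hunk of this file, the destruction worker after the patch reduces to an unconditional shrink followed by a destroy-only-if-empty check. The local declarations and the container_of() line are assumptions filled in around the diff context (only the memcg_params_to_cache() call and the shrink/destroy lines are visible above):

/* Sketch of kmem_cache_destroy_work_func() as it reads after this patch.
 * The declarations and container_of() are assumed; the shrink/destroy
 * logic is taken directly from the added lines of the hunk above. */
static void kmem_cache_destroy_work_func(struct work_struct *w)
{
	struct kmem_cache *cachep;
	struct memcg_cache_params *p;

	p = container_of(w, struct memcg_cache_params, destroy);
	cachep = memcg_params_to_cache(p);

	/* Try to free as much as possible, then destroy only if empty. */
	kmem_cache_shrink(cachep);
	if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
		kmem_cache_destroy(cachep);
}

If pages remain after the shrink the worker simply returns; with the dead flag and mem_cgroup_destroy_cache() gone, nothing reschedules it from memcg_release_pages() any more.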
diff --git a/mm/slab.h b/mm/slab.h
index d85d59803d5f..b59447ac4533 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -129,11 +129,8 @@ static inline void memcg_bind_pages(struct kmem_cache *s, int order)
 
 static inline void memcg_release_pages(struct kmem_cache *s, int order)
 {
-	if (is_root_cache(s))
-		return;
-
-	if (atomic_sub_and_test((1 << order), &s->memcg_params->nr_pages))
-		mem_cgroup_destroy_cache(s);
+	if (!is_root_cache(s))
+		atomic_sub(1 << order, &s->memcg_params->nr_pages);
 }
 
 static inline bool slab_equal_or_root(struct kmem_cache *s,
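Putting the hunk together, memcg_release_pages() after the patch only drops the page counter for child caches and no longer triggers destruction itself; scheduling the destroy worker is left entirely to mem_cgroup_destroy_all_caches(). A minimal sketch of the resulting helper, taken directly from the added lines:

/* Sketch of memcg_release_pages() after this patch: the last page going
 * away only decrements nr_pages, it no longer schedules destruction. */
static inline void memcg_release_pages(struct kmem_cache *s, int order)
{
	if (!is_root_cache(s))
		atomic_sub(1 << order, &s->memcg_params->nr_pages);
}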