about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorGlauber Costa <glommer@parallels.com>2012-12-18 17:22:59 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2012-12-18 18:02:14 -0500
commit22933152934f30de6f05b600c03f8a08f853a8d2 (patch)
tree1abc838ffd9a130d25a493091dfe631145feea26
parent7cf2798240a2a2230cb16a391beef98d8a7ad362 (diff)
memcg/sl[au]b: shrink dead caches
This means that when we destroy a memcg cache that happened to be empty, those caches may take a lot of time to go away: removing the memcg reference won't destroy them - because there are pending references, and the empty pages will stay there, until a shrinker is called upon for any reason. In this patch, we will call kmem_cache_shrink() for all dead caches that cannot be destroyed because of remaining pages. After shrinking, it is possible that the cache can be freed. If this is not the case, we'll schedule a lazy worker to keep trying. Signed-off-by: Glauber Costa <glommer@parallels.com> Cc: Christoph Lameter <cl@linux.com> Cc: David Rientjes <rientjes@google.com> Cc: Frederic Weisbecker <fweisbec@redhat.com> Cc: Greg Thelen <gthelen@google.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: JoonSoo Kim <js1304@gmail.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Michal Hocko <mhocko@suse.cz> Cc: Pekka Enberg <penberg@cs.helsinki.fi> Cc: Rik van Riel <riel@redhat.com> Cc: Suleiman Souhlal <suleiman@google.com> Cc: Tejun Heo <tj@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--mm/memcontrol.c46
1 file changed, 43 insertions(+), 3 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 4b68ec2c8df6..7633e0d429e0 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3080,7 +3080,27 @@ static void kmem_cache_destroy_work_func(struct work_struct *w)
3080 3080
3081 cachep = memcg_params_to_cache(p); 3081 cachep = memcg_params_to_cache(p);
3082 3082
3083 if (!atomic_read(&cachep->memcg_params->nr_pages)) 3083 /*
3084 * If we get down to 0 after shrink, we could delete right away.
3085 * However, memcg_release_pages() already puts us back in the workqueue
3086 * in that case. If we proceed deleting, we'll get a dangling
3087 * reference, and removing the object from the workqueue in that case
3088 * is unnecessary complication. We are not a fast path.
3089 *
3090 * Note that this case is fundamentally different from racing with
3091 * shrink_slab(): if memcg_cgroup_destroy_cache() is called in
3092 * kmem_cache_shrink, not only we would be reinserting a dead cache
3093 * into the queue, but doing so from inside the worker racing to
3094 * destroy it.
3095 *
3096 * So if we aren't down to zero, we'll just schedule a worker and try
3097 * again
3098 */
3099 if (atomic_read(&cachep->memcg_params->nr_pages) != 0) {
3100 kmem_cache_shrink(cachep);
3101 if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
3102 return;
3103 } else
3084 kmem_cache_destroy(cachep); 3104 kmem_cache_destroy(cachep);
3085} 3105}
3086 3106
@@ -3090,6 +3110,26 @@ void mem_cgroup_destroy_cache(struct kmem_cache *cachep)
3090 return; 3110 return;
3091 3111
3092 /* 3112 /*
3113 * There are many ways in which we can get here.
3114 *
3115 * We can get to a memory-pressure situation while the delayed work is
3116 * still pending to run. The vmscan shrinkers can then release all
3117 * cache memory and get us to destruction. If this is the case, we'll
3118 * be executed twice, which is a bug (the second time will execute over
3119 * bogus data). In this case, cancelling the work should be fine.
3120 *
3121 * But we can also get here from the worker itself, if
3122 * kmem_cache_shrink is enough to shake all the remaining objects and
3123 * get the page count to 0. In this case, we'll deadlock if we try to
3124 * cancel the work (the worker runs with an internal lock held, which
3125 * is the same lock we would hold for cancel_work_sync().)
3126 *
3127 * Since we can't possibly know who got us here, just refrain from
3128 * running if there is already work pending
3129 */
3130 if (work_pending(&cachep->memcg_params->destroy))
3131 return;
3132 /*
3093 * We have to defer the actual destroying to a workqueue, because 3133 * We have to defer the actual destroying to a workqueue, because
3094 * we might currently be in a context that cannot sleep. 3134 * we might currently be in a context that cannot sleep.
3095 */ 3135 */
@@ -3217,7 +3257,7 @@ void kmem_cache_destroy_memcg_children(struct kmem_cache *s)
3217 * set, so flip it down to guarantee we are in control. 3257 * set, so flip it down to guarantee we are in control.
3218 */ 3258 */
3219 c->memcg_params->dead = false; 3259 c->memcg_params->dead = false;
3220 cancel_delayed_work_sync(&c->memcg_params->destroy); 3260 cancel_work_sync(&c->memcg_params->destroy);
3221 kmem_cache_destroy(c); 3261 kmem_cache_destroy(c);
3222 } 3262 }
3223 mutex_unlock(&set_limit_mutex); 3263 mutex_unlock(&set_limit_mutex);
@@ -3242,7 +3282,7 @@ static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg)
3242 cachep = memcg_params_to_cache(params); 3282 cachep = memcg_params_to_cache(params);
3243 cachep->memcg_params->dead = true; 3283 cachep->memcg_params->dead = true;
3244 INIT_WORK(&cachep->memcg_params->destroy, 3284 INIT_WORK(&cachep->memcg_params->destroy,
3245 kmem_cache_destroy_work_func); 3285 kmem_cache_destroy_work_func);
3246 schedule_work(&cachep->memcg_params->destroy); 3286 schedule_work(&cachep->memcg_params->destroy);
3247 } 3287 }
3248 mutex_unlock(&memcg->slab_caches_mutex); 3288 mutex_unlock(&memcg->slab_caches_mutex);