diff options
author | Glauber Costa <glommer@parallels.com> | 2012-12-18 17:22:59 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-12-18 18:02:14 -0500 |
commit | 22933152934f30de6f05b600c03f8a08f853a8d2 (patch) | |
tree | 1abc838ffd9a130d25a493091dfe631145feea26 | |
parent | 7cf2798240a2a2230cb16a391beef98d8a7ad362 (diff) |
memcg/sl[au]b: shrink dead caches
When we destroy a memcg cache that happens to be empty, the cache may
still take a long time to go away: removing the memcg reference won't
destroy it, because there are pending references, and the empty pages
will stay there until a shrinker is called upon for any reason.
In this patch, we will call kmem_cache_shrink() for all dead caches that
cannot be destroyed because of remaining pages. After shrinking, it is
possible that such a cache can be freed. If this is not the case, we'll
schedule a lazy worker to keep trying.
Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | mm/memcontrol.c | 46 |
1 files changed, 43 insertions, 3 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 4b68ec2c8df6..7633e0d429e0 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -3080,7 +3080,27 @@ static void kmem_cache_destroy_work_func(struct work_struct *w) | |||
3080 | 3080 | ||
3081 | cachep = memcg_params_to_cache(p); | 3081 | cachep = memcg_params_to_cache(p); |
3082 | 3082 | ||
3083 | if (!atomic_read(&cachep->memcg_params->nr_pages)) | 3083 | /* |
3084 | * If we get down to 0 after shrink, we could delete right away. | ||
3085 | * However, memcg_release_pages() already puts us back in the workqueue | ||
3086 | * in that case. If we proceed deleting, we'll get a dangling | ||
3087 | * reference, and removing the object from the workqueue in that case | ||
3088 | * is unnecessary complication. We are not a fast path. | ||
3089 | * | ||
3090 | * Note that this case is fundamentally different from racing with | ||
3091 | * shrink_slab(): if mem_cgroup_destroy_cache() is called in | |
3092 | * kmem_cache_shrink, not only would we be reinserting a dead cache | |
3093 | * into the queue, but doing so from inside the worker racing to | ||
3094 | * destroy it. | ||
3095 | * | ||
3096 | * So if we aren't down to zero, we'll just schedule a worker and try | ||
3097 | * again | ||
3098 | */ | ||
3099 | if (atomic_read(&cachep->memcg_params->nr_pages) != 0) { | ||
3100 | kmem_cache_shrink(cachep); | ||
3101 | if (atomic_read(&cachep->memcg_params->nr_pages) == 0) | ||
3102 | return; | ||
3103 | } else | ||
3084 | kmem_cache_destroy(cachep); | 3104 | kmem_cache_destroy(cachep); |
3085 | } | 3105 | } |
3086 | 3106 | ||
@@ -3090,6 +3110,26 @@ void mem_cgroup_destroy_cache(struct kmem_cache *cachep) | |||
3090 | return; | 3110 | return; |
3091 | 3111 | ||
3092 | /* | 3112 | /* |
3113 | * There are many ways in which we can get here. | ||
3114 | * | ||
3115 | * We can get to a memory-pressure situation while the delayed work is | ||
3116 | * still pending to run. The vmscan shrinkers can then release all | ||
3117 | * cache memory and get us to destruction. If this is the case, we'll | ||
3118 | * be executed twice, which is a bug (the second time will execute over | ||
3119 | * bogus data). In this case, cancelling the work should be fine. | ||
3120 | * | ||
3121 | * But we can also get here from the worker itself, if | ||
3122 | * kmem_cache_shrink is enough to shake all the remaining objects and | ||
3123 | * get the page count to 0. In this case, we'll deadlock if we try to | ||
3124 | * cancel the work (the worker runs with an internal lock held, which | ||
3125 | * is the same lock we would hold for cancel_work_sync().) | ||
3126 | * | ||
3127 | * Since we can't possibly know who got us here, just refrain from | ||
3128 | * running if there is already work pending | ||
3129 | */ | ||
3130 | if (work_pending(&cachep->memcg_params->destroy)) | ||
3131 | return; | ||
3132 | /* | ||
3093 | * We have to defer the actual destroying to a workqueue, because | 3133 | * We have to defer the actual destroying to a workqueue, because |
3094 | * we might currently be in a context that cannot sleep. | 3134 | * we might currently be in a context that cannot sleep. |
3095 | */ | 3135 | */ |
@@ -3217,7 +3257,7 @@ void kmem_cache_destroy_memcg_children(struct kmem_cache *s) | |||
3217 | * set, so flip it down to guarantee we are in control. | 3257 | * set, so flip it down to guarantee we are in control. |
3218 | */ | 3258 | */ |
3219 | c->memcg_params->dead = false; | 3259 | c->memcg_params->dead = false; |
3220 | cancel_delayed_work_sync(&c->memcg_params->destroy); | 3260 | cancel_work_sync(&c->memcg_params->destroy); |
3221 | kmem_cache_destroy(c); | 3261 | kmem_cache_destroy(c); |
3222 | } | 3262 | } |
3223 | mutex_unlock(&set_limit_mutex); | 3263 | mutex_unlock(&set_limit_mutex); |
@@ -3242,7 +3282,7 @@ static void mem_cgroup_destroy_all_caches(struct mem_cgroup *memcg) | |||
3242 | cachep = memcg_params_to_cache(params); | 3282 | cachep = memcg_params_to_cache(params); |
3243 | cachep->memcg_params->dead = true; | 3283 | cachep->memcg_params->dead = true; |
3244 | INIT_WORK(&cachep->memcg_params->destroy, | 3284 | INIT_WORK(&cachep->memcg_params->destroy, |
3245 | kmem_cache_destroy_work_func); | 3285 | kmem_cache_destroy_work_func); |
3246 | schedule_work(&cachep->memcg_params->destroy); | 3286 | schedule_work(&cachep->memcg_params->destroy); |
3247 | } | 3287 | } |
3248 | mutex_unlock(&memcg->slab_caches_mutex); | 3288 | mutex_unlock(&memcg->slab_caches_mutex); |