author	Glauber Costa <glommer@parallels.com>	2012-12-18 17:22:42 -0500
committer	Linus Torvalds <torvalds@linux-foundation.org>	2012-12-18 18:02:14 -0500
commit	0e9d92f2d02d8c8320f0502307c688d07bdac2b3
tree	bbbdb6759579766c60679963497064e0db3a6f32 /mm/memcontrol.c
parent	d7f25f8a2f81252d1ac134470ba1d0a287cf8fcd
memcg: skip memcg kmem allocations in specified code regions
Create a mechanism that skips memcg allocations during certain pieces of
our core code. It basically works in the same way as
preempt_disable()/preempt_enable(): by marking a region under which all
allocations will be accounted to the root memcg.

We need this to prevent races in early cache creation, when we allocate
data using caches that are not necessarily created already.

Signed-off-by: Glauber Costa <glommer@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Frederic Weisbecker <fweisbec@redhat.com>
Cc: Greg Thelen <gthelen@google.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: JoonSoo Kim <js1304@gmail.com>
Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Mel Gorman <mel@csn.ul.ie>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@cs.helsinki.fi>
Cc: Rik van Riel <riel@redhat.com>
Cc: Suleiman Souhlal <suleiman@google.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
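For orientation, the usage pattern the new helpers establish looks roughly like this (an illustrative sketch, not code from the patch; create_cache_metadata() is a hypothetical callee, while memcg_stop_kmem_account()/memcg_resume_kmem_account() are the helpers the patch adds):

	/* Hypothetical example of wrapping an allocating region. */
	static void create_cache_metadata(void);	/* hypothetical: kmallocs internally */

	static void unaccounted_region_example(void)
	{
		memcg_stop_kmem_account();	/* allocations now charge the root memcg */
		create_cache_metadata();	/* any allocation here skips per-memcg accounting */
		memcg_resume_kmem_account();	/* per-memcg accounting resumes */
	}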
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--	mm/memcontrol.c	57
1 file changed, 54 insertions(+), 3 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index efd26620a60b..65302a083d2f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3025,6 +3025,37 @@ out:
 	kfree(s->memcg_params);
 }
 
+/*
+ * During the creation a new cache, we need to disable our accounting mechanism
+ * altogether. This is true even if we are not creating, but rather just
+ * enqueing new caches to be created.
+ *
+ * This is because that process will trigger allocations; some visible, like
+ * explicit kmallocs to auxiliary data structures, name strings and internal
+ * cache structures; some well concealed, like INIT_WORK() that can allocate
+ * objects during debug.
+ *
+ * If any allocation happens during memcg_kmem_get_cache, we will recurse back
+ * to it. This may not be a bounded recursion: since the first cache creation
+ * failed to complete (waiting on the allocation), we'll just try to create the
+ * cache again, failing at the same point.
+ *
+ * memcg_kmem_get_cache is prepared to abort after seeing a positive count of
+ * memcg_kmem_skip_account. So we enclose anything that might allocate memory
+ * inside the following two functions.
+ */
+static inline void memcg_stop_kmem_account(void)
+{
+	VM_BUG_ON(!current->mm);
+	current->memcg_kmem_skip_account++;
+}
+
+static inline void memcg_resume_kmem_account(void)
+{
+	VM_BUG_ON(!current->mm);
+	current->memcg_kmem_skip_account--;
+}
+
 static char *memcg_cache_name(struct mem_cgroup *memcg, struct kmem_cache *s)
 {
 	char *name;
@@ -3084,7 +3115,6 @@ static struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
 		goto out;
 
 	new_cachep = kmem_cache_dup(memcg, cachep);
-
 	if (new_cachep == NULL) {
 		new_cachep = cachep;
 		goto out;
@@ -3125,8 +3155,8 @@ static void memcg_create_cache_work_func(struct work_struct *w)
  * Enqueue the creation of a per-memcg kmem_cache.
  * Called with rcu_read_lock.
  */
-static void memcg_create_cache_enqueue(struct mem_cgroup *memcg,
-				       struct kmem_cache *cachep)
+static void __memcg_create_cache_enqueue(struct mem_cgroup *memcg,
+					 struct kmem_cache *cachep)
 {
 	struct create_work *cw;
 
@@ -3147,6 +3177,24 @@ static void memcg_create_cache_enqueue(struct mem_cgroup *memcg,
 	schedule_work(&cw->work);
 }
 
+static void memcg_create_cache_enqueue(struct mem_cgroup *memcg,
+				       struct kmem_cache *cachep)
+{
+	/*
+	 * We need to stop accounting when we kmalloc, because if the
+	 * corresponding kmalloc cache is not yet created, the first allocation
+	 * in __memcg_create_cache_enqueue will recurse.
+	 *
+	 * However, it is better to enclose the whole function. Depending on
+	 * the debugging options enabled, INIT_WORK(), for instance, can
+	 * trigger an allocation. This too, will make us recurse. Because at
+	 * this point we can't allow ourselves back into memcg_kmem_get_cache,
+	 * the safest choice is to do it like this, wrapping the whole function.
+	 */
+	memcg_stop_kmem_account();
+	__memcg_create_cache_enqueue(memcg, cachep);
+	memcg_resume_kmem_account();
+}
 /*
  * Return the kmem_cache we're supposed to use for a slab allocation.
  * We try to use the current memcg's version of the cache.
@@ -3169,6 +3217,9 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
 	VM_BUG_ON(!cachep->memcg_params);
 	VM_BUG_ON(!cachep->memcg_params->is_root_cache);
 
+	if (!current->mm || current->memcg_kmem_skip_account)
+		return cachep;
+
 	rcu_read_lock();
 	memcg = mem_cgroup_from_task(rcu_dereference(current->mm->owner));
 	rcu_read_unlock();
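Because memcg_kmem_skip_account is a per-task counter rather than a boolean, stop/resume pairs nest the same way preempt_disable()/preempt_enable() do. A sketch of the intended semantics (illustrative, not from the patch):

	memcg_stop_kmem_account();	/* skip_account: 0 -> 1, skipping starts */
	memcg_stop_kmem_account();	/* 1 -> 2, a nested region is harmless */
	memcg_resume_kmem_account();	/* 2 -> 1, still skipping */
	memcg_resume_kmem_account();	/* 1 -> 0, per-memcg accounting resumes */

__memcg_kmem_get_cache() only needs to test for a non-zero count (together with !current->mm for kernel threads) to hand back the root cache unmodified.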