author     Vladimir Davydov <vdavydov@parallels.com>        2014-12-12 19:56:38 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>   2014-12-13 15:42:49 -0500
commit     8135be5a8012f4c7e95218563855e16c09a8271b (patch)
tree       49e85409f82f5973a0cbf21e3e3eac382daa515b /mm/slub.c
parent     ae6e71d3d900c398bdb346ac25733b2efa9b3752 (diff)
memcg: fix possible use-after-free in memcg_kmem_get_cache()
Suppose task @t that belongs to a memory cgroup @memcg is going to
allocate an object from a kmem cache @c.  The copy of @c corresponding
to @memcg, @mc, is empty.  Then, if kmem_cache_alloc races with the
memory cgroup's destruction, we can access the memory cgroup's copy of
the cache after it has been destroyed:

CPU0                                CPU1
----                                ----
[ current=@t
  @mc->memcg_params->nr_pages=0 ]

kmem_cache_alloc(@c):
  call memcg_kmem_get_cache(@c);
  proceed to allocation from @mc:
    alloc a page for @mc:
      ...

                                    move @t from @memcg
                                    destroy @memcg:
                                      mem_cgroup_css_offline(@memcg):
                                        memcg_unregister_all_caches(@memcg):
                                          kmem_cache_destroy(@mc)

    add page to @mc

We could fix this issue by taking a reference to the per-memcg cache, but
that would require adding a per-cpu reference counter to per-memcg caches,
which would look cumbersome.

Instead, let's take a reference to the memory cgroup, which already has a
per-cpu reference counter, at the beginning of kmem_cache_alloc, to be
dropped at the end, and move per-memcg cache destruction from css offline
to css free.  As a side effect, per-memcg caches will be destroyed not one
by one, but all at once, when the last page accounted to the memory cgroup
is freed.  This doesn't sound like too high a price to pay for code
readability, though.

Note that this patch does add some overhead to the kmem_cache_alloc hot
path, but it is pretty negligible: just a function call plus a per-cpu
counter decrement, which is comparable to what we already have in
memcg_kmem_get_cache.  Besides, it is only relevant if there are memory
cgroups with kmem accounting enabled.  I don't think we can find a way to
handle this race without it, because alloc_page called from
kmem_cache_alloc may sleep, so we can't flush all pending kmallocs without
reference counting.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
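For readers unfamiliar with the memcg/slab glue, the pairing the patch
relies on can be sketched as follows.  This is an illustrative sketch
only, not the memcontrol.c code added by this patch: helper names such as
get_mem_cgroup_from_mm(), cache_from_memcg_idx(), memcg_cache_id(),
is_root_cache() and the memcg_params->memcg back-pointer follow the
3.19-era memcg/slab code and are assumptions here.  The point is simply
that the get side pins the cgroup's css (which already has a per-cpu
reference counter) and the put side, now called from
slab_post_alloc_hook(), releases it once the allocation has finished, so
the per-memcg cache cannot be destroyed while an allocation from it is in
flight.

/*
 * Illustrative sketch, assuming 3.19-era memcg/slab internals.
 * Not the actual memcontrol.c hunk of this patch.
 */
static struct kmem_cache *memcg_get_cache_sketch(struct kmem_cache *root_cache)
{
        struct mem_cgroup *memcg;
        struct kmem_cache *cachep;

        /* Pins current's memcg: bumps its per-cpu css reference counter. */
        memcg = get_mem_cgroup_from_mm(current->mm);

        cachep = cache_from_memcg_idx(root_cache, memcg_cache_id(memcg));
        if (cachep)
                return cachep;  /* css reference held until the put below */

        /* No per-memcg copy: drop the pin and fall back to the root cache. */
        css_put(&memcg->css);
        return root_cache;
}

static void memcg_put_cache_sketch(struct kmem_cache *cachep)
{
        /* Root caches were returned without a pinned css; nothing to drop. */
        if (!is_root_cache(cachep))
                css_put(&cachep->memcg_params->memcg->css);
}

With the css pinned for the whole allocation, css free cannot run for that
memcg, so moving per-memcg cache destruction from css offline to css free
ensures kmem_cache_destroy(@mc) cannot race with the in-flight allocation
shown above.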
Diffstat (limited to 'mm/slub.c')
-rw-r--r--  mm/slub.c  14
1 file changed, 9 insertions(+), 5 deletions(-)
diff --git a/mm/slub.c b/mm/slub.c
index 765c5884d03d..fe4db9c17238 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1233,13 +1233,17 @@ static inline void kfree_hook(const void *x)
         kmemleak_free(x);
 }

-static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
+static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
+                                                     gfp_t flags)
 {
         flags &= gfp_allowed_mask;
         lockdep_trace_alloc(flags);
         might_sleep_if(flags & __GFP_WAIT);

-        return should_failslab(s->object_size, flags, s->flags);
+        if (should_failslab(s->object_size, flags, s->flags))
+                return NULL;
+
+        return memcg_kmem_get_cache(s, flags);
 }

 static inline void slab_post_alloc_hook(struct kmem_cache *s,
@@ -1248,6 +1252,7 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s,
         flags &= gfp_allowed_mask;
         kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
         kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
+        memcg_kmem_put_cache(s);
 }

 static inline void slab_free_hook(struct kmem_cache *s, void *x)
@@ -2384,10 +2389,9 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
         struct page *page;
         unsigned long tid;

-        if (slab_pre_alloc_hook(s, gfpflags))
+        s = slab_pre_alloc_hook(s, gfpflags);
+        if (!s)
                 return NULL;
-
-        s = memcg_kmem_get_cache(s, gfpflags);
 redo:
         /*
          * Must read kmem_cache cpu data via this cpu ptr. Preemption is