author    | Vladimir Davydov <vdavydov@parallels.com>        | 2014-12-12 19:56:38 -0500
committer | Linus Torvalds <torvalds@linux-foundation.org>   | 2014-12-13 15:42:49 -0500
commit    | 8135be5a8012f4c7e95218563855e16c09a8271b (patch)
tree      | 49e85409f82f5973a0cbf21e3e3eac382daa515b /mm/slub.c
parent    | ae6e71d3d900c398bdb346ac25733b2efa9b3752 (diff)
memcg: fix possible use-after-free in memcg_kmem_get_cache()
Suppose a task @t that belongs to a memory cgroup @memcg is going to
allocate an object from a kmem cache @c. The copy of @c corresponding to
@memcg, @mc, is empty. Then, if kmem_cache_alloc races with the memory
cgroup's destruction, we can access the memory cgroup's copy of the cache
after it has been destroyed:
CPU0                                    CPU1
----                                    ----
[ current=@t
  @mc->memcg_params->nr_pages=0 ]

kmem_cache_alloc(@c):
  call memcg_kmem_get_cache(@c);
  proceed to allocation from @mc:
    alloc a page for @mc:
      ...
                                        move @t from @memcg
                                        destroy @memcg:
                                          mem_cgroup_css_offline(@memcg):
                                            memcg_unregister_all_caches(@memcg):
                                              kmem_cache_destroy(@mc)
      add page to @mc
We could fix this issue by taking a reference to the per-memcg cache, but
that would require adding a per-cpu reference counter to per-memcg caches,
which would look cumbersome.

Instead, let's take a reference to the memory cgroup, which already has a
per-cpu reference counter, at the beginning of kmem_cache_alloc and drop
it at the end, and move per-memcg cache destruction from css offline to
css free. As a side effect, per-memcg caches will no longer be destroyed
one by one, but all at once, when the last page accounted to the memory
cgroup is freed. That doesn't sound like a high price to pay for code
readability, though.
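To make the ownership pattern easier to see in isolation, here is a minimal
user-space C analogue of what the patch relies on (an illustration only, not
kernel code: owner, owner_get, owner_put and alloc_from_owner are made-up
stand-ins for the memcg, its css reference counting and the per-memcg
allocation path). The owner is pinned before the potentially sleeping part of
the allocation and unpinned afterwards, so teardown of the per-owner cache
cannot run in between:

/* build with: cc -std=c11 -o refpin refpin.c */
#include <stdatomic.h>
#include <stdlib.h>

struct owner {
	atomic_int refcnt;      /* stands in for the memcg css refcount   */
	void *cache;            /* stands in for the per-memcg kmem cache */
};

static struct owner *owner_get(struct owner *o)
{
	atomic_fetch_add(&o->refcnt, 1);
	return o;
}

static void owner_put(struct owner *o)
{
	/* last reference dropped: only now may the cache be torn down */
	if (atomic_fetch_sub(&o->refcnt, 1) == 1) {
		free(o->cache);
		free(o);
	}
}

static void *alloc_from_owner(struct owner *o)
{
	void *obj;

	owner_get(o);           /* pin before the slow path ...           */
	obj = malloc(64);       /* ... which may "sleep"                  */
	/* o->cache is guaranteed to still exist at this point */
	owner_put(o);           /* unpin; teardown may proceed now        */
	return obj;
}

int main(void)
{
	/* error handling omitted for brevity */
	struct owner *o = calloc(1, sizeof(*o));

	atomic_init(&o->refcnt, 1);   /* reference held by the "cgroup"   */
	o->cache = malloc(128);

	free(alloc_from_owner(o));
	owner_put(o);                 /* "css free": drops the last ref   */
	return 0;
}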
Note, this patch does add some overhead to the kmem_cache_alloc hot path,
but it is pretty negligible - just a function call plus a per-cpu counter
decrement, which is comparable to what we already have in
memcg_kmem_get_cache. Besides, it is only relevant if there are memory
cgroups with kmem accounting enabled. I don't think we can find a way to
handle this race without it: alloc_page called from kmem_cache_alloc may
sleep, so we cannot simply flush all pending kmallocs without reference
counting.
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Christoph Lameter <cl@linux.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm/slub.c')
-rw-r--r-- | mm/slub.c | 14
1 file changed, 9 insertions, 5 deletions
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1233,13 +1233,17 @@ static inline void kfree_hook(const void *x)
 	kmemleak_free(x);
 }
 
-static inline int slab_pre_alloc_hook(struct kmem_cache *s, gfp_t flags)
+static inline struct kmem_cache *slab_pre_alloc_hook(struct kmem_cache *s,
+						     gfp_t flags)
 {
 	flags &= gfp_allowed_mask;
 	lockdep_trace_alloc(flags);
 	might_sleep_if(flags & __GFP_WAIT);
 
-	return should_failslab(s->object_size, flags, s->flags);
+	if (should_failslab(s->object_size, flags, s->flags))
+		return NULL;
+
+	return memcg_kmem_get_cache(s, flags);
 }
 
 static inline void slab_post_alloc_hook(struct kmem_cache *s,
@@ -1248,6 +1252,7 @@ static inline void slab_post_alloc_hook(struct kmem_cache *s,
 	flags &= gfp_allowed_mask;
 	kmemcheck_slab_alloc(s, flags, object, slab_ksize(s));
 	kmemleak_alloc_recursive(object, s->object_size, 1, s->flags, flags);
+	memcg_kmem_put_cache(s);
 }
 
 static inline void slab_free_hook(struct kmem_cache *s, void *x)
@@ -2384,10 +2389,9 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
 	struct page *page;
 	unsigned long tid;
 
-	if (slab_pre_alloc_hook(s, gfpflags))
+	s = slab_pre_alloc_hook(s, gfpflags);
+	if (!s)
 		return NULL;
-
-	s = memcg_kmem_get_cache(s, gfpflags);
 redo:
 	/*
 	 * Must read kmem_cache cpu data via this cpu ptr. Preemption is
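The hunks above cover only the mm/slub.c side of the change; the place where
the css reference is actually taken and dropped is the memcg code, which this
diffstat does not show. As a rough sketch of the expected shape of the release
side (an assumption for illustration, not a quote of this patch's
mm/memcontrol.c hunks; the memcg_params and is_root_cache details in
particular are assumed):

	/* Hypothetical sketch - the real counterpart lives outside mm/slub.c. */
	void memcg_kmem_put_cache(struct kmem_cache *cachep)
	{
		/*
		 * Only per-memcg copies pin their memcg; the root cache is
		 * never memcg-pinned, so there is nothing to drop for it.
		 */
		if (!is_root_cache(cachep))
			css_put(&cachep->memcg_params->memcg->css);
	}

Together with memcg_kmem_get_cache() returning the per-memcg cache with the
memcg's css pinned, this keeps the cache from being destroyed anywhere between
slab_pre_alloc_hook() and slab_post_alloc_hook().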