diff options
author | Vladimir Davydov <vdavydov@parallels.com> | 2015-02-12 17:59:01 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-12 21:54:09 -0500 |
commit | 05257a1a3dcc196c197714b5c9a8dd35b7f6aefc (patch) | |
tree | 063e4bafec04171990ce3f95f649a9e3c6dcd6a8 | |
parent | dbcf73e26cd0b3d66e6db65ab595e664a55e58ff (diff) |
memcg: add rwsem to synchronize against memcg_caches arrays relocation
We need a stable value of memcg_nr_cache_ids in kmem_cache_create()
(memcg_alloc_cache_params() wants it for root caches), where we only
hold the slab_mutex and no memcg-related locks. As a result, we have to
update memcg_nr_cache_ids under the slab_mutex, which we can only take
on the slab's side (see memcg_update_array_size). This looks awkward
and will become even worse when per-memcg list_lru is introduced, which
also wants stable access to memcg_nr_cache_ids.
To get rid of this dependency between the memcg_nr_cache_ids and the
slab_mutex, this patch introduces a special rwsem. The rwsem is held
for writing during memcg_caches arrays relocation and memcg_nr_cache_ids
updates. Therefore one can take it for reading to get stable access
to memcg_caches arrays and/or memcg_nr_cache_ids.
Currently the semaphore is taken for reading only from
kmem_cache_create, right before taking the slab_mutex, so right now
there's not much point in using an rwsem instead of a mutex. However, once
list_lru is made per-memcg it will allow list_lru initializations to
proceed concurrently.
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: Greg Thelen <gthelen@google.com>
Cc: Glauber Costa <glommer@gmail.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Tejun Heo <tj@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | include/linux/memcontrol.h | 12 | ||||
-rw-r--r-- | mm/memcontrol.c | 29 | ||||
-rw-r--r-- | mm/slab_common.c | 9 |
3 files changed, 33 insertions, 17 deletions
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 2607c91230af..dbc4baa3619c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h | |||
@@ -399,6 +399,8 @@ static inline void sock_release_memcg(struct sock *sk) | |||
399 | extern struct static_key memcg_kmem_enabled_key; | 399 | extern struct static_key memcg_kmem_enabled_key; |
400 | 400 | ||
401 | extern int memcg_nr_cache_ids; | 401 | extern int memcg_nr_cache_ids; |
402 | extern void memcg_get_cache_ids(void); | ||
403 | extern void memcg_put_cache_ids(void); | ||
402 | 404 | ||
403 | /* | 405 | /* |
404 | * Helper macro to loop through all memcg-specific caches. Callers must still | 406 | * Helper macro to loop through all memcg-specific caches. Callers must still |
@@ -434,8 +436,6 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order); | |||
434 | 436 | ||
435 | int memcg_cache_id(struct mem_cgroup *memcg); | 437 | int memcg_cache_id(struct mem_cgroup *memcg); |
436 | 438 | ||
437 | void memcg_update_array_size(int num_groups); | ||
438 | |||
439 | struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep); | 439 | struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep); |
440 | void __memcg_kmem_put_cache(struct kmem_cache *cachep); | 440 | void __memcg_kmem_put_cache(struct kmem_cache *cachep); |
441 | 441 | ||
@@ -569,6 +569,14 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg) | |||
569 | return -1; | 569 | return -1; |
570 | } | 570 | } |
571 | 571 | ||
572 | static inline void memcg_get_cache_ids(void) | ||
573 | { | ||
574 | } | ||
575 | |||
576 | static inline void memcg_put_cache_ids(void) | ||
577 | { | ||
578 | } | ||
579 | |||
572 | static inline struct kmem_cache * | 580 | static inline struct kmem_cache * |
573 | memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp) | 581 | memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp) |
574 | { | 582 | { |
diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 8608fa543b84..6706e5fa5ac0 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c | |||
@@ -544,6 +544,19 @@ static void disarm_sock_keys(struct mem_cgroup *memcg) | |||
544 | static DEFINE_IDA(memcg_cache_ida); | 544 | static DEFINE_IDA(memcg_cache_ida); |
545 | int memcg_nr_cache_ids; | 545 | int memcg_nr_cache_ids; |
546 | 546 | ||
547 | /* Protects memcg_nr_cache_ids */ | ||
548 | static DECLARE_RWSEM(memcg_cache_ids_sem); | ||
549 | |||
550 | void memcg_get_cache_ids(void) | ||
551 | { | ||
552 | down_read(&memcg_cache_ids_sem); | ||
553 | } | ||
554 | |||
555 | void memcg_put_cache_ids(void) | ||
556 | { | ||
557 | up_read(&memcg_cache_ids_sem); | ||
558 | } | ||
559 | |||
547 | /* | 560 | /* |
548 | * MIN_SIZE is different than 1, because we would like to avoid going through | 561 | * MIN_SIZE is different than 1, because we would like to avoid going through |
549 | * the alloc/free process all the time. In a small machine, 4 kmem-limited | 562 | * the alloc/free process all the time. In a small machine, 4 kmem-limited |
@@ -2549,6 +2562,7 @@ static int memcg_alloc_cache_id(void) | |||
2549 | * There's no space for the new id in memcg_caches arrays, | 2562 | * There's no space for the new id in memcg_caches arrays, |
2550 | * so we have to grow them. | 2563 | * so we have to grow them. |
2551 | */ | 2564 | */ |
2565 | down_write(&memcg_cache_ids_sem); | ||
2552 | 2566 | ||
2553 | size = 2 * (id + 1); | 2567 | size = 2 * (id + 1); |
2554 | if (size < MEMCG_CACHES_MIN_SIZE) | 2568 | if (size < MEMCG_CACHES_MIN_SIZE) |
@@ -2557,6 +2571,11 @@ static int memcg_alloc_cache_id(void) | |||
2557 | size = MEMCG_CACHES_MAX_SIZE; | 2571 | size = MEMCG_CACHES_MAX_SIZE; |
2558 | 2572 | ||
2559 | err = memcg_update_all_caches(size); | 2573 | err = memcg_update_all_caches(size); |
2574 | if (!err) | ||
2575 | memcg_nr_cache_ids = size; | ||
2576 | |||
2577 | up_write(&memcg_cache_ids_sem); | ||
2578 | |||
2560 | if (err) { | 2579 | if (err) { |
2561 | ida_simple_remove(&memcg_cache_ida, id); | 2580 | ida_simple_remove(&memcg_cache_ida, id); |
2562 | return err; | 2581 | return err; |
@@ -2569,16 +2588,6 @@ static void memcg_free_cache_id(int id) | |||
2569 | ida_simple_remove(&memcg_cache_ida, id); | 2588 | ida_simple_remove(&memcg_cache_ida, id); |
2570 | } | 2589 | } |
2571 | 2590 | ||
2572 | /* | ||
2573 | * We should update the current array size iff all caches updates succeed. This | ||
2574 | * can only be done from the slab side. The slab mutex needs to be held when | ||
2575 | * calling this. | ||
2576 | */ | ||
2577 | void memcg_update_array_size(int num) | ||
2578 | { | ||
2579 | memcg_nr_cache_ids = num; | ||
2580 | } | ||
2581 | |||
2582 | struct memcg_kmem_cache_create_work { | 2591 | struct memcg_kmem_cache_create_work { |
2583 | struct mem_cgroup *memcg; | 2592 | struct mem_cgroup *memcg; |
2584 | struct kmem_cache *cachep; | 2593 | struct kmem_cache *cachep; |
diff --git a/mm/slab_common.c b/mm/slab_common.c index f8899eedab68..23f5fcde6043 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c | |||
@@ -169,8 +169,8 @@ int memcg_update_all_caches(int num_memcgs) | |||
169 | { | 169 | { |
170 | struct kmem_cache *s; | 170 | struct kmem_cache *s; |
171 | int ret = 0; | 171 | int ret = 0; |
172 | mutex_lock(&slab_mutex); | ||
173 | 172 | ||
173 | mutex_lock(&slab_mutex); | ||
174 | list_for_each_entry(s, &slab_caches, list) { | 174 | list_for_each_entry(s, &slab_caches, list) { |
175 | if (!is_root_cache(s)) | 175 | if (!is_root_cache(s)) |
176 | continue; | 176 | continue; |
@@ -181,11 +181,8 @@ int memcg_update_all_caches(int num_memcgs) | |||
181 | * up to this point in an updated state. | 181 | * up to this point in an updated state. |
182 | */ | 182 | */ |
183 | if (ret) | 183 | if (ret) |
184 | goto out; | 184 | break; |
185 | } | 185 | } |
186 | |||
187 | memcg_update_array_size(num_memcgs); | ||
188 | out: | ||
189 | mutex_unlock(&slab_mutex); | 186 | mutex_unlock(&slab_mutex); |
190 | return ret; | 187 | return ret; |
191 | } | 188 | } |
@@ -369,6 +366,7 @@ kmem_cache_create(const char *name, size_t size, size_t align, | |||
369 | 366 | ||
370 | get_online_cpus(); | 367 | get_online_cpus(); |
371 | get_online_mems(); | 368 | get_online_mems(); |
369 | memcg_get_cache_ids(); | ||
372 | 370 | ||
373 | mutex_lock(&slab_mutex); | 371 | mutex_lock(&slab_mutex); |
374 | 372 | ||
@@ -407,6 +405,7 @@ kmem_cache_create(const char *name, size_t size, size_t align, | |||
407 | out_unlock: | 405 | out_unlock: |
408 | mutex_unlock(&slab_mutex); | 406 | mutex_unlock(&slab_mutex); |
409 | 407 | ||
408 | memcg_put_cache_ids(); | ||
410 | put_online_mems(); | 409 | put_online_mems(); |
411 | put_online_cpus(); | 410 | put_online_cpus(); |
412 | 411 | ||