author		Vladimir Davydov <vdavydov@parallels.com>	2014-06-04 19:07:20 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-06-04 19:53:59 -0400
commit		03afc0e25f7fc03537014a770f4c54ebbe63a24c (patch)
tree		520cdb32e6d35cd5b4e61fc5254a151fb03fc24a /mm
parent		bfc8c90139ebd049b9801a951db3b9a4a00bed9c (diff)
slab: get_online_mems for kmem_cache_{create,destroy,shrink}
When we create a sl[au]b cache, we allocate kmem_cache_node structures
for each online NUMA node. To handle nodes going online or offline, we
register a memory hotplug notifier and, for each kmem cache, allocate or
free the kmem_cache_node corresponding to the node that changes state.
To synchronize the two paths, we hold the slab_mutex both during the
cache creation/destruction path and while tuning the per-node parts of
kmem caches in the memory hotplug handler. That is not quite right,
though, because it does not guarantee that a newly created cache will
have all of its kmem_cache_nodes initialized if it races with memory
hotplug. For instance, in the case of slub:
    CPU0                            CPU1
    ----                            ----
    kmem_cache_create:              online_pages:
     __kmem_cache_create:            slab_memory_callback:
                                      slab_mem_going_online_callback:
                                       lock slab_mutex
                                       for each slab_caches list entry
                                           allocate kmem_cache node
                                       unlock slab_mutex
      lock slab_mutex
      init_kmem_cache_nodes:
        for_each_node_state(node, N_NORMAL_MEMORY)
            allocate kmem_cache node
      add kmem_cache to slab_caches list
      unlock slab_mutex
                                    online_pages (continued):
                                     node_states_set_node
As a result, we get a kmem cache with some of its kmem_cache_nodes left
unallocated.
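
For concreteness, the hotplug-side path in the diagram is slub's memory
notifier, which walks slab_caches under slab_mutex and gives every
existing cache a kmem_cache_node for the node coming online. A
simplified sketch of that callback, based on mm/slub.c of this era
(error paths and comments trimmed, annotations added):

	static int slab_mem_going_online_callback(void *arg)
	{
		struct memory_notify *marg = arg;
		int nid = marg->status_change_nid_normal;
		struct kmem_cache_node *n;
		struct kmem_cache *s;
		int ret = 0;

		/* Nothing to do if the node already had normal memory. */
		if (nid < 0)
			return 0;

		/* Give every registered cache a node structure for nid. */
		mutex_lock(&slab_mutex);
		list_for_each_entry(s, &slab_caches, list) {
			/* kmem_cache_node is slub's cache of these structs */
			n = kmem_cache_alloc(kmem_cache_node, GFP_KERNEL);
			if (!n) {
				ret = -ENOMEM;
				goto out;
			}
			init_kmem_cache_node(n);
			s->node[nid] = n;
		}
	out:
		mutex_unlock(&slab_mutex);
		return ret;
	}

In the interleaving above, the cache being created is not yet on
slab_caches when this walk runs, and N_NORMAL_MEMORY does not yet
include the new node when init_kmem_cache_nodes() runs, so neither path
allocates the new node's kmem_cache_node.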
To avoid issues like this, we should hold get/put_online_mems() across
the whole kmem cache creation/destruction/shrink paths, just as we
already do for cpu hotplug. This patch does exactly that.
Note that once this is applied, there is no need to take the slab_mutex
for kmem_cache_shrink any more, so it is removed from that path.
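
For reference, the shrink path after this patch (lifted from the
mm/slab_common.c hunk below, comments added) reduces to pinning both
kinds of hotplug around the internal helper, with no slab_mutex:

	int kmem_cache_shrink(struct kmem_cache *cachep)
	{
		int ret;

		get_online_cpus();	/* keep cpu hotplug out */
		get_online_mems();	/* keep memory hotplug out */
		ret = __kmem_cache_shrink(cachep);
		put_online_mems();
		put_online_cpus();
		return ret;
	}

The create and destroy paths get the same get_online_mems()/
put_online_mems() bracketing, nested inside the existing
get_online_cpus()/put_online_cpus() pair.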
Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: Tang Chen <tangchen@cn.fujitsu.com>
Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com>
Cc: Toshi Kani <toshi.kani@hp.com>
Cc: Xishi Qiu <qiuxishi@huawei.com>
Cc: Jiang Liu <liuj97@gmail.com>
Cc: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Cc: David Rientjes <rientjes@google.com>
Cc: Wen Congyang <wency@cn.fujitsu.com>
Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--  mm/slab.c        | 26
-rw-r--r--  mm/slab.h        |  1
-rw-r--r--  mm/slab_common.c | 35
-rw-r--r--  mm/slob.c        |  3
-rw-r--r--  mm/slub.c        |  5
5 files changed, 39 insertions, 31 deletions
diff --git a/mm/slab.c b/mm/slab.c
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2480,8 +2480,7 @@ out:
 	return nr_freed;
 }
 
-/* Called with slab_mutex held to protect against cpu hotplug */
-static int __cache_shrink(struct kmem_cache *cachep)
+int __kmem_cache_shrink(struct kmem_cache *cachep)
 {
 	int ret = 0, i = 0;
 	struct kmem_cache_node *n;
@@ -2502,32 +2501,11 @@ static int __cache_shrink(struct kmem_cache *cachep)
 	return (ret ? 1 : 0);
 }
 
-/**
- * kmem_cache_shrink - Shrink a cache.
- * @cachep: The cache to shrink.
- *
- * Releases as many slabs as possible for a cache.
- * To help debugging, a zero exit status indicates all slabs were released.
- */
-int kmem_cache_shrink(struct kmem_cache *cachep)
-{
-	int ret;
-	BUG_ON(!cachep || in_interrupt());
-
-	get_online_cpus();
-	mutex_lock(&slab_mutex);
-	ret = __cache_shrink(cachep);
-	mutex_unlock(&slab_mutex);
-	put_online_cpus();
-	return ret;
-}
-EXPORT_SYMBOL(kmem_cache_shrink);
-
 int __kmem_cache_shutdown(struct kmem_cache *cachep)
 {
 	int i;
 	struct kmem_cache_node *n;
-	int rc = __cache_shrink(cachep);
+	int rc = __kmem_cache_shrink(cachep);
 
 	if (rc)
 		return rc;
diff --git a/mm/slab.h b/mm/slab.h
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -91,6 +91,7 @@ __kmem_cache_alias(const char *name, size_t size, size_t align,
 #define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS)
 
 int __kmem_cache_shutdown(struct kmem_cache *);
+int __kmem_cache_shrink(struct kmem_cache *);
 void slab_kmem_cache_release(struct kmem_cache *);
 
 struct seq_file;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 2834bc2886fd..2dd920dc3776 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -205,6 +205,8 @@ kmem_cache_create(const char *name, size_t size, size_t align,
 	int err;
 
 	get_online_cpus();
+	get_online_mems();
+
 	mutex_lock(&slab_mutex);
 
 	err = kmem_cache_sanity_check(name, size);
@@ -239,6 +241,8 @@ kmem_cache_create(const char *name, size_t size, size_t align,
 
 out_unlock:
 	mutex_unlock(&slab_mutex);
+
+	put_online_mems();
 	put_online_cpus();
 
 	if (err) {
@@ -272,6 +276,8 @@ void kmem_cache_create_memcg(struct mem_cgroup *memcg, struct kmem_cache *root_c
 	char *cache_name;
 
 	get_online_cpus();
+	get_online_mems();
+
 	mutex_lock(&slab_mutex);
 
 	/*
@@ -295,6 +301,8 @@ void kmem_cache_create_memcg(struct mem_cgroup *memcg, struct kmem_cache *root_c
 
 out_unlock:
 	mutex_unlock(&slab_mutex);
+
+	put_online_mems();
 	put_online_cpus();
 }
 
@@ -328,6 +336,8 @@ void slab_kmem_cache_release(struct kmem_cache *s)
 void kmem_cache_destroy(struct kmem_cache *s)
 {
 	get_online_cpus();
+	get_online_mems();
+
 	mutex_lock(&slab_mutex);
 
 	s->refcount--;
@@ -359,15 +369,36 @@ void kmem_cache_destroy(struct kmem_cache *s)
 #else
 	slab_kmem_cache_release(s);
 #endif
-	goto out_put_cpus;
+	goto out;
 
 out_unlock:
 	mutex_unlock(&slab_mutex);
-out_put_cpus:
+out:
+	put_online_mems();
 	put_online_cpus();
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
 
+/**
+ * kmem_cache_shrink - Shrink a cache.
+ * @cachep: The cache to shrink.
+ *
+ * Releases as many slabs as possible for a cache.
+ * To help debugging, a zero exit status indicates all slabs were released.
+ */
+int kmem_cache_shrink(struct kmem_cache *cachep)
+{
+	int ret;
+
+	get_online_cpus();
+	get_online_mems();
+	ret = __kmem_cache_shrink(cachep);
+	put_online_mems();
+	put_online_cpus();
+	return ret;
+}
+EXPORT_SYMBOL(kmem_cache_shrink);
+
 int slab_is_available(void)
 {
 	return slab_state >= UP;
diff --git a/mm/slob.c b/mm/slob.c
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -620,11 +620,10 @@ int __kmem_cache_shutdown(struct kmem_cache *c)
 	return 0;
 }
 
-int kmem_cache_shrink(struct kmem_cache *d)
+int __kmem_cache_shrink(struct kmem_cache *d)
 {
 	return 0;
 }
-EXPORT_SYMBOL(kmem_cache_shrink);
 
 struct kmem_cache kmem_cache_boot = {
 	.name = "kmem_cache",
diff --git a/mm/slub.c b/mm/slub.c
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3398,7 +3398,7 @@ EXPORT_SYMBOL(kfree);
  * being allocated from last increasing the chance that the last objects
  * are freed in them.
  */
-int kmem_cache_shrink(struct kmem_cache *s)
+int __kmem_cache_shrink(struct kmem_cache *s)
 {
 	int node;
 	int i;
@@ -3454,7 +3454,6 @@ int kmem_cache_shrink(struct kmem_cache *s)
 	kfree(slabs_by_inuse);
 	return 0;
 }
-EXPORT_SYMBOL(kmem_cache_shrink);
 
 static int slab_mem_going_offline_callback(void *arg)
 {
@@ -3462,7 +3461,7 @@ static int slab_mem_going_offline_callback(void *arg)
 
 	mutex_lock(&slab_mutex);
 	list_for_each_entry(s, &slab_caches, list)
-		kmem_cache_shrink(s);
+		__kmem_cache_shrink(s);
 	mutex_unlock(&slab_mutex);
 
 	return 0;