aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVladimir Davydov <vdavydov@parallels.com>2014-06-04 19:07:20 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2014-06-04 19:53:59 -0400
commit03afc0e25f7fc03537014a770f4c54ebbe63a24c (patch)
tree520cdb32e6d35cd5b4e61fc5254a151fb03fc24a
parentbfc8c90139ebd049b9801a951db3b9a4a00bed9c (diff)
slab: get_online_mems for kmem_cache_{create,destroy,shrink}
When we create a sl[au]b cache, we allocate kmem_cache_node structures for each online NUMA node. To handle nodes taken online/offline, we register memory hotplug notifier and allocate/free kmem_cache_node corresponding to the node that changes its state for each kmem cache. To synchronize between the two paths we hold the slab_mutex during both the cache creation/destruction path and while tuning per-node parts of kmem caches in memory hotplug handler, but that's not quite right, because it does not guarantee that a newly created cache will have all kmem_cache_nodes initialized in case it races with memory hotplug. For instance, in case of slub: CPU0 CPU1 ---- ---- kmem_cache_create: online_pages: __kmem_cache_create: slab_memory_callback: slab_mem_going_online_callback: lock slab_mutex for each slab_caches list entry allocate kmem_cache node unlock slab_mutex lock slab_mutex init_kmem_cache_nodes: for_each_node_state(node, N_NORMAL_MEMORY) allocate kmem_cache node add kmem_cache to slab_caches list unlock slab_mutex online_pages (continued): node_states_set_node As a result we'll get a kmem cache with not all kmem_cache_nodes allocated. To avoid issues like that we should hold get/put_online_mems() during the whole kmem cache creation/destruction/shrink paths, just like we deal with cpu hotplug. This patch does the trick. Note, that after it's applied, there is no need in taking the slab_mutex for kmem_cache_shrink any more, so it is removed from there. Signed-off-by: Vladimir Davydov <vdavydov@parallels.com> Cc: Christoph Lameter <cl@linux.com> Cc: Pekka Enberg <penberg@kernel.org> Cc: Tang Chen <tangchen@cn.fujitsu.com> Cc: Zhang Yanfei <zhangyanfei@cn.fujitsu.com> Cc: Toshi Kani <toshi.kani@hp.com> Cc: Xishi Qiu <qiuxishi@huawei.com> Cc: Jiang Liu <liuj97@gmail.com> Cc: Rafael J. 
Wysocki <rafael.j.wysocki@intel.com> Cc: David Rientjes <rientjes@google.com> Cc: Wen Congyang <wency@cn.fujitsu.com> Cc: Yasuaki Ishimatsu <isimatu.yasuaki@jp.fujitsu.com> Cc: Lai Jiangshan <laijs@cn.fujitsu.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--mm/slab.c26
-rw-r--r--mm/slab.h1
-rw-r--r--mm/slab_common.c35
-rw-r--r--mm/slob.c3
-rw-r--r--mm/slub.c5
5 files changed, 39 insertions, 31 deletions
diff --git a/mm/slab.c b/mm/slab.c
index 944ac58cfcf8..7067ea7f3927 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2480,8 +2480,7 @@ out:
2480 return nr_freed; 2480 return nr_freed;
2481} 2481}
2482 2482
2483/* Called with slab_mutex held to protect against cpu hotplug */ 2483int __kmem_cache_shrink(struct kmem_cache *cachep)
2484static int __cache_shrink(struct kmem_cache *cachep)
2485{ 2484{
2486 int ret = 0, i = 0; 2485 int ret = 0, i = 0;
2487 struct kmem_cache_node *n; 2486 struct kmem_cache_node *n;
@@ -2502,32 +2501,11 @@ static int __cache_shrink(struct kmem_cache *cachep)
2502 return (ret ? 1 : 0); 2501 return (ret ? 1 : 0);
2503} 2502}
2504 2503
2505/**
2506 * kmem_cache_shrink - Shrink a cache.
2507 * @cachep: The cache to shrink.
2508 *
2509 * Releases as many slabs as possible for a cache.
2510 * To help debugging, a zero exit status indicates all slabs were released.
2511 */
2512int kmem_cache_shrink(struct kmem_cache *cachep)
2513{
2514 int ret;
2515 BUG_ON(!cachep || in_interrupt());
2516
2517 get_online_cpus();
2518 mutex_lock(&slab_mutex);
2519 ret = __cache_shrink(cachep);
2520 mutex_unlock(&slab_mutex);
2521 put_online_cpus();
2522 return ret;
2523}
2524EXPORT_SYMBOL(kmem_cache_shrink);
2525
2526int __kmem_cache_shutdown(struct kmem_cache *cachep) 2504int __kmem_cache_shutdown(struct kmem_cache *cachep)
2527{ 2505{
2528 int i; 2506 int i;
2529 struct kmem_cache_node *n; 2507 struct kmem_cache_node *n;
2530 int rc = __cache_shrink(cachep); 2508 int rc = __kmem_cache_shrink(cachep);
2531 2509
2532 if (rc) 2510 if (rc)
2533 return rc; 2511 return rc;
diff --git a/mm/slab.h b/mm/slab.h
index 863e67b8c8c9..d85d59803d5f 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -91,6 +91,7 @@ __kmem_cache_alias(const char *name, size_t size, size_t align,
91#define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS) 91#define CACHE_CREATE_MASK (SLAB_CORE_FLAGS | SLAB_DEBUG_FLAGS | SLAB_CACHE_FLAGS)
92 92
93int __kmem_cache_shutdown(struct kmem_cache *); 93int __kmem_cache_shutdown(struct kmem_cache *);
94int __kmem_cache_shrink(struct kmem_cache *);
94void slab_kmem_cache_release(struct kmem_cache *); 95void slab_kmem_cache_release(struct kmem_cache *);
95 96
96struct seq_file; 97struct seq_file;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 2834bc2886fd..2dd920dc3776 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -205,6 +205,8 @@ kmem_cache_create(const char *name, size_t size, size_t align,
205 int err; 205 int err;
206 206
207 get_online_cpus(); 207 get_online_cpus();
208 get_online_mems();
209
208 mutex_lock(&slab_mutex); 210 mutex_lock(&slab_mutex);
209 211
210 err = kmem_cache_sanity_check(name, size); 212 err = kmem_cache_sanity_check(name, size);
@@ -239,6 +241,8 @@ kmem_cache_create(const char *name, size_t size, size_t align,
239 241
240out_unlock: 242out_unlock:
241 mutex_unlock(&slab_mutex); 243 mutex_unlock(&slab_mutex);
244
245 put_online_mems();
242 put_online_cpus(); 246 put_online_cpus();
243 247
244 if (err) { 248 if (err) {
@@ -272,6 +276,8 @@ void kmem_cache_create_memcg(struct mem_cgroup *memcg, struct kmem_cache *root_c
272 char *cache_name; 276 char *cache_name;
273 277
274 get_online_cpus(); 278 get_online_cpus();
279 get_online_mems();
280
275 mutex_lock(&slab_mutex); 281 mutex_lock(&slab_mutex);
276 282
277 /* 283 /*
@@ -295,6 +301,8 @@ void kmem_cache_create_memcg(struct mem_cgroup *memcg, struct kmem_cache *root_c
295 301
296out_unlock: 302out_unlock:
297 mutex_unlock(&slab_mutex); 303 mutex_unlock(&slab_mutex);
304
305 put_online_mems();
298 put_online_cpus(); 306 put_online_cpus();
299} 307}
300 308
@@ -328,6 +336,8 @@ void slab_kmem_cache_release(struct kmem_cache *s)
328void kmem_cache_destroy(struct kmem_cache *s) 336void kmem_cache_destroy(struct kmem_cache *s)
329{ 337{
330 get_online_cpus(); 338 get_online_cpus();
339 get_online_mems();
340
331 mutex_lock(&slab_mutex); 341 mutex_lock(&slab_mutex);
332 342
333 s->refcount--; 343 s->refcount--;
@@ -359,15 +369,36 @@ void kmem_cache_destroy(struct kmem_cache *s)
359#else 369#else
360 slab_kmem_cache_release(s); 370 slab_kmem_cache_release(s);
361#endif 371#endif
362 goto out_put_cpus; 372 goto out;
363 373
364out_unlock: 374out_unlock:
365 mutex_unlock(&slab_mutex); 375 mutex_unlock(&slab_mutex);
366out_put_cpus: 376out:
377 put_online_mems();
367 put_online_cpus(); 378 put_online_cpus();
368} 379}
369EXPORT_SYMBOL(kmem_cache_destroy); 380EXPORT_SYMBOL(kmem_cache_destroy);
370 381
382/**
383 * kmem_cache_shrink - Shrink a cache.
384 * @cachep: The cache to shrink.
385 *
386 * Releases as many slabs as possible for a cache.
387 * To help debugging, a zero exit status indicates all slabs were released.
388 */
389int kmem_cache_shrink(struct kmem_cache *cachep)
390{
391 int ret;
392
393 get_online_cpus();
394 get_online_mems();
395 ret = __kmem_cache_shrink(cachep);
396 put_online_mems();
397 put_online_cpus();
398 return ret;
399}
400EXPORT_SYMBOL(kmem_cache_shrink);
401
371int slab_is_available(void) 402int slab_is_available(void)
372{ 403{
373 return slab_state >= UP; 404 return slab_state >= UP;
diff --git a/mm/slob.c b/mm/slob.c
index 730cad45d4be..21980e0f39a8 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -620,11 +620,10 @@ int __kmem_cache_shutdown(struct kmem_cache *c)
620 return 0; 620 return 0;
621} 621}
622 622
623int kmem_cache_shrink(struct kmem_cache *d) 623int __kmem_cache_shrink(struct kmem_cache *d)
624{ 624{
625 return 0; 625 return 0;
626} 626}
627EXPORT_SYMBOL(kmem_cache_shrink);
628 627
629struct kmem_cache kmem_cache_boot = { 628struct kmem_cache kmem_cache_boot = {
630 .name = "kmem_cache", 629 .name = "kmem_cache",
diff --git a/mm/slub.c b/mm/slub.c
index 9cb2501a2960..5d1b653183ab 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3398,7 +3398,7 @@ EXPORT_SYMBOL(kfree);
3398 * being allocated from last increasing the chance that the last objects 3398 * being allocated from last increasing the chance that the last objects
3399 * are freed in them. 3399 * are freed in them.
3400 */ 3400 */
3401int kmem_cache_shrink(struct kmem_cache *s) 3401int __kmem_cache_shrink(struct kmem_cache *s)
3402{ 3402{
3403 int node; 3403 int node;
3404 int i; 3404 int i;
@@ -3454,7 +3454,6 @@ int kmem_cache_shrink(struct kmem_cache *s)
3454 kfree(slabs_by_inuse); 3454 kfree(slabs_by_inuse);
3455 return 0; 3455 return 0;
3456} 3456}
3457EXPORT_SYMBOL(kmem_cache_shrink);
3458 3457
3459static int slab_mem_going_offline_callback(void *arg) 3458static int slab_mem_going_offline_callback(void *arg)
3460{ 3459{
@@ -3462,7 +3461,7 @@ static int slab_mem_going_offline_callback(void *arg)
3462 3461
3463 mutex_lock(&slab_mutex); 3462 mutex_lock(&slab_mutex);
3464 list_for_each_entry(s, &slab_caches, list) 3463 list_for_each_entry(s, &slab_caches, list)
3465 kmem_cache_shrink(s); 3464 __kmem_cache_shrink(s);
3466 mutex_unlock(&slab_mutex); 3465 mutex_unlock(&slab_mutex);
3467 3466
3468 return 0; 3467 return 0;