author     Vladimir Davydov <vdavydov@parallels.com>       2015-02-10 17:11:47 -0500
committer  Linus Torvalds <torvalds@linux-foundation.org>  2015-02-10 17:30:34 -0500
commit     d5b3cf7139b8770af4ed8bb36a1ab9d290ac39e9 (patch)
tree       d96432c889fcb6f3790f41651a13a489e6af8f5c /mm
parent     3e0350a36414a73c5c2d1e354f8c0ab4ace1296d (diff)
memcg: zap memcg_slab_caches and memcg_slab_mutex
mem_cgroup->memcg_slab_caches is a list of kmem caches corresponding to the
given cgroup. Currently, it is only used on css free in order to destroy all
caches corresponding to the memory cgroup being freed. The list is protected
by memcg_slab_mutex. The mutex is also used to protect
kmem_cache->memcg_params->memcg_caches arrays and synchronizes
kmem_cache_destroy vs memcg_unregister_all_caches.

However, we can perfectly get on without these two. To destroy all caches
corresponding to a memory cgroup, we can walk over the global list of kmem
caches, slab_caches, and we can do all the synchronization stuff using the
slab_mutex instead of the memcg_slab_mutex. This patch therefore gets rid of
the memcg_slab_caches and memcg_slab_mutex.

Apart from this nice cleanup, it also:

 - assures that rcu_barrier() is called at most once when a root cache is
   destroyed or a memory cgroup is freed, no matter how many caches have the
   SLAB_DESTROY_BY_RCU flag set;

 - fixes the race between kmem_cache_destroy and kmem_cache_create that
   exists, because memcg_cleanup_cache_params, which is called from
   kmem_cache_destroy after checking that kmem_cache->refcount=0, releases
   the slab_mutex, which gives kmem_cache_create a chance to make an alias
   to a cache doomed to be destroyed.

Signed-off-by: Vladimir Davydov <vdavydov@parallels.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Acked-by: Christoph Lameter <cl@linux.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
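For orientation, here is a minimal userspace sketch (plain C with pthreads, not kernel code) of the scheme the message describes: one global cache list protected by one global mutex, with destruction deferred to a private release list so the expensive barrier is paid at most once. All names are invented for illustration only: cache_list/cache_mutex stand in for slab_caches/slab_mutex, expensive_barrier() for rcu_barrier(), and destroy_group_caches() roughly for the memcg_destroy_kmem_caches()/do_kmem_cache_shutdown()/do_kmem_cache_release() split introduced below.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct cache {
        char name[32];
        int group_id;           /* -1 marks a "root" cache */
        bool destroy_by_rcu;    /* stands in for SLAB_DESTROY_BY_RCU */
        struct cache *next;
};

static pthread_mutex_t cache_mutex = PTHREAD_MUTEX_INITIALIZER; /* ~ slab_mutex */
static struct cache *cache_list;        /* ~ the global slab_caches list */

static void expensive_barrier(void)     /* ~ rcu_barrier() */
{
        puts("barrier: waiting for all pending readers");
}

static void add_cache(const char *name, int group, bool rcu)
{
        struct cache *c = calloc(1, sizeof(*c));

        snprintf(c->name, sizeof(c->name), "%s", name);
        c->group_id = group;
        c->destroy_by_rcu = rcu;
        pthread_mutex_lock(&cache_mutex);
        c->next = cache_list;
        cache_list = c;
        pthread_mutex_unlock(&cache_mutex);
}

/*
 * Destroy every cache owned by @group_id: walk the one global list under the
 * one global mutex, move victims onto a private release list, then pay the
 * expensive barrier at most once before freeing them.
 */
static void destroy_group_caches(int group_id)
{
        struct cache *release = NULL, *c, **pp;
        bool need_barrier = false;

        pthread_mutex_lock(&cache_mutex);
        for (pp = &cache_list; (c = *pp) != NULL; ) {
                if (c->group_id != group_id) {
                        pp = &c->next;
                        continue;
                }
                *pp = c->next;          /* unlink from the global list */
                c->next = release;      /* park on the private release list */
                release = c;
                if (c->destroy_by_rcu)
                        need_barrier = true;
        }
        pthread_mutex_unlock(&cache_mutex);

        if (need_barrier)               /* one barrier, however many caches matched */
                expensive_barrier();

        while ((c = release) != NULL) {
                release = c->next;
                printf("released %s\n", c->name);
                free(c);
        }
}

int main(void)
{
        add_cache("dentry", -1, true);                  /* root cache, stays */
        add_cache("dentry(2:foo)", 2, true);            /* per-group copies, go away */
        add_cache("radix_tree_node(2:foo)", 2, true);
        destroy_group_caches(2);                        /* prints the barrier line once */
        return 0;
}

The point of the split is visible in destroy_group_caches(): victims are collected under the lock, but the barrier and the actual freeing happen after the lock is dropped, once per batch rather than once per cache.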
Diffstat (limited to 'mm')
-rw-r--r--   mm/memcontrol.c   | 156
-rw-r--r--   mm/slab_common.c  | 142
2 files changed, 118 insertions(+), 180 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index baf7eb27e3ae..f3f8a4f52a0c 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -343,9 +343,6 @@ struct mem_cgroup {
         struct cg_proto tcp_mem;
 #endif
 #if defined(CONFIG_MEMCG_KMEM)
-        /* analogous to slab_common's slab_caches list, but per-memcg;
-         * protected by memcg_slab_mutex */
-        struct list_head memcg_slab_caches;
         /* Index in the kmem_cache->memcg_params->memcg_caches array */
         int kmemcg_id;
 #endif
@@ -2476,25 +2473,6 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
 }
 
 #ifdef CONFIG_MEMCG_KMEM
-/*
- * The memcg_slab_mutex is held whenever a per memcg kmem cache is created or
- * destroyed. It protects memcg_caches arrays and memcg_slab_caches lists.
- */
-static DEFINE_MUTEX(memcg_slab_mutex);
-
-/*
- * This is a bit cumbersome, but it is rarely used and avoids a backpointer
- * in the memcg_cache_params struct.
- */
-static struct kmem_cache *memcg_params_to_cache(struct memcg_cache_params *p)
-{
-        struct kmem_cache *cachep;
-
-        VM_BUG_ON(p->is_root_cache);
-        cachep = p->root_cache;
-        return cache_from_memcg_idx(cachep, memcg_cache_id(p->memcg));
-}
-
 int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
                       unsigned long nr_pages)
 {
@@ -2578,10 +2556,7 @@ static int memcg_alloc_cache_id(void)
         else if (size > MEMCG_CACHES_MAX_SIZE)
                 size = MEMCG_CACHES_MAX_SIZE;
 
-        mutex_lock(&memcg_slab_mutex);
         err = memcg_update_all_caches(size);
-        mutex_unlock(&memcg_slab_mutex);
-
         if (err) {
                 ida_simple_remove(&kmem_limited_groups, id);
                 return err;
@@ -2604,120 +2579,20 @@ void memcg_update_array_size(int num)
         memcg_limited_groups_array_size = num;
 }
 
-static void memcg_register_cache(struct mem_cgroup *memcg,
-                                 struct kmem_cache *root_cache)
-{
-        struct kmem_cache *cachep;
-        int id;
-
-        lockdep_assert_held(&memcg_slab_mutex);
-
-        id = memcg_cache_id(memcg);
-
-        /*
-         * Since per-memcg caches are created asynchronously on first
-         * allocation (see memcg_kmem_get_cache()), several threads can try to
-         * create the same cache, but only one of them may succeed.
-         */
-        if (cache_from_memcg_idx(root_cache, id))
-                return;
-
-        cachep = memcg_create_kmem_cache(memcg, root_cache);
-        /*
-         * If we could not create a memcg cache, do not complain, because
-         * that's not critical at all as we can always proceed with the root
-         * cache.
-         */
-        if (!cachep)
-                return;
-
-        list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches);
-
-        /*
-         * Since readers won't lock (see cache_from_memcg_idx()), we need a
-         * barrier here to ensure nobody will see the kmem_cache partially
-         * initialized.
-         */
-        smp_wmb();
-
-        BUG_ON(root_cache->memcg_params->memcg_caches[id]);
-        root_cache->memcg_params->memcg_caches[id] = cachep;
-}
-
-static void memcg_unregister_cache(struct kmem_cache *cachep)
-{
-        struct kmem_cache *root_cache;
-        struct mem_cgroup *memcg;
-        int id;
-
-        lockdep_assert_held(&memcg_slab_mutex);
-
-        BUG_ON(is_root_cache(cachep));
-
-        root_cache = cachep->memcg_params->root_cache;
-        memcg = cachep->memcg_params->memcg;
-        id = memcg_cache_id(memcg);
-
-        BUG_ON(root_cache->memcg_params->memcg_caches[id] != cachep);
-        root_cache->memcg_params->memcg_caches[id] = NULL;
-
-        list_del(&cachep->memcg_params->list);
-
-        kmem_cache_destroy(cachep);
-}
-
-int __memcg_cleanup_cache_params(struct kmem_cache *s)
-{
-        struct kmem_cache *c;
-        int i, failed = 0;
-
-        mutex_lock(&memcg_slab_mutex);
-        for_each_memcg_cache_index(i) {
-                c = cache_from_memcg_idx(s, i);
-                if (!c)
-                        continue;
-
-                memcg_unregister_cache(c);
-
-                if (cache_from_memcg_idx(s, i))
-                        failed++;
-        }
-        mutex_unlock(&memcg_slab_mutex);
-        return failed;
-}
-
-static void memcg_unregister_all_caches(struct mem_cgroup *memcg)
-{
-        struct kmem_cache *cachep;
-        struct memcg_cache_params *params, *tmp;
-
-        if (!memcg_kmem_is_active(memcg))
-                return;
-
-        mutex_lock(&memcg_slab_mutex);
-        list_for_each_entry_safe(params, tmp, &memcg->memcg_slab_caches, list) {
-                cachep = memcg_params_to_cache(params);
-                memcg_unregister_cache(cachep);
-        }
-        mutex_unlock(&memcg_slab_mutex);
-}
-
-struct memcg_register_cache_work {
+struct memcg_kmem_cache_create_work {
         struct mem_cgroup *memcg;
         struct kmem_cache *cachep;
         struct work_struct work;
 };
 
-static void memcg_register_cache_func(struct work_struct *w)
+static void memcg_kmem_cache_create_func(struct work_struct *w)
 {
-        struct memcg_register_cache_work *cw =
-                container_of(w, struct memcg_register_cache_work, work);
+        struct memcg_kmem_cache_create_work *cw =
+                container_of(w, struct memcg_kmem_cache_create_work, work);
         struct mem_cgroup *memcg = cw->memcg;
         struct kmem_cache *cachep = cw->cachep;
 
-        mutex_lock(&memcg_slab_mutex);
-        memcg_register_cache(memcg, cachep);
-        mutex_unlock(&memcg_slab_mutex);
+        memcg_create_kmem_cache(memcg, cachep);
 
         css_put(&memcg->css);
         kfree(cw);
@@ -2726,10 +2601,10 @@ static void memcg_register_cache_func(struct work_struct *w)
 /*
  * Enqueue the creation of a per-memcg kmem_cache.
  */
-static void __memcg_schedule_register_cache(struct mem_cgroup *memcg,
+static void __memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
                                             struct kmem_cache *cachep)
 {
-        struct memcg_register_cache_work *cw;
+        struct memcg_kmem_cache_create_work *cw;
 
         cw = kmalloc(sizeof(*cw), GFP_NOWAIT);
         if (!cw)
@@ -2739,18 +2614,18 @@ static void __memcg_schedule_register_cache(struct mem_cgroup *memcg,
 
         cw->memcg = memcg;
         cw->cachep = cachep;
+        INIT_WORK(&cw->work, memcg_kmem_cache_create_func);
 
-        INIT_WORK(&cw->work, memcg_register_cache_func);
         schedule_work(&cw->work);
 }
 
-static void memcg_schedule_register_cache(struct mem_cgroup *memcg,
+static void memcg_schedule_kmem_cache_create(struct mem_cgroup *memcg,
                                           struct kmem_cache *cachep)
 {
         /*
          * We need to stop accounting when we kmalloc, because if the
          * corresponding kmalloc cache is not yet created, the first allocation
-         * in __memcg_schedule_register_cache will recurse.
+         * in __memcg_schedule_kmem_cache_create will recurse.
          *
          * However, it is better to enclose the whole function. Depending on
          * the debugging options enabled, INIT_WORK(), for instance, can
@@ -2759,7 +2634,7 @@ static void memcg_schedule_register_cache(struct mem_cgroup *memcg,
          * the safest choice is to do it like this, wrapping the whole function.
          */
         current->memcg_kmem_skip_account = 1;
-        __memcg_schedule_register_cache(memcg, cachep);
+        __memcg_schedule_kmem_cache_create(memcg, cachep);
         current->memcg_kmem_skip_account = 0;
 }
 
@@ -2807,7 +2682,7 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep)
          * could happen with the slab_mutex held. So it's better to
          * defer everything.
          */
-        memcg_schedule_register_cache(memcg, cachep);
+        memcg_schedule_kmem_cache_create(memcg, cachep);
 out:
         css_put(&memcg->css);
         return cachep;
@@ -4136,7 +4011,7 @@ static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 
 static void memcg_destroy_kmem(struct mem_cgroup *memcg)
 {
-        memcg_unregister_all_caches(memcg);
+        memcg_destroy_kmem_caches(memcg);
         mem_cgroup_sockets_destroy(memcg);
 }
 #else
@@ -4664,7 +4539,6 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
         spin_lock_init(&memcg->event_list_lock);
 #ifdef CONFIG_MEMCG_KMEM
         memcg->kmemcg_id = -1;
-        INIT_LIST_HEAD(&memcg->memcg_slab_caches);
 #endif
 
         return &memcg->css;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 1b782a2d3b3d..6e1e4cf65836 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -425,6 +425,49 @@ out_unlock:
 }
 EXPORT_SYMBOL(kmem_cache_create);
 
+static int do_kmem_cache_shutdown(struct kmem_cache *s,
+                struct list_head *release, bool *need_rcu_barrier)
+{
+        if (__kmem_cache_shutdown(s) != 0) {
+                printk(KERN_ERR "kmem_cache_destroy %s: "
+                       "Slab cache still has objects\n", s->name);
+                dump_stack();
+                return -EBUSY;
+        }
+
+        if (s->flags & SLAB_DESTROY_BY_RCU)
+                *need_rcu_barrier = true;
+
+#ifdef CONFIG_MEMCG_KMEM
+        if (!is_root_cache(s)) {
+                struct kmem_cache *root_cache = s->memcg_params->root_cache;
+                int memcg_id = memcg_cache_id(s->memcg_params->memcg);
+
+                BUG_ON(root_cache->memcg_params->memcg_caches[memcg_id] != s);
+                root_cache->memcg_params->memcg_caches[memcg_id] = NULL;
+        }
+#endif
+        list_move(&s->list, release);
+        return 0;
+}
+
+static void do_kmem_cache_release(struct list_head *release,
+                                  bool need_rcu_barrier)
+{
+        struct kmem_cache *s, *s2;
+
+        if (need_rcu_barrier)
+                rcu_barrier();
+
+        list_for_each_entry_safe(s, s2, release, list) {
+#ifdef SLAB_SUPPORTS_SYSFS
+                sysfs_slab_remove(s);
+#else
+                slab_kmem_cache_release(s);
+#endif
+        }
+}
+
 #ifdef CONFIG_MEMCG_KMEM
 /*
  * memcg_create_kmem_cache - Create a cache for a memory cgroup.
@@ -435,10 +478,11 @@ EXPORT_SYMBOL(kmem_cache_create);
  * requests going from @memcg to @root_cache. The new cache inherits properties
  * from its parent.
  */
-struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
+void memcg_create_kmem_cache(struct mem_cgroup *memcg,
                              struct kmem_cache *root_cache)
 {
         static char memcg_name_buf[NAME_MAX + 1]; /* protected by slab_mutex */
+        int memcg_id = memcg_cache_id(memcg);
         struct kmem_cache *s = NULL;
         char *cache_name;
 
@@ -447,6 +491,14 @@ struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
 
         mutex_lock(&slab_mutex);
 
+        /*
+         * Since per-memcg caches are created asynchronously on first
+         * allocation (see memcg_kmem_get_cache()), several threads can try to
+         * create the same cache, but only one of them may succeed.
+         */
+        if (cache_from_memcg_idx(root_cache, memcg_id))
+                goto out_unlock;
+
         cgroup_name(mem_cgroup_css(memcg)->cgroup,
                     memcg_name_buf, sizeof(memcg_name_buf));
         cache_name = kasprintf(GFP_KERNEL, "%s(%d:%s)", root_cache->name,
@@ -458,49 +510,73 @@ struct kmem_cache *memcg_create_kmem_cache(struct mem_cgroup *memcg,
                          root_cache->size, root_cache->align,
                          root_cache->flags, root_cache->ctor,
                          memcg, root_cache);
+        /*
+         * If we could not create a memcg cache, do not complain, because
+         * that's not critical at all as we can always proceed with the root
+         * cache.
+         */
         if (IS_ERR(s)) {
                 kfree(cache_name);
-                s = NULL;
+                goto out_unlock;
         }
 
+        /*
+         * Since readers won't lock (see cache_from_memcg_idx()), we need a
+         * barrier here to ensure nobody will see the kmem_cache partially
+         * initialized.
+         */
+        smp_wmb();
+        root_cache->memcg_params->memcg_caches[memcg_id] = s;
+
 out_unlock:
         mutex_unlock(&slab_mutex);
 
         put_online_mems();
         put_online_cpus();
-
-        return s;
 }
 
-static int memcg_cleanup_cache_params(struct kmem_cache *s)
+void memcg_destroy_kmem_caches(struct mem_cgroup *memcg)
 {
-        int rc;
+        LIST_HEAD(release);
+        bool need_rcu_barrier = false;
+        struct kmem_cache *s, *s2;
 
-        if (!s->memcg_params ||
-            !s->memcg_params->is_root_cache)
-                return 0;
+        get_online_cpus();
+        get_online_mems();
 
-        mutex_unlock(&slab_mutex);
-        rc = __memcg_cleanup_cache_params(s);
         mutex_lock(&slab_mutex);
+        list_for_each_entry_safe(s, s2, &slab_caches, list) {
+                if (is_root_cache(s) || s->memcg_params->memcg != memcg)
+                        continue;
+                /*
+                 * The cgroup is about to be freed and therefore has no charges
+                 * left. Hence, all its caches must be empty by now.
+                 */
+                BUG_ON(do_kmem_cache_shutdown(s, &release, &need_rcu_barrier));
+        }
+        mutex_unlock(&slab_mutex);
 
-        return rc;
-}
-#else
-static int memcg_cleanup_cache_params(struct kmem_cache *s)
-{
-        return 0;
+        put_online_mems();
+        put_online_cpus();
+
+        do_kmem_cache_release(&release, need_rcu_barrier);
 }
 #endif /* CONFIG_MEMCG_KMEM */
 
 void slab_kmem_cache_release(struct kmem_cache *s)
 {
+        memcg_free_cache_params(s);
         kfree(s->name);
         kmem_cache_free(kmem_cache, s);
 }
 
 void kmem_cache_destroy(struct kmem_cache *s)
 {
+        int i;
+        LIST_HEAD(release);
+        bool need_rcu_barrier = false;
+        bool busy = false;
+
         get_online_cpus();
         get_online_mems();
 
@@ -510,35 +586,23 @@ void kmem_cache_destroy(struct kmem_cache *s)
         if (s->refcount)
                 goto out_unlock;
 
-        if (memcg_cleanup_cache_params(s) != 0)
-                goto out_unlock;
+        for_each_memcg_cache_index(i) {
+                struct kmem_cache *c = cache_from_memcg_idx(s, i);
 
-        if (__kmem_cache_shutdown(s) != 0) {
-                printk(KERN_ERR "kmem_cache_destroy %s: "
-                       "Slab cache still has objects\n", s->name);
-                dump_stack();
-                goto out_unlock;
+                if (c && do_kmem_cache_shutdown(c, &release, &need_rcu_barrier))
+                        busy = true;
         }
 
-        list_del(&s->list);
-
-        mutex_unlock(&slab_mutex);
-        if (s->flags & SLAB_DESTROY_BY_RCU)
-                rcu_barrier();
-
-        memcg_free_cache_params(s);
-#ifdef SLAB_SUPPORTS_SYSFS
-        sysfs_slab_remove(s);
-#else
-        slab_kmem_cache_release(s);
-#endif
-        goto out;
+        if (!busy)
+                do_kmem_cache_shutdown(s, &release, &need_rcu_barrier);
 
 out_unlock:
         mutex_unlock(&slab_mutex);
-out:
+
         put_online_mems();
         put_online_cpus();
+
+        do_kmem_cache_release(&release, need_rcu_barrier);
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
 