Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--   mm/memcontrol.c   180
1 file changed, 35 insertions(+), 145 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 85df503ec023..ef91e856c7e4 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -296,7 +296,6 @@ struct mem_cgroup {
 	 * Should the accounting and control be hierarchical, per subtree?
 	 */
 	bool use_hierarchy;
-	unsigned long kmem_account_flags; /* See KMEM_ACCOUNTED_*, below */
 
 	bool oom_lock;
 	atomic_t under_oom;
@@ -366,22 +365,11 @@ struct mem_cgroup {
 	/* WARNING: nodeinfo must be the last member here */
 };
 
-/* internal only representation about the status of kmem accounting. */
-enum {
-	KMEM_ACCOUNTED_ACTIVE, /* accounted by this cgroup itself */
-};
-
 #ifdef CONFIG_MEMCG_KMEM
-static inline void memcg_kmem_set_active(struct mem_cgroup *memcg)
-{
-	set_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags);
-}
-
 static bool memcg_kmem_is_active(struct mem_cgroup *memcg)
 {
-	return test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags);
+	return memcg->kmemcg_id >= 0;
 }
-
 #endif
 
 /* Stuffs for move charges at task migration. */
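
For readers following the hunk above: the KMEM_ACCOUNTED_ACTIVE bit is dropped and "kmem active" is instead inferred from kmemcg_id, which a later hunk initializes to -1 in css_alloc. The standalone sketch below (illustration only, not part of the patch; the struct is a stand-in carrying just the one field this diff relies on) shows the before/after semantics:

/* sketch.c - minimal, user-space illustration of the new activity check */
#include <stdbool.h>
#include <stdio.h>

struct memcg_sketch {
	int kmemcg_id;	/* -1 until memcg_activate_kmem() assigns an id */
};

static bool memcg_kmem_is_active_sketch(const struct memcg_sketch *memcg)
{
	/* replaces test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags) */
	return memcg->kmemcg_id >= 0;
}

int main(void)
{
	struct memcg_sketch m = { .kmemcg_id = -1 };	/* as done in css_alloc */

	printf("before activation: %d\n", memcg_kmem_is_active_sketch(&m));
	m.kmemcg_id = 0;				/* as done at the end of activation */
	printf("after activation:  %d\n", memcg_kmem_is_active_sketch(&m));
	return 0;
}

The real memcg_activate_kmem() (see the hunk at old line 3570 below) assigns the id only after the static key has been enabled, which is what lets the id double as the activity flag.
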
@@ -1571,7 +1559,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 	 * select it. The goal is to allow it to allocate so that it may
 	 * quickly exit and free its memory.
 	 */
-	if (fatal_signal_pending(current) || current->flags & PF_EXITING) {
+	if (fatal_signal_pending(current) || task_will_free_mem(current)) {
 		set_thread_flag(TIF_MEMDIE);
 		return;
 	}
@@ -1628,6 +1616,8 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
 			 NULL, "Memory cgroup out of memory");
 }
 
+#if MAX_NUMNODES > 1
+
 /**
  * test_mem_cgroup_node_reclaimable
  * @memcg: the target memcg
@@ -1650,7 +1640,6 @@ static bool test_mem_cgroup_node_reclaimable(struct mem_cgroup *memcg,
 	return false;
 
 }
-#if MAX_NUMNODES > 1
 
 /*
  * Always updating the nodemask is not very good - even if we have an empty
@@ -2646,7 +2635,6 @@ static void memcg_register_cache(struct mem_cgroup *memcg,
 	if (!cachep)
 		return;
 
-	css_get(&memcg->css);
 	list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches);
 
 	/*
@@ -2680,40 +2668,6 @@ static void memcg_unregister_cache(struct kmem_cache *cachep)
 	list_del(&cachep->memcg_params->list);
 
 	kmem_cache_destroy(cachep);
-
-	/* drop the reference taken in memcg_register_cache */
-	css_put(&memcg->css);
-}
-
-/*
- * During the creation a new cache, we need to disable our accounting mechanism
- * altogether. This is true even if we are not creating, but rather just
- * enqueing new caches to be created.
- *
- * This is because that process will trigger allocations; some visible, like
- * explicit kmallocs to auxiliary data structures, name strings and internal
- * cache structures; some well concealed, like INIT_WORK() that can allocate
- * objects during debug.
- *
- * If any allocation happens during memcg_kmem_get_cache, we will recurse back
- * to it. This may not be a bounded recursion: since the first cache creation
- * failed to complete (waiting on the allocation), we'll just try to create the
- * cache again, failing at the same point.
- *
- * memcg_kmem_get_cache is prepared to abort after seeing a positive count of
- * memcg_kmem_skip_account. So we enclose anything that might allocate memory
- * inside the following two functions.
- */
-static inline void memcg_stop_kmem_account(void)
-{
-	VM_BUG_ON(!current->mm);
-	current->memcg_kmem_skip_account++;
-}
-
-static inline void memcg_resume_kmem_account(void)
-{
-	VM_BUG_ON(!current->mm);
-	current->memcg_kmem_skip_account--;
 }
 
 int __memcg_cleanup_cache_params(struct kmem_cache *s)
@@ -2747,9 +2701,7 @@ static void memcg_unregister_all_caches(struct mem_cgroup *memcg)
 	mutex_lock(&memcg_slab_mutex);
 	list_for_each_entry_safe(params, tmp, &memcg->memcg_slab_caches, list) {
 		cachep = memcg_params_to_cache(params);
-		kmem_cache_shrink(cachep);
-		if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
-			memcg_unregister_cache(cachep);
+		memcg_unregister_cache(cachep);
 	}
 	mutex_unlock(&memcg_slab_mutex);
 }
@@ -2784,10 +2736,10 @@ static void __memcg_schedule_register_cache(struct mem_cgroup *memcg,
 	struct memcg_register_cache_work *cw;
 
 	cw = kmalloc(sizeof(*cw), GFP_NOWAIT);
-	if (cw == NULL) {
-		css_put(&memcg->css);
+	if (!cw)
 		return;
-	}
+
+	css_get(&memcg->css);
 
 	cw->memcg = memcg;
 	cw->cachep = cachep;
@@ -2810,20 +2762,16 @@ static void memcg_schedule_register_cache(struct mem_cgroup *memcg,
 	 * this point we can't allow ourselves back into memcg_kmem_get_cache,
 	 * the safest choice is to do it like this, wrapping the whole function.
 	 */
-	memcg_stop_kmem_account();
+	current->memcg_kmem_skip_account = 1;
 	__memcg_schedule_register_cache(memcg, cachep);
-	memcg_resume_kmem_account();
+	current->memcg_kmem_skip_account = 0;
 }
 
 int __memcg_charge_slab(struct kmem_cache *cachep, gfp_t gfp, int order)
 {
 	unsigned int nr_pages = 1 << order;
-	int res;
 
-	res = memcg_charge_kmem(cachep->memcg_params->memcg, gfp, nr_pages);
-	if (!res)
-		atomic_add(nr_pages, &cachep->memcg_params->nr_pages);
-	return res;
+	return memcg_charge_kmem(cachep->memcg_params->memcg, gfp, nr_pages);
 }
 
 void __memcg_uncharge_slab(struct kmem_cache *cachep, int order)
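
The hunk above drops the memcg_stop_kmem_account()/memcg_resume_kmem_account() helpers (removed earlier in this patch) in favour of direct writes to current->memcg_kmem_skip_account around the work-item allocation, so cache registration cannot recurse into the memcg cache selector. A standalone user-space sketch of that guard, with a stand-in struct in place of task_struct (illustration only, not part of the patch):

/* skip_account_sketch.c - recursion-guard pattern in miniature */
#include <stdbool.h>
#include <stdio.h>

/* stand-in for task_struct; only the field this patch touches */
struct task_sketch {
	unsigned int memcg_kmem_skip_account;
};

static struct task_sketch current_task;
#define current (&current_task)

/* mirrors the early bail-out in __memcg_kmem_get_cache() */
static bool kmem_get_cache_bypassed(void)
{
	return current->memcg_kmem_skip_account != 0;
}

/* mirrors memcg_schedule_register_cache() after the patch */
static void schedule_register_cache_sketch(void)
{
	current->memcg_kmem_skip_account = 1;	/* was memcg_stop_kmem_account() */
	/* ...kmalloc() the work item and queue it; allocations here are bypassed... */
	printf("bypassed while scheduling: %d\n", kmem_get_cache_bypassed());
	current->memcg_kmem_skip_account = 0;	/* was memcg_resume_kmem_account() */
}

int main(void)
{
	schedule_register_cache_sketch();
	printf("bypassed afterwards: %d\n", kmem_get_cache_bypassed());
	return 0;
}
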
@@ -2831,7 +2779,6 @@ void __memcg_uncharge_slab(struct kmem_cache *cachep, int order)
 	unsigned int nr_pages = 1 << order;
 
 	memcg_uncharge_kmem(cachep->memcg_params->memcg, nr_pages);
-	atomic_sub(nr_pages, &cachep->memcg_params->nr_pages);
 }
 
 /*
@@ -2847,8 +2794,7 @@ void __memcg_uncharge_slab(struct kmem_cache *cachep, int order)
  * Can't be called in interrupt context or from kernel threads.
  * This function needs to be called with rcu_read_lock() held.
  */
-struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
-					  gfp_t gfp)
+struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep)
 {
 	struct mem_cgroup *memcg;
 	struct kmem_cache *memcg_cachep;
@@ -2856,25 +2802,16 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
 	VM_BUG_ON(!cachep->memcg_params);
 	VM_BUG_ON(!cachep->memcg_params->is_root_cache);
 
-	if (!current->mm || current->memcg_kmem_skip_account)
+	if (current->memcg_kmem_skip_account)
 		return cachep;
 
-	rcu_read_lock();
-	memcg = mem_cgroup_from_task(rcu_dereference(current->mm->owner));
-
+	memcg = get_mem_cgroup_from_mm(current->mm);
 	if (!memcg_kmem_is_active(memcg))
 		goto out;
 
 	memcg_cachep = cache_from_memcg_idx(cachep, memcg_cache_id(memcg));
-	if (likely(memcg_cachep)) {
-		cachep = memcg_cachep;
-		goto out;
-	}
-
-	/* The corresponding put will be done in the workqueue. */
-	if (!css_tryget_online(&memcg->css))
-		goto out;
-	rcu_read_unlock();
+	if (likely(memcg_cachep))
+		return memcg_cachep;
 
 	/*
 	 * If we are in a safe context (can wait, and not in interrupt
@@ -2889,12 +2826,17 @@ struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
 	 * defer everything.
 	 */
 	memcg_schedule_register_cache(memcg, cachep);
-	return cachep;
 out:
-	rcu_read_unlock();
+	css_put(&memcg->css);
 	return cachep;
 }
 
+void __memcg_kmem_put_cache(struct kmem_cache *cachep)
+{
+	if (!is_root_cache(cachep))
+		css_put(&cachep->memcg_params->memcg->css);
+}
+
 /*
  * We need to verify if the allocation against current->mm->owner's memcg is
  * possible for the given order. But the page is not allocated yet, so we'll
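
With the two hunks above, __memcg_kmem_get_cache() pins the memcg via get_mem_cgroup_from_mm() and either drops that reference on the out: path or leaves it held for the new __memcg_kmem_put_cache() to release after the allocation. A standalone sketch of the get/put pairing (illustration only; the counter and flags are simplified stand-ins for the css reference and the lookups in the real code):

/* get_put_sketch.c - reference pairing around a per-memcg cache lookup */
#include <stdbool.h>
#include <stdio.h>

struct memcg_sketch {
	int css_refs;		/* stand-in for the css reference count */
	bool kmem_active;	/* stand-in for kmemcg_id >= 0 */
	bool has_cache;		/* stand-in for a cache_from_memcg_idx() hit */
};

static void css_get_sketch(struct memcg_sketch *m) { m->css_refs++; }
static void css_put_sketch(struct memcg_sketch *m) { m->css_refs--; }

/* Returns true if the caller now owns a reference to drop via put_cache_sketch(). */
static bool get_cache_sketch(struct memcg_sketch *m)
{
	css_get_sketch(m);			/* get_mem_cgroup_from_mm() */
	if (!m->kmem_active || !m->has_cache) {
		css_put_sketch(m);		/* the "out:" path */
		return false;			/* caller falls back to the root cache */
	}
	return true;				/* reference stays held across the allocation */
}

static void put_cache_sketch(struct memcg_sketch *m)
{
	css_put_sketch(m);			/* __memcg_kmem_put_cache() */
}

int main(void)
{
	struct memcg_sketch m = { .css_refs = 1, .kmem_active = true, .has_cache = true };

	if (get_cache_sketch(&m))
		put_cache_sketch(&m);		/* after the slab object is allocated */
	printf("css refs back to %d\n", m.css_refs);
	return 0;
}
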
@@ -2917,34 +2859,6 @@ __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **_memcg, int order)
 
 	*_memcg = NULL;
 
-	/*
-	 * Disabling accounting is only relevant for some specific memcg
-	 * internal allocations. Therefore we would initially not have such
-	 * check here, since direct calls to the page allocator that are
-	 * accounted to kmemcg (alloc_kmem_pages and friends) only happen
-	 * outside memcg core. We are mostly concerned with cache allocations,
-	 * and by having this test at memcg_kmem_get_cache, we are already able
-	 * to relay the allocation to the root cache and bypass the memcg cache
-	 * altogether.
-	 *
-	 * There is one exception, though: the SLUB allocator does not create
-	 * large order caches, but rather service large kmallocs directly from
-	 * the page allocator. Therefore, the following sequence when backed by
-	 * the SLUB allocator:
-	 *
-	 * memcg_stop_kmem_account();
-	 * kmalloc(<large_number>)
-	 * memcg_resume_kmem_account();
-	 *
-	 * would effectively ignore the fact that we should skip accounting,
-	 * since it will drive us directly to this function without passing
-	 * through the cache selector memcg_kmem_get_cache. Such large
-	 * allocations are extremely rare but can happen, for instance, for the
-	 * cache arrays. We bring this test here.
-	 */
-	if (!current->mm || current->memcg_kmem_skip_account)
-		return true;
-
 	memcg = get_mem_cgroup_from_mm(current->mm);
 
 	if (!memcg_kmem_is_active(memcg)) {
@@ -2985,10 +2899,6 @@ void __memcg_kmem_uncharge_pages(struct page *page, int order)
 	memcg_uncharge_kmem(memcg, 1 << order);
 	page->mem_cgroup = NULL;
 }
-#else
-static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg)
-{
-}
 #endif /* CONFIG_MEMCG_KMEM */
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -3539,12 +3449,6 @@ static int memcg_activate_kmem(struct mem_cgroup *memcg,
 		return 0;
 
 	/*
-	 * We are going to allocate memory for data shared by all memory
-	 * cgroups so let's stop accounting here.
-	 */
-	memcg_stop_kmem_account();
-
-	/*
 	 * For simplicity, we won't allow this to be disabled. It also can't
 	 * be changed if the cgroup has children already, or if tasks had
 	 * already joined.
@@ -3570,25 +3474,22 @@ static int memcg_activate_kmem(struct mem_cgroup *memcg,
 		goto out;
 	}
 
-	memcg->kmemcg_id = memcg_id;
-	INIT_LIST_HEAD(&memcg->memcg_slab_caches);
-
 	/*
-	 * We couldn't have accounted to this cgroup, because it hasn't got the
-	 * active bit set yet, so this should succeed.
+	 * We couldn't have accounted to this cgroup, because it hasn't got
+	 * activated yet, so this should succeed.
 	 */
 	err = page_counter_limit(&memcg->kmem, nr_pages);
 	VM_BUG_ON(err);
 
 	static_key_slow_inc(&memcg_kmem_enabled_key);
 	/*
-	 * Setting the active bit after enabling static branching will
+	 * A memory cgroup is considered kmem-active as soon as it gets
+	 * kmemcg_id. Setting the id after enabling static branching will
 	 * guarantee no one starts accounting before all call sites are
 	 * patched.
 	 */
-	memcg_kmem_set_active(memcg);
+	memcg->kmemcg_id = memcg_id;
 out:
-	memcg_resume_kmem_account();
 	return err;
 }
 
@@ -3791,11 +3692,6 @@ static int memcg_numa_stat_show(struct seq_file *m, void *v)
 }
 #endif /* CONFIG_NUMA */
 
-static inline void mem_cgroup_lru_names_not_uptodate(void)
-{
-	BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);
-}
-
 static int memcg_stat_show(struct seq_file *m, void *v)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
@@ -3803,6 +3699,8 @@ static int memcg_stat_show(struct seq_file *m, void *v)
 	struct mem_cgroup *mi;
 	unsigned int i;
 
+	BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);
+
 	for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
 		if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
 			continue;
@@ -4259,7 +4157,6 @@ static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 {
 	int ret;
 
-	memcg->kmemcg_id = -1;
 	ret = memcg_propagate_kmem(memcg);
 	if (ret)
 		return ret;
@@ -4269,6 +4166,7 @@ static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
 
 static void memcg_destroy_kmem(struct mem_cgroup *memcg)
 {
+	memcg_unregister_all_caches(memcg);
 	mem_cgroup_sockets_destroy(memcg);
 }
 #else
@@ -4724,17 +4622,6 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 
 	free_percpu(memcg->stat);
 
-	/*
-	 * We need to make sure that (at least for now), the jump label
-	 * destruction code runs outside of the cgroup lock. This is because
-	 * get_online_cpus(), which is called from the static_branch update,
-	 * can't be called inside the cgroup_lock. cpusets are the ones
-	 * enforcing this dependency, so if they ever change, we might as well.
-	 *
-	 * schedule_work() will guarantee this happens. Be careful if you need
-	 * to move this code around, and make sure it is outside
-	 * the cgroup_lock.
-	 */
 	disarm_static_keys(memcg);
 	kfree(memcg);
 }
@@ -4804,6 +4691,10 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 	vmpressure_init(&memcg->vmpressure);
 	INIT_LIST_HEAD(&memcg->event_list);
 	spin_lock_init(&memcg->event_list_lock);
+#ifdef CONFIG_MEMCG_KMEM
+	memcg->kmemcg_id = -1;
+	INIT_LIST_HEAD(&memcg->memcg_slab_caches);
+#endif
 
 	return &memcg->css;
 
@@ -4885,7 +4776,6 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
 	}
 	spin_unlock(&memcg->event_list_lock);
 
-	memcg_unregister_all_caches(memcg);
 	vmpressure_cleanup(&memcg->vmpressure);
 }
 