path: root/mm/memcontrol.c
author:    Johannes Weiner <hannes@cmpxchg.org>  2014-04-07 18:37:45 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org>  2014-04-07 19:35:57 -0400
commit:    6d1fdc48938cd51a3964778d78f27cb26c8eb55d (patch)
tree:      d91eba708cf20e7cda0913bb3489df0ae31f5315 /mm/memcontrol.c
parent:    b6b6cc72bc404c952968530d7df4c3a4ab82b65b (diff)
memcg: sanitize __mem_cgroup_try_charge() call protocol
Some callsites pass a memcg directly, some callsites pass an mm that then
has to be translated to a memcg. This makes for a terrible function
interface. Just push the mm-to-memcg translation into the respective
callsites and always pass a memcg to mem_cgroup_try_charge().

[mhocko@suse.cz: add charge mm helper]
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
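To see the call-protocol change in one place, here is a condensed before/after
sketch of a typical callsite (mem_cgroup_newpage_charge(), as changed by the
hunks below); it is an illustration only, not additional code from the patch:

        /* Before: __mem_cgroup_try_charge() resolves mm -> memcg internally
         * and reports the memcg it charged through the **ptr out-parameter. */
        struct mem_cgroup *memcg = NULL;
        int ret;

        ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom);
        if (ret == -ENOMEM)
                return ret;
        __mem_cgroup_commit_charge(memcg, page, nr_pages,
                                   MEM_CGROUP_CHARGE_TYPE_ANON, false);

        /* After: the callsite does the mm -> memcg translation itself via the
         * new mem_cgroup_try_charge_mm() helper and always passes a memcg. */
        struct mem_cgroup *memcg;

        memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages, oom);
        if (!memcg)
                return -ENOMEM;
        __mem_cgroup_commit_charge(memcg, page, nr_pages,
                                   MEM_CGROUP_CHARGE_TYPE_ANON, false);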
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--  mm/memcontrol.c | 207
1 file changed, 102 insertions(+), 105 deletions(-)
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7480022d4655..038b037f8d67 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2575,7 +2575,7 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
 }
 
 
-/* See __mem_cgroup_try_charge() for details */
+/* See mem_cgroup_try_charge() for details */
 enum {
         CHARGE_OK,              /* success */
         CHARGE_RETRY,           /* need to retry but retry is not bad */
@@ -2648,45 +2648,34 @@ static int mem_cgroup_do_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
         return CHARGE_NOMEM;
 }
 
-/*
- * __mem_cgroup_try_charge() does
- * 1. detect memcg to be charged against from passed *mm and *ptr,
- * 2. update res_counter
- * 3. call memory reclaim if necessary.
- *
- * In some special case, if the task is fatal, fatal_signal_pending() or
- * has TIF_MEMDIE, this function returns -EINTR while writing root_mem_cgroup
- * to *ptr. There are two reasons for this. 1: fatal threads should quit as soon
- * as possible without any hazards. 2: all pages should have a valid
- * pc->mem_cgroup. If mm is NULL and the caller doesn't pass a valid memcg
- * pointer, that is treated as a charge to root_mem_cgroup.
- *
- * So __mem_cgroup_try_charge() will return
- *  0       ... on success, filling *ptr with a valid memcg pointer.
- *  -ENOMEM ... charge failure because of resource limits.
- *  -EINTR  ... if thread is fatal. *ptr is filled with root_mem_cgroup.
+/**
+ * mem_cgroup_try_charge - try charging a memcg
+ * @memcg: memcg to charge
+ * @nr_pages: number of pages to charge
+ * @oom: trigger OOM if reclaim fails
  *
- * Unlike the exported interface, an "oom" parameter is added. if oom==true,
- * the oom-killer can be invoked.
+ * Returns 0 if @memcg was charged successfully, -EINTR if the charge
+ * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed.
  */
-static int __mem_cgroup_try_charge(struct mm_struct *mm,
-                                   gfp_t gfp_mask,
-                                   unsigned int nr_pages,
-                                   struct mem_cgroup **ptr,
-                                   bool oom)
+static int mem_cgroup_try_charge(struct mem_cgroup *memcg,
+                                 gfp_t gfp_mask,
+                                 unsigned int nr_pages,
+                                 bool oom)
 {
         unsigned int batch = max(CHARGE_BATCH, nr_pages);
         int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES;
-        struct mem_cgroup *memcg = NULL;
         int ret;
 
+        if (mem_cgroup_is_root(memcg))
+                goto done;
         /*
-         * Unlike gloval-vm's OOM-kill, we're not in memory shortage
-         * in system level. So, allow to go ahead dying process in addition to
-         * MEMDIE process.
+         * Unlike in global OOM situations, memcg is not in a physical
+         * memory shortage.  Allow dying and OOM-killed tasks to
+         * bypass the last charges so that they can exit quickly and
+         * free their memory.
          */
-        if (unlikely(test_thread_flag(TIF_MEMDIE)
-                     || fatal_signal_pending(current)))
+        if (unlikely(test_thread_flag(TIF_MEMDIE) ||
+                     fatal_signal_pending(current)))
                 goto bypass;
 
         if (unlikely(task_in_memcg_oom(current)))
@@ -2695,14 +2684,6 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm,
         if (gfp_mask & __GFP_NOFAIL)
                 oom = false;
 again:
-        if (*ptr) { /* css should be a valid one */
-                memcg = *ptr;
-                css_get(&memcg->css);
-        } else {
-                memcg = get_mem_cgroup_from_mm(mm);
-        }
-        if (mem_cgroup_is_root(memcg))
-                goto done;
         if (consume_stock(memcg, nr_pages))
                 goto done;
 
@@ -2710,10 +2691,8 @@ again:
                 bool invoke_oom = oom && !nr_oom_retries;
 
                 /* If killed, bypass charge */
-                if (fatal_signal_pending(current)) {
-                        css_put(&memcg->css);
+                if (fatal_signal_pending(current))
                         goto bypass;
-                }
 
                 ret = mem_cgroup_do_charge(memcg, gfp_mask, batch,
                                            nr_pages, invoke_oom);
@@ -2722,17 +2701,12 @@ again:
                         break;
                 case CHARGE_RETRY: /* not in OOM situation but retry */
                         batch = nr_pages;
-                        css_put(&memcg->css);
-                        memcg = NULL;
                         goto again;
                 case CHARGE_WOULDBLOCK: /* !__GFP_WAIT */
-                        css_put(&memcg->css);
                         goto nomem;
                 case CHARGE_NOMEM: /* OOM routine works */
-                        if (!oom || invoke_oom) {
-                                css_put(&memcg->css);
+                        if (!oom || invoke_oom)
                                 goto nomem;
-                        }
                         nr_oom_retries--;
                         break;
                 }
@@ -2741,19 +2715,43 @@ again:
         if (batch > nr_pages)
                 refill_stock(memcg, batch - nr_pages);
 done:
-        css_put(&memcg->css);
-        *ptr = memcg;
         return 0;
 nomem:
-        if (!(gfp_mask & __GFP_NOFAIL)) {
-                *ptr = NULL;
+        if (!(gfp_mask & __GFP_NOFAIL))
                 return -ENOMEM;
-        }
 bypass:
-        *ptr = root_mem_cgroup;
         return -EINTR;
 }
 
+/**
+ * mem_cgroup_try_charge_mm - try charging a mm
+ * @mm: mm_struct to charge
+ * @nr_pages: number of pages to charge
+ * @oom: trigger OOM if reclaim fails
+ *
+ * Returns the charged mem_cgroup associated with the given mm_struct or
+ * NULL the charge failed.
+ */
+static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm,
+                                 gfp_t gfp_mask,
+                                 unsigned int nr_pages,
+                                 bool oom)
+
+{
+        struct mem_cgroup *memcg;
+        int ret;
+
+        memcg = get_mem_cgroup_from_mm(mm);
+        ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages, oom);
+        css_put(&memcg->css);
+        if (ret == -EINTR)
+                memcg = root_mem_cgroup;
+        else if (ret)
+                memcg = NULL;
+
+        return memcg;
+}
+
 /*
  * Somemtimes we have to undo a charge we got by try_charge().
  * This function is for that and do uncharge, put css's refcnt.
@@ -2949,20 +2947,17 @@ static int mem_cgroup_slabinfo_read(struct seq_file *m, void *v)
 static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
 {
         struct res_counter *fail_res;
-        struct mem_cgroup *_memcg;
         int ret = 0;
 
         ret = res_counter_charge(&memcg->kmem, size, &fail_res);
         if (ret)
                 return ret;
 
-        _memcg = memcg;
-        ret = __mem_cgroup_try_charge(NULL, gfp, size >> PAGE_SHIFT,
-                                      &_memcg, oom_gfp_allowed(gfp));
-
+        ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT,
+                                    oom_gfp_allowed(gfp));
         if (ret == -EINTR) {
                 /*
-                 * __mem_cgroup_try_charge() chosed to bypass to root due to
+                 * mem_cgroup_try_charge() chosed to bypass to root due to
                  * OOM kill or fatal signal.  Since our only options are to
                  * either fail the allocation or charge it to this cgroup, do
                  * it as a temporary condition. But we can't fail. From a
@@ -2972,7 +2967,7 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
                  *
                  * This condition will only trigger if the task entered
                  * memcg_charge_kmem in a sane state, but was OOM-killed during
-                 * __mem_cgroup_try_charge() above. Tasks that were already
+                 * mem_cgroup_try_charge() above. Tasks that were already
                  * dying when the allocation triggers should have been already
                  * directed to the root cgroup in memcontrol.h
                  */
@@ -3826,10 +3821,9 @@ out:
 int mem_cgroup_newpage_charge(struct page *page,
                               struct mm_struct *mm, gfp_t gfp_mask)
 {
-        struct mem_cgroup *memcg = NULL;
         unsigned int nr_pages = 1;
+        struct mem_cgroup *memcg;
         bool oom = true;
-        int ret;
 
         if (mem_cgroup_disabled())
                 return 0;
@@ -3848,9 +3842,9 @@ int mem_cgroup_newpage_charge(struct page *page,
                 oom = false;
         }
 
-        ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &memcg, oom);
-        if (ret == -ENOMEM)
-                return ret;
+        memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages, oom);
+        if (!memcg)
+                return -ENOMEM;
         __mem_cgroup_commit_charge(memcg, page, nr_pages,
                                    MEM_CGROUP_CHARGE_TYPE_ANON, false);
         return 0;
@@ -3867,7 +3861,7 @@ static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm,
                                           gfp_t mask,
                                           struct mem_cgroup **memcgp)
 {
-        struct mem_cgroup *memcg;
+        struct mem_cgroup *memcg = NULL;
         struct page_cgroup *pc;
         int ret;
 
@@ -3880,31 +3874,29 @@ static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm,
          * in turn serializes uncharging.
          */
         if (PageCgroupUsed(pc))
-                return 0;
-        if (!do_swap_account)
-                goto charge_cur_mm;
-        memcg = try_get_mem_cgroup_from_page(page);
+                goto out;
+        if (do_swap_account)
+                memcg = try_get_mem_cgroup_from_page(page);
         if (!memcg)
-                goto charge_cur_mm;
-        *memcgp = memcg;
-        ret = __mem_cgroup_try_charge(NULL, mask, 1, memcgp, true);
+                memcg = get_mem_cgroup_from_mm(mm);
+        ret = mem_cgroup_try_charge(memcg, mask, 1, true);
         css_put(&memcg->css);
         if (ret == -EINTR)
-                ret = 0;
-        return ret;
-charge_cur_mm:
-        ret = __mem_cgroup_try_charge(mm, mask, 1, memcgp, true);
-        if (ret == -EINTR)
-                ret = 0;
-        return ret;
+                memcg = root_mem_cgroup;
+        else if (ret)
+                return ret;
+out:
+        *memcgp = memcg;
+        return 0;
 }
 
 int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
                                  gfp_t gfp_mask, struct mem_cgroup **memcgp)
 {
-        *memcgp = NULL;
-        if (mem_cgroup_disabled())
+        if (mem_cgroup_disabled()) {
+                *memcgp = NULL;
                 return 0;
+        }
         /*
          * A racing thread's fault, or swapoff, may have already
          * updated the pte, and even removed page from swap cache: in
@@ -3912,12 +3904,13 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
          * there's also a KSM case which does need to charge the page.
          */
         if (!PageSwapCache(page)) {
-                int ret;
+                struct mem_cgroup *memcg;
 
-                ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, memcgp, true);
-                if (ret == -EINTR)
-                        ret = 0;
-                return ret;
+                memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true);
+                if (!memcg)
+                        return -ENOMEM;
+                *memcgp = memcg;
+                return 0;
         }
         return __mem_cgroup_try_charge_swapin(mm, page, gfp_mask, memcgp);
 }
@@ -3964,8 +3957,8 @@ void mem_cgroup_commit_charge_swapin(struct page *page,
 int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
                                 gfp_t gfp_mask)
 {
-        struct mem_cgroup *memcg = NULL;
         enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
+        struct mem_cgroup *memcg;
         int ret;
 
         if (mem_cgroup_disabled())
@@ -3973,23 +3966,28 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
         if (PageCompound(page))
                 return 0;
 
-        if (!PageSwapCache(page)) {
-                /*
-                 * Page cache insertions can happen without an actual
-                 * task context, e.g. during disk probing on boot.
-                 */
-                if (!mm)
-                        memcg = root_mem_cgroup;
-                ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, &memcg, true);
-                if (ret != -ENOMEM)
-                        __mem_cgroup_commit_charge(memcg, page, 1, type, false);
-        } else { /* page is swapcache/shmem */
+        if (PageSwapCache(page)) { /* shmem */
                 ret = __mem_cgroup_try_charge_swapin(mm, page,
                                                      gfp_mask, &memcg);
-                if (!ret)
-                        __mem_cgroup_commit_charge_swapin(page, memcg, type);
+                if (ret)
+                        return ret;
+                __mem_cgroup_commit_charge_swapin(page, memcg, type);
+                return 0;
         }
-        return ret;
+
+        /*
+         * Page cache insertions can happen without an actual mm
+         * context, e.g. during disk probing on boot.
+         */
+        if (unlikely(!mm))
+                memcg = root_mem_cgroup;
+        else {
+                memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1, true);
+                if (!memcg)
+                        return -ENOMEM;
+        }
+        __mem_cgroup_commit_charge(memcg, page, 1, type, false);
+        return 0;
 }
 
 static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg,
@@ -6601,8 +6599,7 @@ one_by_one:
                         batch_count = PRECHARGE_COUNT_AT_ONCE;
                         cond_resched();
                 }
-                ret = __mem_cgroup_try_charge(NULL,
-                                              GFP_KERNEL, 1, &memcg, false);
+                ret = mem_cgroup_try_charge(memcg, GFP_KERNEL, 1, false);
                 if (ret)
                         /* mem_cgroup_clear_mc() will do uncharge later */
                         return ret;