path: root/mm/memcontrol.c
author	Johannes Weiner <hannes@cmpxchg.org>	2014-08-08 17:19:20 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-08-08 18:57:17 -0400
commit	00501b531c4723972aa11d6d4ebcf8d6552007c8 (patch)
tree	b3ad4850d58f137cf87b8424412d962fb251839f	/mm/memcontrol.c
parent	4449a51a7c281602d3a385044ab928322a122a02 (diff)
mm: memcontrol: rewrite charge API
These patches rework memcg charge lifetime to integrate more naturally
with the lifetime of user pages.  This drastically simplifies the code and
reduces charging and uncharging overhead.  The most expensive part of
charging and uncharging is the page_cgroup bit spinlock, which is removed
entirely after this series.

Here are the top-10 profile entries of a stress test that reads a 128G
sparse file on a freshly booted box, without even a dedicated cgroup
(i.e. executing in the root memcg).

Before:

    15.36%    cat      [kernel.kallsyms]   [k] copy_user_generic_string
    13.31%    cat      [kernel.kallsyms]   [k] memset
    11.48%    cat      [kernel.kallsyms]   [k] do_mpage_readpage
     4.23%    cat      [kernel.kallsyms]   [k] get_page_from_freelist
     2.38%    cat      [kernel.kallsyms]   [k] put_page
     2.32%    cat      [kernel.kallsyms]   [k] __mem_cgroup_commit_charge
     2.18%    kswapd0  [kernel.kallsyms]   [k] __mem_cgroup_uncharge_common
     1.92%    kswapd0  [kernel.kallsyms]   [k] shrink_page_list
     1.86%    cat      [kernel.kallsyms]   [k] __radix_tree_lookup
     1.62%    cat      [kernel.kallsyms]   [k] __pagevec_lru_add_fn

After:

    15.67%    cat      [kernel.kallsyms]   [k] copy_user_generic_string
    13.48%    cat      [kernel.kallsyms]   [k] memset
    11.42%    cat      [kernel.kallsyms]   [k] do_mpage_readpage
     3.98%    cat      [kernel.kallsyms]   [k] get_page_from_freelist
     2.46%    cat      [kernel.kallsyms]   [k] put_page
     2.13%    kswapd0  [kernel.kallsyms]   [k] shrink_page_list
     1.88%    cat      [kernel.kallsyms]   [k] __radix_tree_lookup
     1.67%    cat      [kernel.kallsyms]   [k] __pagevec_lru_add_fn
     1.39%    kswapd0  [kernel.kallsyms]   [k] free_pcppages_bulk
     1.30%    cat      [kernel.kallsyms]   [k] kfree

As you can see, the memcg footprint has shrunk quite a bit.

      text    data     bss     dec     hex  filename
     37970    9892     400   48262    bc86  mm/memcontrol.o.old
     35239    9892     400   45531    b1db  mm/memcontrol.o

This patch (of 4):

The memcg charge API charges pages before they are rmapped - i.e. have an
actual "type" - and so every callsite needs its own set of charge and
uncharge functions to know what type is being operated on.  Worse,
uncharge has to happen from a context that is still type-specific, rather
than at the end of the page's lifetime with exclusive access, and so
requires a lot of synchronization.

Rewrite the charge API to provide a generic set of try_charge(),
commit_charge() and cancel_charge() transaction operations, much like
what's currently done for swap-in:

  mem_cgroup_try_charge() attempts to reserve a charge, reclaiming
  pages from the memcg if necessary.

  mem_cgroup_commit_charge() commits the page to the charge once it
  has a valid page->mapping and PageAnon() reliably tells the type.

  mem_cgroup_cancel_charge() aborts the transaction.

This reduces the charge API and enables subsequent patches to
drastically simplify uncharging.

As pages need to be committed after rmap is established but before they
are added to the LRU, page_add_new_anon_rmap() must stop doing LRU
additions again.  Revive lru_cache_add_active_or_unevictable().

[hughd@google.com: fix shmem_unuse]
[hughd@google.com: Add comments on the private use of -EAGAIN]
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Tejun Heo <tj@kernel.org>
Cc: Vladimir Davydov <vdavydov@parallels.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Naoya Horiguchi <n-horiguchi@ah.jp.nec.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
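[Editor's note] For orientation, below is a minimal caller-side sketch of the new
transaction, not part of this patch: it assumes an anonymous-fault path that has
already allocated the page and holds the page table lock, and the function and
variable names around the three memcg calls are illustrative only.  The memcg
calls themselves (mem_cgroup_try_charge(), mem_cgroup_commit_charge(),
mem_cgroup_cancel_charge()) and the "commit after rmap, before LRU" ordering are
taken from the patch description and the code added further down.

    /*
     * Illustrative sketch only -- not part of the patch.  Shows the order a
     * fault handler would use: reserve, instantiate (rmap), commit, LRU add.
     */
    static int example_charge_new_anon_page(struct page *page,
                                            struct vm_area_struct *vma,
                                            unsigned long address)
    {
            struct mem_cgroup *memcg;
            int error;

            /* 1. Reserve the charge; may reclaim from the memcg. */
            error = mem_cgroup_try_charge(page, vma->vm_mm, GFP_KERNEL, &memcg);
            if (error)
                    return error;

            /*
             * ... set up the page table entry here; if instantiation fails
             * at this point, undo the reservation instead:
             *         mem_cgroup_cancel_charge(page, memcg);
             */

            /* 2. Rmap gives the page its type (PageAnon), then commit. */
            page_add_new_anon_rmap(page, vma, address);
            mem_cgroup_commit_charge(page, memcg, false);

            /* 3. Only now add the page to the LRU. */
            lru_cache_add_active_or_unevictable(page, vma);
            return 0;
    }

Callers whose page may already sit on the LRU (swapcache, replace_page_cache)
would pass lrucare == true to mem_cgroup_commit_charge() instead, as the
kernel-doc added in this patch explains.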
Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--	mm/memcontrol.c	407
1 file changed, 167 insertions, 240 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 90dc501eaf3f..1cbe1e54ff5f 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2551,17 +2551,8 @@ static int memcg_cpu_hotplug_callback(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
-/**
- * mem_cgroup_try_charge - try charging a memcg
- * @memcg: memcg to charge
- * @nr_pages: number of pages to charge
- *
- * Returns 0 if @memcg was charged successfully, -EINTR if the charge
- * was bypassed to root_mem_cgroup, and -ENOMEM if the charge failed.
- */
-static int mem_cgroup_try_charge(struct mem_cgroup *memcg,
-				 gfp_t gfp_mask,
-				 unsigned int nr_pages)
+static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
+		      unsigned int nr_pages)
 {
 	unsigned int batch = max(CHARGE_BATCH, nr_pages);
 	int nr_retries = MEM_CGROUP_RECLAIM_RETRIES;
@@ -2660,41 +2651,7 @@ done:
 	return ret;
 }
 
-/**
- * mem_cgroup_try_charge_mm - try charging a mm
- * @mm: mm_struct to charge
- * @nr_pages: number of pages to charge
- * @oom: trigger OOM if reclaim fails
- *
- * Returns the charged mem_cgroup associated with the given mm_struct or
- * NULL the charge failed.
- */
-static struct mem_cgroup *mem_cgroup_try_charge_mm(struct mm_struct *mm,
-				 gfp_t gfp_mask,
-				 unsigned int nr_pages)
-
-{
-	struct mem_cgroup *memcg;
-	int ret;
-
-	memcg = get_mem_cgroup_from_mm(mm);
-	ret = mem_cgroup_try_charge(memcg, gfp_mask, nr_pages);
-	css_put(&memcg->css);
-	if (ret == -EINTR)
-		memcg = root_mem_cgroup;
-	else if (ret)
-		memcg = NULL;
-
-	return memcg;
-}
-
-/*
- * Somemtimes we have to undo a charge we got by try_charge().
- * This function is for that and do uncharge, put css's refcnt.
- * gotten by try_charge().
- */
-static void __mem_cgroup_cancel_charge(struct mem_cgroup *memcg,
-				       unsigned int nr_pages)
+static void cancel_charge(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
 	unsigned long bytes = nr_pages * PAGE_SIZE;
 
@@ -2760,17 +2717,13 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
 	return memcg;
 }
 
-static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
-				       struct page *page,
-				       unsigned int nr_pages,
-				       enum charge_type ctype,
-				       bool lrucare)
+static void commit_charge(struct page *page, struct mem_cgroup *memcg,
+			  unsigned int nr_pages, bool anon, bool lrucare)
 {
 	struct page_cgroup *pc = lookup_page_cgroup(page);
 	struct zone *uninitialized_var(zone);
 	struct lruvec *lruvec;
 	bool was_on_lru = false;
-	bool anon;
 
 	lock_page_cgroup(pc);
 	VM_BUG_ON_PAGE(PageCgroupUsed(pc), page);
@@ -2807,11 +2760,6 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
 		spin_unlock_irq(&zone->lru_lock);
 	}
 
-	if (ctype == MEM_CGROUP_CHARGE_TYPE_ANON)
-		anon = true;
-	else
-		anon = false;
-
 	mem_cgroup_charge_statistics(memcg, page, anon, nr_pages);
 	unlock_page_cgroup(pc);
 
@@ -2882,21 +2830,21 @@ static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, u64 size)
 	if (ret)
 		return ret;
 
-	ret = mem_cgroup_try_charge(memcg, gfp, size >> PAGE_SHIFT);
+	ret = try_charge(memcg, gfp, size >> PAGE_SHIFT);
 	if (ret == -EINTR)  {
 		/*
-		 * mem_cgroup_try_charge() chosed to bypass to root due to
-		 * OOM kill or fatal signal. Since our only options are to
-		 * either fail the allocation or charge it to this cgroup, do
-		 * it as a temporary condition. But we can't fail. From a
-		 * kmem/slab perspective, the cache has already been selected,
-		 * by mem_cgroup_kmem_get_cache(), so it is too late to change
+		 * try_charge() chose to bypass to root due to OOM kill or
+		 * fatal signal. Since our only options are to either fail
+		 * the allocation or charge it to this cgroup, do it as a
+		 * temporary condition. But we can't fail. From a kmem/slab
+		 * perspective, the cache has already been selected, by
+		 * mem_cgroup_kmem_get_cache(), so it is too late to change
 		 * our minds.
 		 *
 		 * This condition will only trigger if the task entered
-		 * memcg_charge_kmem in a sane state, but was OOM-killed during
-		 * mem_cgroup_try_charge() above. Tasks that were already
-		 * dying when the allocation triggers should have been already
+		 * memcg_charge_kmem in a sane state, but was OOM-killed
+		 * during try_charge() above. Tasks that were already dying
+		 * when the allocation triggers should have been already
 		 * directed to the root cgroup in memcontrol.h
 		 */
 		res_counter_charge_nofail(&memcg->res, size, &fail_res);
@@ -3618,164 +3566,6 @@ out:
 	return ret;
 }
 
-int mem_cgroup_charge_anon(struct page *page,
-			   struct mm_struct *mm, gfp_t gfp_mask)
-{
-	unsigned int nr_pages = 1;
-	struct mem_cgroup *memcg;
-
-	if (mem_cgroup_disabled())
-		return 0;
-
-	VM_BUG_ON_PAGE(page_mapped(page), page);
-	VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page);
-	VM_BUG_ON(!mm);
-
-	if (PageTransHuge(page)) {
-		nr_pages <<= compound_order(page);
-		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
-	}
-
-	memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, nr_pages);
-	if (!memcg)
-		return -ENOMEM;
-	__mem_cgroup_commit_charge(memcg, page, nr_pages,
-				   MEM_CGROUP_CHARGE_TYPE_ANON, false);
-	return 0;
-}
-
-/*
- * While swap-in, try_charge -> commit or cancel, the page is locked.
- * And when try_charge() successfully returns, one refcnt to memcg without
- * struct page_cgroup is acquired. This refcnt will be consumed by
- * "commit()" or removed by "cancel()"
- */
-static int __mem_cgroup_try_charge_swapin(struct mm_struct *mm,
-					  struct page *page,
-					  gfp_t mask,
-					  struct mem_cgroup **memcgp)
-{
-	struct mem_cgroup *memcg = NULL;
-	struct page_cgroup *pc;
-	int ret;
-
-	pc = lookup_page_cgroup(page);
-	/*
-	 * Every swap fault against a single page tries to charge the
-	 * page, bail as early as possible.  shmem_unuse() encounters
-	 * already charged pages, too.  The USED bit is protected by
-	 * the page lock, which serializes swap cache removal, which
-	 * in turn serializes uncharging.
-	 */
-	if (PageCgroupUsed(pc))
-		goto out;
-	if (do_swap_account)
-		memcg = try_get_mem_cgroup_from_page(page);
-	if (!memcg)
-		memcg = get_mem_cgroup_from_mm(mm);
-	ret = mem_cgroup_try_charge(memcg, mask, 1);
-	css_put(&memcg->css);
-	if (ret == -EINTR)
-		memcg = root_mem_cgroup;
-	else if (ret)
-		return ret;
-out:
-	*memcgp = memcg;
-	return 0;
-}
-
-int mem_cgroup_try_charge_swapin(struct mm_struct *mm, struct page *page,
-				 gfp_t gfp_mask, struct mem_cgroup **memcgp)
-{
-	if (mem_cgroup_disabled()) {
-		*memcgp = NULL;
-		return 0;
-	}
-	/*
-	 * A racing thread's fault, or swapoff, may have already
-	 * updated the pte, and even removed page from swap cache: in
-	 * those cases unuse_pte()'s pte_same() test will fail; but
-	 * there's also a KSM case which does need to charge the page.
-	 */
-	if (!PageSwapCache(page)) {
-		struct mem_cgroup *memcg;
-
-		memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1);
-		if (!memcg)
-			return -ENOMEM;
-		*memcgp = memcg;
-		return 0;
-	}
-	return __mem_cgroup_try_charge_swapin(mm, page, gfp_mask, memcgp);
-}
-
-void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg)
-{
-	if (mem_cgroup_disabled())
-		return;
-	if (!memcg)
-		return;
-	__mem_cgroup_cancel_charge(memcg, 1);
-}
-
-static void
-__mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg,
-				  enum charge_type ctype)
-{
-	if (mem_cgroup_disabled())
-		return;
-	if (!memcg)
-		return;
-
-	__mem_cgroup_commit_charge(memcg, page, 1, ctype, true);
-	/*
-	 * Now swap is on-memory. This means this page may be
-	 * counted both as mem and swap....double count.
-	 * Fix it by uncharging from memsw. Basically, this SwapCache is stable
-	 * under lock_page(). But in do_swap_page()::memory.c, reuse_swap_page()
-	 * may call delete_from_swap_cache() before reach here.
-	 */
-	if (do_swap_account && PageSwapCache(page)) {
-		swp_entry_t ent = {.val = page_private(page)};
-		mem_cgroup_uncharge_swap(ent);
-	}
-}
-
-void mem_cgroup_commit_charge_swapin(struct page *page,
-				     struct mem_cgroup *memcg)
-{
-	__mem_cgroup_commit_charge_swapin(page, memcg,
-					  MEM_CGROUP_CHARGE_TYPE_ANON);
-}
-
-int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm,
-			   gfp_t gfp_mask)
-{
-	enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
-	struct mem_cgroup *memcg;
-	int ret;
-
-	if (mem_cgroup_disabled())
-		return 0;
-	if (PageCompound(page))
-		return 0;
-
-	if (PageSwapCache(page)) { /* shmem */
-		ret = __mem_cgroup_try_charge_swapin(mm, page,
-						     gfp_mask, &memcg);
-		if (ret)
-			return ret;
-		__mem_cgroup_commit_charge_swapin(page, memcg, type);
-		return 0;
-	}
-
-	memcg = mem_cgroup_try_charge_mm(mm, gfp_mask, 1);
-	if (!memcg)
-		return -ENOMEM;
-	__mem_cgroup_commit_charge(memcg, page, 1, type, false);
-	return 0;
-}
-
 static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg,
 				   unsigned int nr_pages,
 				   const enum charge_type ctype)
@@ -4122,7 +3912,6 @@ void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
 	struct mem_cgroup *memcg = NULL;
 	unsigned int nr_pages = 1;
 	struct page_cgroup *pc;
-	enum charge_type ctype;
 
 	*memcgp = NULL;
 
@@ -4184,16 +3973,12 @@ void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
 	 * page. In the case new page is migrated but not remapped, new page's
 	 * mapcount will be finally 0 and we call uncharge in end_migration().
 	 */
-	if (PageAnon(page))
-		ctype = MEM_CGROUP_CHARGE_TYPE_ANON;
-	else
-		ctype = MEM_CGROUP_CHARGE_TYPE_CACHE;
 	/*
 	 * The page is committed to the memcg, but it's not actually
 	 * charged to the res_counter since we plan on replacing the
 	 * old one and only one page is going to be left afterwards.
 	 */
-	__mem_cgroup_commit_charge(memcg, newpage, nr_pages, ctype, false);
+	commit_charge(newpage, memcg, nr_pages, PageAnon(page), false);
 }
 
 /* remove redundant charge if migration failed*/
@@ -4252,7 +4037,6 @@ void mem_cgroup_replace_page_cache(struct page *oldpage,
 {
 	struct mem_cgroup *memcg = NULL;
 	struct page_cgroup *pc;
-	enum charge_type type = MEM_CGROUP_CHARGE_TYPE_CACHE;
 
 	if (mem_cgroup_disabled())
 		return;
@@ -4278,7 +4062,7 @@ void mem_cgroup_replace_page_cache(struct page *oldpage,
 	 * the newpage may be on LRU(or pagevec for LRU) already. We lock
 	 * LRU while we overwrite pc->mem_cgroup.
 	 */
-	__mem_cgroup_commit_charge(memcg, newpage, 1, type, true);
+	commit_charge(newpage, memcg, 1, false, true);
 }
 
 #ifdef CONFIG_DEBUG_VM
@@ -6319,20 +6103,19 @@ static int mem_cgroup_do_precharge(unsigned long count)
 	int ret;
 
 	/* Try a single bulk charge without reclaim first */
-	ret = mem_cgroup_try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count);
+	ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_WAIT, count);
 	if (!ret) {
 		mc.precharge += count;
 		return ret;
 	}
 	if (ret == -EINTR) {
-		__mem_cgroup_cancel_charge(root_mem_cgroup, count);
+		cancel_charge(root_mem_cgroup, count);
 		return ret;
 	}
 
 	/* Try charges one by one with reclaim */
 	while (count--) {
-		ret = mem_cgroup_try_charge(mc.to,
-					    GFP_KERNEL & ~__GFP_NORETRY, 1);
+		ret = try_charge(mc.to, GFP_KERNEL & ~__GFP_NORETRY, 1);
 		/*
 		 * In case of failure, any residual charges against
 		 * mc.to will be dropped by mem_cgroup_clear_mc()
@@ -6340,7 +6123,7 @@ static int mem_cgroup_do_precharge(unsigned long count)
 		 * bypassed to root right away or they'll be lost.
 		 */
 		if (ret == -EINTR)
-			__mem_cgroup_cancel_charge(root_mem_cgroup, 1);
+			cancel_charge(root_mem_cgroup, 1);
 		if (ret)
 			return ret;
 		mc.precharge++;
@@ -6609,7 +6392,7 @@ static void __mem_cgroup_clear_mc(void)
 
 	/* we must uncharge all the leftover precharges from mc.to */
 	if (mc.precharge) {
-		__mem_cgroup_cancel_charge(mc.to, mc.precharge);
+		cancel_charge(mc.to, mc.precharge);
 		mc.precharge = 0;
 	}
 	/*
@@ -6617,7 +6400,7 @@ static void __mem_cgroup_clear_mc(void)
 	 * we must uncharge here.
 	 */
 	if (mc.moved_charge) {
-		__mem_cgroup_cancel_charge(mc.from, mc.moved_charge);
+		cancel_charge(mc.from, mc.moved_charge);
 		mc.moved_charge = 0;
 	}
 	/* we must fixup refcnts and charges */
@@ -6946,6 +6729,150 @@ static void __init enable_swap_cgroup(void)
 }
 #endif
 
+/**
+ * mem_cgroup_try_charge - try charging a page
+ * @page: page to charge
+ * @mm: mm context of the victim
+ * @gfp_mask: reclaim mode
+ * @memcgp: charged memcg return
+ *
+ * Try to charge @page to the memcg that @mm belongs to, reclaiming
+ * pages according to @gfp_mask if necessary.
+ *
+ * Returns 0 on success, with *@memcgp pointing to the charged memcg.
+ * Otherwise, an error code is returned.
+ *
+ * After page->mapping has been set up, the caller must finalize the
+ * charge with mem_cgroup_commit_charge().  Or abort the transaction
+ * with mem_cgroup_cancel_charge() in case page instantiation fails.
+ */
+int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
+			  gfp_t gfp_mask, struct mem_cgroup **memcgp)
+{
+	struct mem_cgroup *memcg = NULL;
+	unsigned int nr_pages = 1;
+	int ret = 0;
+
+	if (mem_cgroup_disabled())
+		goto out;
+
+	if (PageSwapCache(page)) {
+		struct page_cgroup *pc = lookup_page_cgroup(page);
+		/*
+		 * Every swap fault against a single page tries to charge the
+		 * page, bail as early as possible.  shmem_unuse() encounters
+		 * already charged pages, too.  The USED bit is protected by
+		 * the page lock, which serializes swap cache removal, which
+		 * in turn serializes uncharging.
+		 */
+		if (PageCgroupUsed(pc))
+			goto out;
+	}
+
+	if (PageTransHuge(page)) {
+		nr_pages <<= compound_order(page);
+		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+	}
+
+	if (do_swap_account && PageSwapCache(page))
+		memcg = try_get_mem_cgroup_from_page(page);
+	if (!memcg)
+		memcg = get_mem_cgroup_from_mm(mm);
+
+	ret = try_charge(memcg, gfp_mask, nr_pages);
+
+	css_put(&memcg->css);
+
+	if (ret == -EINTR) {
+		memcg = root_mem_cgroup;
+		ret = 0;
+	}
+out:
+	*memcgp = memcg;
+	return ret;
+}
+
+/**
+ * mem_cgroup_commit_charge - commit a page charge
+ * @page: page to charge
+ * @memcg: memcg to charge the page to
+ * @lrucare: page might be on LRU already
+ *
+ * Finalize a charge transaction started by mem_cgroup_try_charge(),
+ * after page->mapping has been set up.  This must happen atomically
+ * as part of the page instantiation, i.e. under the page table lock
+ * for anonymous pages, under the page lock for page and swap cache.
+ *
+ * In addition, the page must not be on the LRU during the commit, to
+ * prevent racing with task migration.  If it might be, use @lrucare.
+ *
+ * Use mem_cgroup_cancel_charge() to cancel the transaction instead.
+ */
+void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
+			      bool lrucare)
+{
+	unsigned int nr_pages = 1;
+
+	VM_BUG_ON_PAGE(!page->mapping, page);
+	VM_BUG_ON_PAGE(PageLRU(page) && !lrucare, page);
+
+	if (mem_cgroup_disabled())
+		return;
+	/*
+	 * Swap faults will attempt to charge the same page multiple
+	 * times.  But reuse_swap_page() might have removed the page
+	 * from swapcache already, so we can't check PageSwapCache().
+	 */
+	if (!memcg)
+		return;
+
+	if (PageTransHuge(page)) {
+		nr_pages <<= compound_order(page);
+		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+	}
+
+	commit_charge(page, memcg, nr_pages, PageAnon(page), lrucare);
+
+	if (do_swap_account && PageSwapCache(page)) {
+		swp_entry_t entry = { .val = page_private(page) };
+		/*
+		 * The swap entry might not get freed for a long time,
+		 * let's not wait for it.  The page already received a
+		 * memory+swap charge, drop the swap entry duplicate.
+		 */
+		mem_cgroup_uncharge_swap(entry);
+	}
+}
+
+/**
+ * mem_cgroup_cancel_charge - cancel a page charge
+ * @page: page to charge
+ * @memcg: memcg to charge the page to
+ *
+ * Cancel a charge transaction started by mem_cgroup_try_charge().
+ */
+void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
+{
+	unsigned int nr_pages = 1;
+
+	if (mem_cgroup_disabled())
+		return;
+	/*
+	 * Swap faults will attempt to charge the same page multiple
+	 * times.  But reuse_swap_page() might have removed the page
+	 * from swapcache already, so we can't check PageSwapCache().
+	 */
+	if (!memcg)
+		return;
+
+	if (PageTransHuge(page)) {
+		nr_pages <<= compound_order(page);
+		VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+	}
+
+	cancel_charge(memcg, nr_pages);
+}
+
 /*
  * subsys_initcall() for memory controller.
  *