Diffstat (limited to 'mm')
-rw-r--r--  mm/filemap.c       4
-rw-r--r--  mm/memcontrol.c  828
-rw-r--r--  mm/memory.c        2
-rw-r--r--  mm/migrate.c      38
-rw-r--r--  mm/rmap.c          1
-rw-r--r--  mm/shmem.c         8
-rw-r--r--  mm/swap.c          6
-rw-r--r--  mm/swap_state.c    8
-rw-r--r--  mm/swapfile.c      7
-rw-r--r--  mm/truncate.c      9
-rw-r--r--  mm/vmscan.c       12
-rw-r--r--  mm/zswap.c         2
12 files changed, 355 insertions, 570 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index 349a40e35545..f501b56ec2c6 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -234,7 +234,6 @@ void delete_from_page_cache(struct page *page)
 	spin_lock_irq(&mapping->tree_lock);
 	__delete_from_page_cache(page, NULL);
 	spin_unlock_irq(&mapping->tree_lock);
-	mem_cgroup_uncharge_cache_page(page);
 
 	if (freepage)
 		freepage(page);
@@ -490,8 +489,7 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
 		if (PageSwapBacked(new))
 			__inc_zone_page_state(new, NR_SHMEM);
 		spin_unlock_irq(&mapping->tree_lock);
-		/* mem_cgroup codes must not be called under tree_lock */
-		mem_cgroup_replace_page_cache(old, new);
+		mem_cgroup_migrate(old, new, true);
 		radix_tree_preload_end();
 		if (freepage)
 			freepage(old);
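The filemap.c hunks drop the last open-coded memcg hooks from the page-cache paths: uncharging now happens from the final put_page() (see the mm/swap.c hunks below), and page replacement goes through mem_cgroup_migrate(). For orientation, here is a minimal sketch of the caller-side try/commit/cancel lifecycle this series introduces; the mem_cgroup_try_charge() signature and the example_* helpers are assumptions, since the try_charge hunk itself is not part of this excerpt.

#include <linux/memcontrol.h>
#include <linux/mm.h>

/* Stand-in for the real insertion step (page cache add, rmap, etc.). */
static int example_insert_page(struct page *page)
{
        return 0;
}

static int example_charge_new_page(struct page *page, struct mm_struct *mm,
                                   gfp_t gfp_mask)
{
        struct mem_cgroup *memcg;
        int error;

        /* Reserve the charge against the task's memcg up front. */
        error = mem_cgroup_try_charge(page, mm, gfp_mask, &memcg);
        if (error)
                return error;

        /* Make the page visible (page cache or page tables). */
        error = example_insert_page(page);
        if (error) {
                /* Nothing references the page: return the reservation. */
                mem_cgroup_cancel_charge(page, memcg);
                return error;
        }

        /* Bind the reserved charge to the now-visible page (not on LRU yet). */
        mem_cgroup_commit_charge(page, memcg, false);
        return 0;
}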
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1cbe1e54ff5f..9106f1b12f56 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -754,9 +754,11 @@ static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
 static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
                                        struct mem_cgroup_tree_per_zone *mctz)
 {
-        spin_lock(&mctz->lock);
+        unsigned long flags;
+
+        spin_lock_irqsave(&mctz->lock, flags);
         __mem_cgroup_remove_exceeded(mz, mctz);
-        spin_unlock(&mctz->lock);
+        spin_unlock_irqrestore(&mctz->lock, flags);
 }
 
 
@@ -779,7 +781,9 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
                  * mem is over its softlimit.
                  */
                 if (excess || mz->on_tree) {
-                        spin_lock(&mctz->lock);
+                        unsigned long flags;
+
+                        spin_lock_irqsave(&mctz->lock, flags);
                         /* if on-tree, remove it */
                         if (mz->on_tree)
                                 __mem_cgroup_remove_exceeded(mz, mctz);
@@ -788,7 +792,7 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
                          * If excess is 0, no tree ops.
                          */
                         __mem_cgroup_insert_exceeded(mz, mctz, excess);
-                        spin_unlock(&mctz->lock);
+                        spin_unlock_irqrestore(&mctz->lock, flags);
                 }
         }
 }
@@ -839,9 +843,9 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
 {
         struct mem_cgroup_per_zone *mz;
 
-        spin_lock(&mctz->lock);
+        spin_lock_irq(&mctz->lock);
         mz = __mem_cgroup_largest_soft_limit_node(mctz);
-        spin_unlock(&mctz->lock);
+        spin_unlock_irq(&mctz->lock);
         return mz;
 }
 
@@ -882,13 +886,6 @@ static long mem_cgroup_read_stat(struct mem_cgroup *memcg,
         return val;
 }
 
-static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
-                                       bool charge)
-{
-        int val = (charge) ? 1 : -1;
-        this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val);
-}
-
 static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
                                             enum mem_cgroup_events_index idx)
 {
@@ -909,13 +906,13 @@ static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
 
 static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg,
                                          struct page *page,
-                                         bool anon, int nr_pages)
+                                         int nr_pages)
 {
         /*
          * Here, RSS means 'mapped anon' and anon's SwapCache. Shmem/tmpfs is
          * counted as CACHE even if it's on ANON LRU.
          */
-        if (anon)
+        if (PageAnon(page))
                 __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS],
                                 nr_pages);
         else
@@ -1013,7 +1010,6 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
  */
 static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
 {
-        preempt_disable();
         /* threshold event is triggered in finer grain than soft limit */
         if (unlikely(mem_cgroup_event_ratelimit(memcg,
                                                 MEM_CGROUP_TARGET_THRESH))) {
@@ -1026,8 +1022,6 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
                 do_numainfo = mem_cgroup_event_ratelimit(memcg,
                                                 MEM_CGROUP_TARGET_NUMAINFO);
 #endif
-                preempt_enable();
-
                 mem_cgroup_threshold(memcg);
                 if (unlikely(do_softlimit))
                         mem_cgroup_update_tree(memcg, page);
@@ -1035,8 +1029,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
                 if (unlikely(do_numainfo))
                         atomic_inc(&memcg->numainfo_events);
 #endif
-        } else
-                preempt_enable();
+        }
 }
 
 struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p)
@@ -1347,20 +1340,6 @@ out:
         return lruvec;
 }
 
-/*
- * Following LRU functions are allowed to be used without PCG_LOCK.
- * Operations are called by routine of global LRU independently from memcg.
- * What we have to take care of here is validness of pc->mem_cgroup.
- *
- * Changes to pc->mem_cgroup happens when
- * 1. charge
- * 2. moving account
- * In typical case, "charge" is done before add-to-lru. Exception is SwapCache.
- * It is added to LRU before charge.
- * If PCG_USED bit is not set, page_cgroup is not added to this private LRU.
- * When moving account, the page is not on LRU. It's isolated.
- */
-
 /**
  * mem_cgroup_page_lruvec - return lruvec for adding an lru page
  * @page: the page
@@ -2261,22 +2240,14 @@ cleanup:
  *
  * Notes: Race condition
  *
- * We usually use lock_page_cgroup() for accessing page_cgroup member but
- * it tends to be costly. But considering some conditions, we doesn't need
- * to do so _always_.
- *
- * Considering "charge", lock_page_cgroup() is not required because all
- * file-stat operations happen after a page is attached to radix-tree. There
- * are no race with "charge".
+ * Charging occurs during page instantiation, while the page is
+ * unmapped and locked in page migration, or while the page table is
+ * locked in THP migration. No race is possible.
  *
- * Considering "uncharge", we know that memcg doesn't clear pc->mem_cgroup
- * at "uncharge" intentionally. So, we always see valid pc->mem_cgroup even
- * if there are race with "uncharge". Statistics itself is properly handled
- * by flags.
+ * Uncharge happens to pages with zero references, no race possible.
  *
- * Considering "move", this is an only case we see a race. To make the race
- * small, we check memcg->moving_account and detect there are possibility
- * of race or not. If there is, we take a lock.
+ * Charge moving between groups is protected by checking mm->moving
+ * account and taking the move_lock in the slowpath.
  */
 
 void __mem_cgroup_begin_update_page_stat(struct page *page,
@@ -2689,6 +2660,16 @@ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
         return mem_cgroup_from_id(id);
 }
 
+/*
+ * try_get_mem_cgroup_from_page - look up page's memcg association
+ * @page: the page
+ *
+ * Look up, get a css reference, and return the memcg that owns @page.
+ *
+ * The page must be locked to prevent racing with swap-in and page
+ * cache charges.  If coming from an unlocked page table, the caller
+ * must ensure the page is on the LRU or this can race with charging.
+ */
 struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
 {
         struct mem_cgroup *memcg = NULL;
@@ -2699,7 +2680,6 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
         VM_BUG_ON_PAGE(!PageLocked(page), page);
 
         pc = lookup_page_cgroup(page);
-        lock_page_cgroup(pc);
         if (PageCgroupUsed(pc)) {
                 memcg = pc->mem_cgroup;
                 if (memcg && !css_tryget_online(&memcg->css))
@@ -2713,19 +2693,46 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
                         memcg = NULL;
                 rcu_read_unlock();
         }
-        unlock_page_cgroup(pc);
         return memcg;
 }
 
+static void lock_page_lru(struct page *page, int *isolated)
+{
+        struct zone *zone = page_zone(page);
+
+        spin_lock_irq(&zone->lru_lock);
+        if (PageLRU(page)) {
+                struct lruvec *lruvec;
+
+                lruvec = mem_cgroup_page_lruvec(page, zone);
+                ClearPageLRU(page);
+                del_page_from_lru_list(page, lruvec, page_lru(page));
+                *isolated = 1;
+        } else
+                *isolated = 0;
+}
+
+static void unlock_page_lru(struct page *page, int isolated)
+{
+        struct zone *zone = page_zone(page);
+
+        if (isolated) {
+                struct lruvec *lruvec;
+
+                lruvec = mem_cgroup_page_lruvec(page, zone);
+                VM_BUG_ON_PAGE(PageLRU(page), page);
+                SetPageLRU(page);
+                add_page_to_lru_list(page, lruvec, page_lru(page));
+        }
+        spin_unlock_irq(&zone->lru_lock);
+}
+
 static void commit_charge(struct page *page, struct mem_cgroup *memcg,
-                          unsigned int nr_pages, bool anon, bool lrucare)
+                          unsigned int nr_pages, bool lrucare)
 {
         struct page_cgroup *pc = lookup_page_cgroup(page);
-        struct zone *uninitialized_var(zone);
-        struct lruvec *lruvec;
-        bool was_on_lru = false;
+        int isolated;
 
-        lock_page_cgroup(pc);
         VM_BUG_ON_PAGE(PageCgroupUsed(pc), page);
         /*
          * we don't need page_cgroup_lock about tail pages, becase they are not
@@ -2736,39 +2743,38 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
          * In some cases, SwapCache and FUSE(splice_buf->radixtree), the page
          * may already be on some other mem_cgroup's LRU. Take care of it.
          */
-        if (lrucare) {
-                zone = page_zone(page);
-                spin_lock_irq(&zone->lru_lock);
-                if (PageLRU(page)) {
-                        lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup);
-                        ClearPageLRU(page);
-                        del_page_from_lru_list(page, lruvec, page_lru(page));
-                        was_on_lru = true;
-                }
-        }
+        if (lrucare)
+                lock_page_lru(page, &isolated);
 
+        /*
+         * Nobody should be changing or seriously looking at
+         * pc->mem_cgroup and pc->flags at this point:
+         *
+         * - the page is uncharged
+         *
+         * - the page is off-LRU
+         *
+         * - an anonymous fault has exclusive page access, except for
+         *   a locked page table
+         *
+         * - a page cache insertion, a swapin fault, or a migration
+         *   have the page locked
+         */
         pc->mem_cgroup = memcg;
-        SetPageCgroupUsed(pc);
-
-        if (lrucare) {
-                if (was_on_lru) {
-                        lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup);
-                        VM_BUG_ON_PAGE(PageLRU(page), page);
-                        SetPageLRU(page);
-                        add_page_to_lru_list(page, lruvec, page_lru(page));
-                }
-                spin_unlock_irq(&zone->lru_lock);
-        }
+        pc->flags = PCG_USED | PCG_MEM | (do_swap_account ? PCG_MEMSW : 0);
 
-        mem_cgroup_charge_statistics(memcg, page, anon, nr_pages);
-        unlock_page_cgroup(pc);
+        if (lrucare)
+                unlock_page_lru(page, isolated);
 
+        local_irq_disable();
+        mem_cgroup_charge_statistics(memcg, page, nr_pages);
         /*
          * "charge_statistics" updated event counter. Then, check it.
          * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree.
          * if they exceeds softlimit.
          */
         memcg_check_events(memcg, page);
+        local_irq_enable();
 }
 
 static DEFINE_MUTEX(set_limit_mutex);
@@ -3395,7 +3401,6 @@ static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
 
-#define PCGF_NOCOPY_AT_SPLIT (1 << PCG_LOCK | 1 << PCG_MIGRATION)
 /*
  * Because tail pages are not marked as "used", set it. We're under
  * zone->lru_lock, 'splitting on pmd' and compound_lock.
@@ -3416,7 +3421,7 @@ void mem_cgroup_split_huge_fixup(struct page *head)
         for (i = 1; i < HPAGE_PMD_NR; i++) {
                 pc = head_pc + i;
                 pc->mem_cgroup = memcg;
-                pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT;
+                pc->flags = head_pc->flags;
         }
         __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE],
                        HPAGE_PMD_NR);
@@ -3446,7 +3451,6 @@ static int mem_cgroup_move_account(struct page *page,
 {
         unsigned long flags;
         int ret;
-        bool anon = PageAnon(page);
 
         VM_BUG_ON(from == to);
         VM_BUG_ON_PAGE(PageLRU(page), page);
@@ -3460,15 +3464,21 @@ static int mem_cgroup_move_account(struct page *page,
         if (nr_pages > 1 && !PageTransHuge(page))
                 goto out;
 
-        lock_page_cgroup(pc);
+        /*
+         * Prevent mem_cgroup_migrate() from looking at pc->mem_cgroup
+         * of its source page while we change it: page migration takes
+         * both pages off the LRU, but page cache replacement doesn't.
+         */
+        if (!trylock_page(page))
+                goto out;
 
         ret = -EINVAL;
         if (!PageCgroupUsed(pc) || pc->mem_cgroup != from)
-                goto unlock;
+                goto out_unlock;
 
         move_lock_mem_cgroup(from, &flags);
 
-        if (!anon && page_mapped(page)) {
+        if (!PageAnon(page) && page_mapped(page)) {
                 __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],
                                nr_pages);
                 __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],
@@ -3482,20 +3492,25 @@ static int mem_cgroup_move_account(struct page *page,
                                nr_pages);
         }
 
-        mem_cgroup_charge_statistics(from, page, anon, -nr_pages);
+        /*
+         * It is safe to change pc->mem_cgroup here because the page
+         * is referenced, charged, and isolated - we can't race with
+         * uncharging, charging, migration, or LRU putback.
+         */
 
         /* caller should have done css_get */
         pc->mem_cgroup = to;
-        mem_cgroup_charge_statistics(to, page, anon, nr_pages);
         move_unlock_mem_cgroup(from, &flags);
         ret = 0;
-unlock:
-        unlock_page_cgroup(pc);
-        /*
-         * check events
-         */
+
+        local_irq_disable();
+        mem_cgroup_charge_statistics(to, page, nr_pages);
         memcg_check_events(to, page);
+        mem_cgroup_charge_statistics(from, page, -nr_pages);
         memcg_check_events(from, page);
+        local_irq_enable();
+out_unlock:
+        unlock_page(page);
 out:
         return ret;
 }
@@ -3566,193 +3581,6 @@ out:
         return ret;
 }
 
-static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg,
-                                   unsigned int nr_pages,
-                                   const enum charge_type ctype)
-{
-        struct memcg_batch_info *batch = NULL;
-        bool uncharge_memsw = true;
-
-        /* If swapout, usage of swap doesn't decrease */
-        if (!do_swap_account || ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT)
-                uncharge_memsw = false;
-
-        batch = &current->memcg_batch;
-        /*
-         * In usual, we do css_get() when we remember memcg pointer.
-         * But in this case, we keep res->usage until end of a series of
-         * uncharges. Then, it's ok to ignore memcg's refcnt.
-         */
-        if (!batch->memcg)
-                batch->memcg = memcg;
-        /*
-         * do_batch > 0 when unmapping pages or inode invalidate/truncate.
-         * In those cases, all pages freed continuously can be expected to be in
-         * the same cgroup and we have chance to coalesce uncharges.
-         * But we do uncharge one by one if this is killed by OOM(TIF_MEMDIE)
-         * because we want to do uncharge as soon as possible.
-         */
-
-        if (!batch->do_batch || test_thread_flag(TIF_MEMDIE))
-                goto direct_uncharge;
-
-        if (nr_pages > 1)
-                goto direct_uncharge;
-
-        /*
-         * In typical case, batch->memcg == mem. This means we can
-         * merge a series of uncharges to an uncharge of res_counter.
-         * If not, we uncharge res_counter ony by one.
-         */
-        if (batch->memcg != memcg)
-                goto direct_uncharge;
-        /* remember freed charge and uncharge it later */
-        batch->nr_pages++;
-        if (uncharge_memsw)
-                batch->memsw_nr_pages++;
-        return;
-direct_uncharge:
-        res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE);
-        if (uncharge_memsw)
-                res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE);
-        if (unlikely(batch->memcg != memcg))
-                memcg_oom_recover(memcg);
-}
-
-/*
- * uncharge if !page_mapped(page)
- */
-static struct mem_cgroup *
-__mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype,
-                             bool end_migration)
-{
-        struct mem_cgroup *memcg = NULL;
-        unsigned int nr_pages = 1;
-        struct page_cgroup *pc;
-        bool anon;
-
-        if (mem_cgroup_disabled())
-                return NULL;
-
-        if (PageTransHuge(page)) {
-                nr_pages <<= compound_order(page);
-                VM_BUG_ON_PAGE(!PageTransHuge(page), page);
-        }
-        /*
-         * Check if our page_cgroup is valid
-         */
-        pc = lookup_page_cgroup(page);
-        if (unlikely(!PageCgroupUsed(pc)))
-                return NULL;
-
-        lock_page_cgroup(pc);
-
-        memcg = pc->mem_cgroup;
-
-        if (!PageCgroupUsed(pc))
-                goto unlock_out;
-
-        anon = PageAnon(page);
-
-        switch (ctype) {
-        case MEM_CGROUP_CHARGE_TYPE_ANON:
-                /*
-                 * Generally PageAnon tells if it's the anon statistics to be
-                 * updated; but sometimes e.g. mem_cgroup_uncharge_page() is
-                 * used before page reached the stage of being marked PageAnon.
-                 */
-                anon = true;
-                /* fallthrough */
-        case MEM_CGROUP_CHARGE_TYPE_DROP:
-                /* See mem_cgroup_prepare_migration() */
-                if (page_mapped(page))
-                        goto unlock_out;
-                /*
-                 * Pages under migration may not be uncharged. But
-                 * end_migration() /must/ be the one uncharging the
-                 * unused post-migration page and so it has to call
-                 * here with the migration bit still set. See the
-                 * res_counter handling below.
-                 */
-                if (!end_migration && PageCgroupMigration(pc))
-                        goto unlock_out;
-                break;
-        case MEM_CGROUP_CHARGE_TYPE_SWAPOUT:
-                if (!PageAnon(page)) {  /* Shared memory */
-                        if (page->mapping && !page_is_file_cache(page))
-                                goto unlock_out;
-                } else if (page_mapped(page)) /* Anon */
-                        goto unlock_out;
-                break;
-        default:
-                break;
-        }
-
-        mem_cgroup_charge_statistics(memcg, page, anon, -nr_pages);
-
-        ClearPageCgroupUsed(pc);
-        /*
-         * pc->mem_cgroup is not cleared here. It will be accessed when it's
-         * freed from LRU. This is safe because uncharged page is expected not
-         * to be reused (freed soon). Exception is SwapCache, it's handled by
-         * special functions.
-         */
-
-        unlock_page_cgroup(pc);
-        /*
-         * even after unlock, we have memcg->res.usage here and this memcg
-         * will never be freed, so it's safe to call css_get().
-         */
-        memcg_check_events(memcg, page);
-        if (do_swap_account && ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) {
-                mem_cgroup_swap_statistics(memcg, true);
-                css_get(&memcg->css);
-        }
-        /*
-         * Migration does not charge the res_counter for the
-         * replacement page, so leave it alone when phasing out the
-         * page that is unused after the migration.
-         */
-        if (!end_migration)
-                mem_cgroup_do_uncharge(memcg, nr_pages, ctype);
-
-        return memcg;
-
-unlock_out:
-        unlock_page_cgroup(pc);
-        return NULL;
-}
-
-void mem_cgroup_uncharge_page(struct page *page)
-{
-        /* early check. */
-        if (page_mapped(page))
-                return;
-        VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page);
-        /*
-         * If the page is in swap cache, uncharge should be deferred
-         * to the swap path, which also properly accounts swap usage
-         * and handles memcg lifetime.
-         *
-         * Note that this check is not stable and reclaim may add the
-         * page to swap cache at any time after this.  However, if the
-         * page is not in swap cache by the time page->mapcount hits
-         * 0, there won't be any page table references to the swap
-         * slot, and reclaim will free it and not actually write the
-         * page to disk.
-         */
-        if (PageSwapCache(page))
-                return;
-        __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false);
-}
-
-void mem_cgroup_uncharge_cache_page(struct page *page)
-{
-        VM_BUG_ON_PAGE(page_mapped(page), page);
-        VM_BUG_ON_PAGE(page->mapping, page);
-        __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE, false);
-}
-
 /*
  * Batch_start/batch_end is called in unmap_page_range/invlidate/trucate.
  * In that cases, pages are freed continuously and we can expect pages
@@ -3763,6 +3591,9 @@ void mem_cgroup_uncharge_cache_page(struct page *page)
 
 void mem_cgroup_uncharge_start(void)
 {
+        unsigned long flags;
+
+        local_irq_save(flags);
         current->memcg_batch.do_batch++;
         /* We can do nest. */
         if (current->memcg_batch.do_batch == 1) {
@@ -3770,21 +3601,18 @@ void mem_cgroup_uncharge_start(void)
                 current->memcg_batch.nr_pages = 0;
                 current->memcg_batch.memsw_nr_pages = 0;
         }
+        local_irq_restore(flags);
 }
 
 void mem_cgroup_uncharge_end(void)
 {
         struct memcg_batch_info *batch = &current->memcg_batch;
+        unsigned long flags;
 
-        if (!batch->do_batch)
-                return;
-
-        batch->do_batch--;
-        if (batch->do_batch) /* If stacked, do nothing. */
-                return;
-
-        if (!batch->memcg)
-                return;
+        local_irq_save(flags);
+        VM_BUG_ON(!batch->do_batch);
+        if (--batch->do_batch) /* If stacked, do nothing */
+                goto out;
         /*
          * This "batch->memcg" is valid without any css_get/put etc...
          * bacause we hide charges behind us.
@@ -3796,61 +3624,16 @@ void mem_cgroup_uncharge_end(void)
                 res_counter_uncharge(&batch->memcg->memsw,
                                      batch->memsw_nr_pages * PAGE_SIZE);
         memcg_oom_recover(batch->memcg);
-        /* forget this pointer (for sanity check) */
-        batch->memcg = NULL;
-}
-
-#ifdef CONFIG_SWAP
-/*
- * called after __delete_from_swap_cache() and drop "page" account.
- * memcg information is recorded to swap_cgroup of "ent"
- */
-void
-mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
-{
-        struct mem_cgroup *memcg;
-        int ctype = MEM_CGROUP_CHARGE_TYPE_SWAPOUT;
-
-        if (!swapout) /* this was a swap cache but the swap is unused ! */
-                ctype = MEM_CGROUP_CHARGE_TYPE_DROP;
-
-        memcg = __mem_cgroup_uncharge_common(page, ctype, false);
-
-        /*
-         * record memcg information, if swapout && memcg != NULL,
-         * css_get() was called in uncharge().
-         */
-        if (do_swap_account && swapout && memcg)
-                swap_cgroup_record(ent, mem_cgroup_id(memcg));
+out:
+        local_irq_restore(flags);
 }
-#endif
 
 #ifdef CONFIG_MEMCG_SWAP
-/*
- * called from swap_entry_free(). remove record in swap_cgroup and
- * uncharge "memsw" account.
- */
-void mem_cgroup_uncharge_swap(swp_entry_t ent)
+static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg,
+                                       bool charge)
 {
-        struct mem_cgroup *memcg;
-        unsigned short id;
-
-        if (!do_swap_account)
-                return;
-
-        id = swap_cgroup_record(ent, 0);
-        rcu_read_lock();
-        memcg = mem_cgroup_lookup(id);
-        if (memcg) {
-                /*
-                 * We uncharge this because swap is freed. This memcg can
-                 * be obsolete one. We avoid calling css_tryget_online().
-                 */
-                res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
-                mem_cgroup_swap_statistics(memcg, false);
-                css_put(&memcg->css);
-        }
-        rcu_read_unlock();
+        int val = (charge) ? 1 : -1;
+        this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val);
 }
 
 /**
@@ -3902,169 +3685,6 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
 }
 #endif
 
-/*
- * Before starting migration, account PAGE_SIZE to mem_cgroup that the old
- * page belongs to.
- */
-void mem_cgroup_prepare_migration(struct page *page, struct page *newpage,
-                                  struct mem_cgroup **memcgp)
-{
-        struct mem_cgroup *memcg = NULL;
-        unsigned int nr_pages = 1;
-        struct page_cgroup *pc;
-
-        *memcgp = NULL;
-
-        if (mem_cgroup_disabled())
-                return;
-
-        if (PageTransHuge(page))
-                nr_pages <<= compound_order(page);
-
-        pc = lookup_page_cgroup(page);
-        lock_page_cgroup(pc);
-        if (PageCgroupUsed(pc)) {
-                memcg = pc->mem_cgroup;
-                css_get(&memcg->css);
-                /*
-                 * At migrating an anonymous page, its mapcount goes down
-                 * to 0 and uncharge() will be called. But, even if it's fully
-                 * unmapped, migration may fail and this page has to be
-                 * charged again. We set MIGRATION flag here and delay uncharge
-                 * until end_migration() is called
-                 *
-                 * Corner Case Thinking
-                 * A)
-                 * When the old page was mapped as Anon and it's unmap-and-freed
-                 * while migration was ongoing.
-                 * If unmap finds the old page, uncharge() of it will be delayed
-                 * until end_migration(). If unmap finds a new page, it's
-                 * uncharged when it make mapcount to be 1->0. If unmap code
-                 * finds swap_migration_entry, the new page will not be mapped
-                 * and end_migration() will find it(mapcount==0).
-                 *
-                 * B)
-                 * When the old page was mapped but migraion fails, the kernel
-                 * remaps it. A charge for it is kept by MIGRATION flag even
-                 * if mapcount goes down to 0. We can do remap successfully
-                 * without charging it again.
-                 *
-                 * C)
-                 * The "old" page is under lock_page() until the end of
-                 * migration, so, the old page itself will not be swapped-out.
-                 * If the new page is swapped out before end_migraton, our
-                 * hook to usual swap-out path will catch the event.
-                 */
-                if (PageAnon(page))
-                        SetPageCgroupMigration(pc);
-        }
-        unlock_page_cgroup(pc);
-        /*
-         * If the page is not charged at this point,
-         * we return here.
-         */
-        if (!memcg)
-                return;
-
-        *memcgp = memcg;
-        /*
-         * We charge new page before it's used/mapped. So, even if unlock_page()
-         * is called before end_migration, we can catch all events on this new
-         * page. In the case new page is migrated but not remapped, new page's
-         * mapcount will be finally 0 and we call uncharge in end_migration().
-         */
-        /*
-         * The page is committed to the memcg, but it's not actually
-         * charged to the res_counter since we plan on replacing the
-         * old one and only one page is going to be left afterwards.
-         */
-        commit_charge(newpage, memcg, nr_pages, PageAnon(page), false);
-}
-
-/* remove redundant charge if migration failed*/
-void mem_cgroup_end_migration(struct mem_cgroup *memcg,
-        struct page *oldpage, struct page *newpage, bool migration_ok)
-{
-        struct page *used, *unused;
-        struct page_cgroup *pc;
-        bool anon;
-
-        if (!memcg)
-                return;
-
-        if (!migration_ok) {
-                used = oldpage;
-                unused = newpage;
-        } else {
-                used = newpage;
-                unused = oldpage;
-        }
-        anon = PageAnon(used);
-        __mem_cgroup_uncharge_common(unused,
-                anon ? MEM_CGROUP_CHARGE_TYPE_ANON
-                     : MEM_CGROUP_CHARGE_TYPE_CACHE,
-                true);
-        css_put(&memcg->css);
-        /*
-         * We disallowed uncharge of pages under migration because mapcount
-         * of the page goes down to zero, temporarly.
-         * Clear the flag and check the page should be charged.
-         */
-        pc = lookup_page_cgroup(oldpage);
-        lock_page_cgroup(pc);
-        ClearPageCgroupMigration(pc);
-        unlock_page_cgroup(pc);
-
-        /*
-         * If a page is a file cache, radix-tree replacement is very atomic
-         * and we can skip this check. When it was an Anon page, its mapcount
-         * goes down to 0. But because we added MIGRATION flage, it's not
-         * uncharged yet. There are several case but page->mapcount check
-         * and USED bit check in mem_cgroup_uncharge_page() will do enough
-         * check. (see prepare_charge() also)
-         */
-        if (anon)
-                mem_cgroup_uncharge_page(used);
-}
-
-/*
- * At replace page cache, newpage is not under any memcg but it's on
- * LRU. So, this function doesn't touch res_counter but handles LRU
- * in correct way. Both pages are locked so we cannot race with uncharge.
- */
-void mem_cgroup_replace_page_cache(struct page *oldpage,
-                                   struct page *newpage)
-{
-        struct mem_cgroup *memcg = NULL;
-        struct page_cgroup *pc;
-
-        if (mem_cgroup_disabled())
-                return;
-
-        pc = lookup_page_cgroup(oldpage);
-        /* fix accounting on old pages */
-        lock_page_cgroup(pc);
-        if (PageCgroupUsed(pc)) {
-                memcg = pc->mem_cgroup;
-                mem_cgroup_charge_statistics(memcg, oldpage, false, -1);
-                ClearPageCgroupUsed(pc);
-        }
-        unlock_page_cgroup(pc);
-
-        /*
-         * When called from shmem_replace_page(), in some cases the
-         * oldpage has already been charged, and in some cases not.
-         */
-        if (!memcg)
-                return;
-        /*
-         * Even if newpage->mapping was NULL before starting replacement,
-         * the newpage may be on LRU(or pagevec for LRU) already. We lock
-         * LRU while we overwrite pc->mem_cgroup.
-         */
-        commit_charge(newpage, memcg, 1, false, true);
-}
-
 #ifdef CONFIG_DEBUG_VM
 static struct page_cgroup *lookup_page_cgroup_used(struct page *page)
 {
@@ -4263,7 +3883,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
                                                     gfp_mask, &nr_scanned);
                 nr_reclaimed += reclaimed;
                 *total_scanned += nr_scanned;
-                spin_lock(&mctz->lock);
+                spin_lock_irq(&mctz->lock);
 
                 /*
                  * If we failed to reclaim anything from this memory cgroup
@@ -4303,7 +3923,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
                  */
                 /* If excess == 0, no tree ops */
                 __mem_cgroup_insert_exceeded(mz, mctz, excess);
-                spin_unlock(&mctz->lock);
+                spin_unlock_irq(&mctz->lock);
                 css_put(&mz->memcg->css);
                 loop++;
                 /*
@@ -6265,9 +5885,9 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
         if (page) {
                 pc = lookup_page_cgroup(page);
                 /*
-                 * Do only loose check w/o page_cgroup lock.
-                 * mem_cgroup_move_account() checks the pc is valid or not under
-                 * the lock.
+                 * Do only loose check w/o serialization.
+                 * mem_cgroup_move_account() checks the pc is valid or
+                 * not under LRU exclusion.
                  */
                 if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
                         ret = MC_TARGET_PAGE;
@@ -6729,6 +6349,67 @@ static void __init enable_swap_cgroup(void)
 }
 #endif
 
+#ifdef CONFIG_MEMCG_SWAP
+/**
+ * mem_cgroup_swapout - transfer a memsw charge to swap
+ * @page: page whose memsw charge to transfer
+ * @entry: swap entry to move the charge to
+ *
+ * Transfer the memsw charge of @page to @entry.
+ */
+void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
+{
+        struct page_cgroup *pc;
+        unsigned short oldid;
+
+        VM_BUG_ON_PAGE(PageLRU(page), page);
+        VM_BUG_ON_PAGE(page_count(page), page);
+
+        if (!do_swap_account)
+                return;
+
+        pc = lookup_page_cgroup(page);
+
+        /* Readahead page, never charged */
+        if (!PageCgroupUsed(pc))
+                return;
+
+        VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
+
+        oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup));
+        VM_BUG_ON_PAGE(oldid, page);
+
+        pc->flags &= ~PCG_MEMSW;
+        css_get(&pc->mem_cgroup->css);
+        mem_cgroup_swap_statistics(pc->mem_cgroup, true);
+}
+
+/**
+ * mem_cgroup_uncharge_swap - uncharge a swap entry
+ * @entry: swap entry to uncharge
+ *
+ * Drop the memsw charge associated with @entry.
+ */
+void mem_cgroup_uncharge_swap(swp_entry_t entry)
+{
+        struct mem_cgroup *memcg;
+        unsigned short id;
+
+        if (!do_swap_account)
+                return;
+
+        id = swap_cgroup_record(entry, 0);
+        rcu_read_lock();
+        memcg = mem_cgroup_lookup(id);
+        if (memcg) {
+                res_counter_uncharge(&memcg->memsw, PAGE_SIZE);
+                mem_cgroup_swap_statistics(memcg, false);
+                css_put(&memcg->css);
+        }
+        rcu_read_unlock();
+}
+#endif
+
 /**
  * mem_cgroup_try_charge - try charging a page
  * @page: page to charge
@@ -6831,7 +6512,7 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
                 VM_BUG_ON_PAGE(!PageTransHuge(page), page);
         }
 
-        commit_charge(page, memcg, nr_pages, PageAnon(page), lrucare);
+        commit_charge(page, memcg, nr_pages, lrucare);
 
         if (do_swap_account && PageSwapCache(page)) {
                 swp_entry_t entry = { .val = page_private(page) };
@@ -6873,6 +6554,139 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
         cancel_charge(memcg, nr_pages);
 }
 
+/**
+ * mem_cgroup_uncharge - uncharge a page
+ * @page: page to uncharge
+ *
+ * Uncharge a page previously charged with mem_cgroup_try_charge() and
+ * mem_cgroup_commit_charge().
+ */
+void mem_cgroup_uncharge(struct page *page)
+{
+        struct memcg_batch_info *batch;
+        unsigned int nr_pages = 1;
+        struct mem_cgroup *memcg;
+        struct page_cgroup *pc;
+        unsigned long pc_flags;
+        unsigned long flags;
+
+        VM_BUG_ON_PAGE(PageLRU(page), page);
+        VM_BUG_ON_PAGE(page_count(page), page);
+
+        if (mem_cgroup_disabled())
+                return;
+
+        pc = lookup_page_cgroup(page);
+
+        /* Every final put_page() ends up here */
+        if (!PageCgroupUsed(pc))
+                return;
+
+        if (PageTransHuge(page)) {
+                nr_pages <<= compound_order(page);
+                VM_BUG_ON_PAGE(!PageTransHuge(page), page);
+        }
+        /*
+         * Nobody should be changing or seriously looking at
+         * pc->mem_cgroup and pc->flags at this point, we have fully
+         * exclusive access to the page.
+         */
+        memcg = pc->mem_cgroup;
+        pc_flags = pc->flags;
+        pc->flags = 0;
+
+        local_irq_save(flags);
+
+        if (nr_pages > 1)
+                goto direct;
+        if (unlikely(test_thread_flag(TIF_MEMDIE)))
+                goto direct;
+        batch = &current->memcg_batch;
+        if (!batch->do_batch)
+                goto direct;
+        if (batch->memcg && batch->memcg != memcg)
+                goto direct;
+        if (!batch->memcg)
+                batch->memcg = memcg;
+        if (pc_flags & PCG_MEM)
+                batch->nr_pages++;
+        if (pc_flags & PCG_MEMSW)
+                batch->memsw_nr_pages++;
+        goto out;
+direct:
+        if (pc_flags & PCG_MEM)
+                res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE);
+        if (pc_flags & PCG_MEMSW)
+                res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE);
+        memcg_oom_recover(memcg);
+out:
+        mem_cgroup_charge_statistics(memcg, page, -nr_pages);
+        memcg_check_events(memcg, page);
+
+        local_irq_restore(flags);
+}
+
+/**
+ * mem_cgroup_migrate - migrate a charge to another page
+ * @oldpage: currently charged page
+ * @newpage: page to transfer the charge to
+ * @lrucare: both pages might be on the LRU already
+ *
+ * Migrate the charge from @oldpage to @newpage.
+ *
+ * Both pages must be locked, @newpage->mapping must be set up.
+ */
+void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
+                        bool lrucare)
+{
+        unsigned int nr_pages = 1;
+        struct page_cgroup *pc;
+        int isolated;
+
+        VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
+        VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
+        VM_BUG_ON_PAGE(!lrucare && PageLRU(oldpage), oldpage);
+        VM_BUG_ON_PAGE(!lrucare && PageLRU(newpage), newpage);
+        VM_BUG_ON_PAGE(PageAnon(oldpage) != PageAnon(newpage), newpage);
+
+        if (mem_cgroup_disabled())
+                return;
+
+        /* Page cache replacement: new page already charged? */
+        pc = lookup_page_cgroup(newpage);
+        if (PageCgroupUsed(pc))
+                return;
+
+        /* Re-entrant migration: old page already uncharged? */
+        pc = lookup_page_cgroup(oldpage);
+        if (!PageCgroupUsed(pc))
+                return;
+
+        VM_BUG_ON_PAGE(!(pc->flags & PCG_MEM), oldpage);
+        VM_BUG_ON_PAGE(do_swap_account && !(pc->flags & PCG_MEMSW), oldpage);
+
+        if (PageTransHuge(oldpage)) {
+                nr_pages <<= compound_order(oldpage);
+                VM_BUG_ON_PAGE(!PageTransHuge(oldpage), oldpage);
+                VM_BUG_ON_PAGE(!PageTransHuge(newpage), newpage);
+        }
+
+        if (lrucare)
+                lock_page_lru(oldpage, &isolated);
+
+        pc->flags = 0;
+
+        if (lrucare)
+                unlock_page_lru(oldpage, isolated);
+
+        local_irq_disable();
+        mem_cgroup_charge_statistics(pc->mem_cgroup, oldpage, -nr_pages);
+        memcg_check_events(pc->mem_cgroup, oldpage);
+        local_irq_enable();
+
+        commit_charge(newpage, pc->mem_cgroup, nr_pages, lrucare);
+}
+
 /*
  * subsys_initcall() for memory controller.
  *
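With all charging and uncharging consolidated in memcontrol.c above, mem_cgroup_uncharge_start()/mem_cgroup_uncharge_end() are reduced to an IRQ-safe bracket around the per-task uncharge batch, and mem_cgroup_uncharge() is called once per page that has dropped its last reference. The sketch below shows how a bulk-free caller uses that pairing, modeled on the release_pages() hunk in mm/swap.c further down; example_free_pages_bulk() is a hypothetical name and the pages are assumed to be off the LRU already.

#include <linux/gfp.h>
#include <linux/list.h>
#include <linux/memcontrol.h>
#include <linux/mm.h>

static void example_free_pages_bulk(struct page **pages, int nr)
{
        LIST_HEAD(pages_to_free);
        int i;

        mem_cgroup_uncharge_start();            /* open the per-task uncharge batch */
        for (i = 0; i < nr; i++) {
                struct page *page = pages[i];

                if (!put_page_testzero(page))
                        continue;               /* someone else still holds a reference */
                mem_cgroup_uncharge(page);      /* coalesced into the batch when possible */
                list_add(&page->lru, &pages_to_free);
        }
        mem_cgroup_uncharge_end();              /* flush the batched res_counter uncharge */

        free_hot_cold_page_list(&pages_to_free, false);
}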
diff --git a/mm/memory.c b/mm/memory.c
index 6d7648773dc4..2a899e4e82ba 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1292,7 +1292,6 @@ static void unmap_page_range(struct mmu_gather *tlb,
 		details = NULL;
 
 	BUG_ON(addr >= end);
-	mem_cgroup_uncharge_start();
 	tlb_start_vma(tlb, vma);
 	pgd = pgd_offset(vma->vm_mm, addr);
 	do {
@@ -1302,7 +1301,6 @@ static void unmap_page_range(struct mmu_gather *tlb,
 		next = zap_pud_range(tlb, vma, pgd, addr, next, details);
 	} while (pgd++, addr = next, addr != end);
 	tlb_end_vma(tlb, vma);
-	mem_cgroup_uncharge_end();
 }
 
 
diff --git a/mm/migrate.c b/mm/migrate.c
index be6dbf995c0c..f78ec9bd454d 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -780,6 +780,7 @@ static int move_to_new_page(struct page *newpage, struct page *page,
 	if (rc != MIGRATEPAGE_SUCCESS) {
 		newpage->mapping = NULL;
 	} else {
+		mem_cgroup_migrate(page, newpage, false);
 		if (remap_swapcache)
 			remove_migration_ptes(page, newpage);
 		page->mapping = NULL;
@@ -795,7 +796,6 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 {
 	int rc = -EAGAIN;
 	int remap_swapcache = 1;
-	struct mem_cgroup *mem;
 	struct anon_vma *anon_vma = NULL;
 
 	if (!trylock_page(page)) {
@@ -821,9 +821,6 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 		lock_page(page);
 	}
 
-	/* charge against new page */
-	mem_cgroup_prepare_migration(page, newpage, &mem);
-
 	if (PageWriteback(page)) {
 		/*
 		 * Only in the case of a full synchronous migration is it
@@ -833,10 +830,10 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 		 */
 		if (mode != MIGRATE_SYNC) {
 			rc = -EBUSY;
-			goto uncharge;
+			goto out_unlock;
 		}
 		if (!force)
-			goto uncharge;
+			goto out_unlock;
 		wait_on_page_writeback(page);
 	}
 	/*
@@ -872,7 +869,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 			 */
 			remap_swapcache = 0;
 		} else {
-			goto uncharge;
+			goto out_unlock;
 		}
 	}
 
@@ -885,7 +882,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 		 * the page migration right away (proteced by page lock).
 		 */
 		rc = balloon_page_migrate(newpage, page, mode);
-		goto uncharge;
+		goto out_unlock;
 	}
 
 	/*
@@ -904,7 +901,7 @@ static int __unmap_and_move(struct page *page, struct page *newpage,
 		VM_BUG_ON_PAGE(PageAnon(page), page);
 		if (page_has_private(page)) {
 			try_to_free_buffers(page);
-			goto uncharge;
+			goto out_unlock;
 		}
 		goto skip_unmap;
 	}
@@ -923,10 +920,7 @@ skip_unmap:
 	if (anon_vma)
 		put_anon_vma(anon_vma);
 
-uncharge:
-	mem_cgroup_end_migration(mem, page, newpage,
-				 (rc == MIGRATEPAGE_SUCCESS ||
-				  rc == MIGRATEPAGE_BALLOON_SUCCESS));
+out_unlock:
 	unlock_page(page);
 out:
 	return rc;
@@ -1786,7 +1780,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	pg_data_t *pgdat = NODE_DATA(node);
 	int isolated = 0;
 	struct page *new_page = NULL;
-	struct mem_cgroup *memcg = NULL;
 	int page_lru = page_is_file_cache(page);
 	unsigned long mmun_start = address & HPAGE_PMD_MASK;
 	unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
@@ -1852,15 +1845,6 @@ fail_putback:
 		goto out_unlock;
 	}
 
-	/*
-	 * Traditional migration needs to prepare the memcg charge
-	 * transaction early to prevent the old page from being
-	 * uncharged when installing migration entries.  Here we can
-	 * save the potential rollback and start the charge transfer
-	 * only when migration is already known to end successfully.
-	 */
-	mem_cgroup_prepare_migration(page, new_page, &memcg);
-
 	orig_entry = *pmd;
 	entry = mk_pmd(new_page, vma->vm_page_prot);
 	entry = pmd_mkhuge(entry);
@@ -1888,14 +1872,10 @@ fail_putback:
 		goto fail_putback;
 	}
 
+	mem_cgroup_migrate(page, new_page, false);
+
 	page_remove_rmap(page);
 
-	/*
-	 * Finish the charge transaction under the page table lock to
-	 * prevent split_huge_page() from dividing up the charge
-	 * before it's fully transferred to the new page.
-	 */
-	mem_cgroup_end_migration(memcg, page, new_page, true);
 	spin_unlock(ptl);
 	mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
diff --git a/mm/rmap.c b/mm/rmap.c
index f56b5ed78128..3e8491c504f8 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1089,7 +1089,6 @@ void page_remove_rmap(struct page *page)
 	if (unlikely(PageHuge(page)))
 		goto out;
 	if (anon) {
-		mem_cgroup_uncharge_page(page);
 		if (PageTransHuge(page))
 			__dec_zone_page_state(page,
 					      NR_ANON_TRANSPARENT_HUGEPAGES);
diff --git a/mm/shmem.c b/mm/shmem.c
index 1f1a8085538b..6dc80d298f9d 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -419,7 +419,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 							pvec.pages, indices);
 		if (!pvec.nr)
 			break;
-		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
@@ -447,7 +446,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 		}
 		pagevec_remove_exceptionals(&pvec);
 		pagevec_release(&pvec);
-		mem_cgroup_uncharge_end();
 		cond_resched();
 		index++;
 	}
@@ -495,7 +493,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 			index = start;
 			continue;
 		}
-		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
@@ -531,7 +528,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 		}
 		pagevec_remove_exceptionals(&pvec);
 		pagevec_release(&pvec);
-		mem_cgroup_uncharge_end();
 		index++;
 	}
 
@@ -835,7 +831,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
 	}
 
 	mutex_unlock(&shmem_swaplist_mutex);
-	swapcache_free(swap, NULL);
+	swapcache_free(swap);
 redirty:
 	set_page_dirty(page);
 	if (wbc->for_reclaim)
@@ -1008,7 +1004,7 @@ static int shmem_replace_page(struct page **pagep, gfp_t gfp,
 		 */
 		oldpage = newpage;
 	} else {
-		mem_cgroup_replace_page_cache(oldpage, newpage);
+		mem_cgroup_migrate(oldpage, newpage, false);
 		lru_cache_add_anon(newpage);
 		*pagep = newpage;
 	}
diff --git a/mm/swap.c b/mm/swap.c
index 3baca701bb78..00523fffa5ed 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -62,6 +62,7 @@ static void __page_cache_release(struct page *page)
 		del_page_from_lru_list(page, lruvec, page_off_lru(page));
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 	}
+	mem_cgroup_uncharge(page);
 }
 
 static void __put_single_page(struct page *page)
@@ -907,6 +908,8 @@ void release_pages(struct page **pages, int nr, bool cold)
 	struct lruvec *lruvec;
 	unsigned long uninitialized_var(flags);
 
+	mem_cgroup_uncharge_start();
+
 	for (i = 0; i < nr; i++) {
 		struct page *page = pages[i];
 
@@ -938,6 +941,7 @@ void release_pages(struct page **pages, int nr, bool cold)
 			__ClearPageLRU(page);
 			del_page_from_lru_list(page, lruvec, page_off_lru(page));
 		}
+		mem_cgroup_uncharge(page);
 
 		/* Clear Active bit in case of parallel mark_page_accessed */
 		__ClearPageActive(page);
@@ -947,6 +951,8 @@ void release_pages(struct page **pages, int nr, bool cold)
 	if (zone)
 		spin_unlock_irqrestore(&zone->lru_lock, flags);
 
+	mem_cgroup_uncharge_end();
+
 	free_hot_cold_page_list(&pages_to_free, cold);
 }
 EXPORT_SYMBOL(release_pages);
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 2972eee184a4..e160151da6b8 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -176,7 +176,7 @@ int add_to_swap(struct page *page, struct list_head *list)
 
 	if (unlikely(PageTransHuge(page)))
 		if (unlikely(split_huge_page_to_list(page, list))) {
-			swapcache_free(entry, NULL);
+			swapcache_free(entry);
 			return 0;
 		}
 
@@ -202,7 +202,7 @@ int add_to_swap(struct page *page, struct list_head *list)
 		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
 		 * clear SWAP_HAS_CACHE flag.
 		 */
-		swapcache_free(entry, NULL);
+		swapcache_free(entry);
 		return 0;
 	}
 }
@@ -225,7 +225,7 @@ void delete_from_swap_cache(struct page *page)
 	__delete_from_swap_cache(page);
 	spin_unlock_irq(&address_space->tree_lock);
 
-	swapcache_free(entry, page);
+	swapcache_free(entry);
 	page_cache_release(page);
 }
 
@@ -386,7 +386,7 @@ struct page *read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
 		 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
 		 * clear SWAP_HAS_CACHE flag.
 		 */
-		swapcache_free(entry, NULL);
+		swapcache_free(entry);
 	} while (err != -ENOMEM);
 
 	if (new_page)
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 0883b4912ff7..8798b2e0ac59 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -843,16 +843,13 @@ void swap_free(swp_entry_t entry)
 /*
  * Called after dropping swapcache to decrease refcnt to swap entries.
  */
-void swapcache_free(swp_entry_t entry, struct page *page)
+void swapcache_free(swp_entry_t entry)
 {
 	struct swap_info_struct *p;
-	unsigned char count;
 
 	p = swap_info_get(entry);
 	if (p) {
-		count = swap_entry_free(p, entry, SWAP_HAS_CACHE);
-		if (page)
-			mem_cgroup_uncharge_swapcache(page, entry, count != 0);
+		swap_entry_free(p, entry, SWAP_HAS_CACHE);
 		spin_unlock(&p->lock);
 	}
 }
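swapcache_free() no longer reports back to memcg at all: the memsw charge is parked on the swap entry by mem_cgroup_swapout() when the page leaves the swap cache for writeout, and dropped by mem_cgroup_uncharge_swap() when the swap slot itself is freed (both added in the memcontrol.c hunk above). The sketch below only illustrates that ordering; the actual call sites are outside the hunks shown here, and the example_* wrappers are hypothetical.

#include <linux/memcontrol.h>
#include <linux/swap.h>

/* Swap-out side: the page is exclusive (off the LRU, no remaining users). */
static void example_swap_out_accounting(struct page *page, swp_entry_t entry)
{
        /* Record pc->mem_cgroup in the swap_cgroup map and keep memsw charged. */
        mem_cgroup_swapout(page, entry);
}

/* Swap-slot-free side: the last reference to the entry is gone. */
static void example_swap_slot_freed(swp_entry_t entry)
{
        /* Drop the memsw charge that was transferred at swap-out. */
        mem_cgroup_uncharge_swap(entry);
}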
diff --git a/mm/truncate.c b/mm/truncate.c
index eda247307164..96d167372d89 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -281,7 +281,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
 	while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
 			min(end - index, (pgoff_t)PAGEVEC_SIZE),
 			indices)) {
-		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
@@ -307,7 +306,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
 		}
 		pagevec_remove_exceptionals(&pvec);
 		pagevec_release(&pvec);
-		mem_cgroup_uncharge_end();
 		cond_resched();
 		index++;
 	}
@@ -369,7 +367,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
 			pagevec_release(&pvec);
 			break;
 		}
-		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
@@ -394,7 +391,6 @@ void truncate_inode_pages_range(struct address_space *mapping,
 		}
 		pagevec_remove_exceptionals(&pvec);
 		pagevec_release(&pvec);
-		mem_cgroup_uncharge_end();
 		index++;
 	}
 	cleancache_invalidate_inode(mapping);
@@ -493,7 +489,6 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 	while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
 			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
 			indices)) {
-		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
@@ -522,7 +517,6 @@ unsigned long invalidate_mapping_pages(struct address_space *mapping,
 		}
 		pagevec_remove_exceptionals(&pvec);
 		pagevec_release(&pvec);
-		mem_cgroup_uncharge_end();
 		cond_resched();
 		index++;
 	}
@@ -553,7 +547,6 @@ invalidate_complete_page2(struct address_space *mapping, struct page *page)
 	BUG_ON(page_has_private(page));
 	__delete_from_page_cache(page, NULL);
 	spin_unlock_irq(&mapping->tree_lock);
-	mem_cgroup_uncharge_cache_page(page);
 
 	if (mapping->a_ops->freepage)
 		mapping->a_ops->freepage(page);
@@ -602,7 +595,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 	while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
 			min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
 			indices)) {
-		mem_cgroup_uncharge_start();
 		for (i = 0; i < pagevec_count(&pvec); i++) {
 			struct page *page = pvec.pages[i];
 
@@ -655,7 +647,6 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
655 } 647 }
656 pagevec_remove_exceptionals(&pvec); 648 pagevec_remove_exceptionals(&pvec);
657 pagevec_release(&pvec); 649 pagevec_release(&pvec);
658 mem_cgroup_uncharge_end();
659 cond_resched(); 650 cond_resched();
660 index++; 651 index++;
661 } 652 }
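The mm/truncate.c changes are all of one shape: each pagevec loop loses its mem_cgroup_uncharge_start()/mem_cgroup_uncharge_end() bracket, and invalidate_complete_page2() loses its explicit mem_cgroup_uncharge_cache_page() call. A condensed before/after of the loop shape (illustrative; the real loops keep the index checks, locking and exceptional-entry handling shown above):

	/* before: every batch did its own memcg uncharging */
	mem_cgroup_uncharge_start();
	for (i = 0; i < pagevec_count(&pvec); i++) {
		/* truncate or invalidate pvec.pages[i] */
	}
	pagevec_remove_exceptionals(&pvec);
	pagevec_release(&pvec);
	mem_cgroup_uncharge_end();

	/* after: plain page-cache work; the memcg charge is dropped when the
	 * page itself is finally released (cf. the release_pages() hunk above) */
	for (i = 0; i < pagevec_count(&pvec); i++) {
		/* truncate or invalidate pvec.pages[i] */
	}
	pagevec_remove_exceptionals(&pvec);
	pagevec_release(&pvec);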
diff --git a/mm/vmscan.c b/mm/vmscan.c
index d2f65c856350..7068e838d22b 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -577,9 +577,10 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
577 577
578 if (PageSwapCache(page)) { 578 if (PageSwapCache(page)) {
579 swp_entry_t swap = { .val = page_private(page) }; 579 swp_entry_t swap = { .val = page_private(page) };
580 mem_cgroup_swapout(page, swap);
580 __delete_from_swap_cache(page); 581 __delete_from_swap_cache(page);
581 spin_unlock_irq(&mapping->tree_lock); 582 spin_unlock_irq(&mapping->tree_lock);
582 swapcache_free(swap, page); 583 swapcache_free(swap);
583 } else { 584 } else {
584 void (*freepage)(struct page *); 585 void (*freepage)(struct page *);
585 void *shadow = NULL; 586 void *shadow = NULL;
@@ -600,7 +601,6 @@ static int __remove_mapping(struct address_space *mapping, struct page *page,
600 shadow = workingset_eviction(mapping, page); 601 shadow = workingset_eviction(mapping, page);
601 __delete_from_page_cache(page, shadow); 602 __delete_from_page_cache(page, shadow);
602 spin_unlock_irq(&mapping->tree_lock); 603 spin_unlock_irq(&mapping->tree_lock);
603 mem_cgroup_uncharge_cache_page(page);
604 604
605 if (freepage != NULL) 605 if (freepage != NULL)
606 freepage(page); 606 freepage(page);
@@ -1103,6 +1103,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
1103 */ 1103 */
1104 __clear_page_locked(page); 1104 __clear_page_locked(page);
1105free_it: 1105free_it:
1106 mem_cgroup_uncharge(page);
1106 nr_reclaimed++; 1107 nr_reclaimed++;
1107 1108
1108 /* 1109 /*
@@ -1132,12 +1133,13 @@ keep:
1132 list_add(&page->lru, &ret_pages); 1133 list_add(&page->lru, &ret_pages);
1133 VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page); 1134 VM_BUG_ON_PAGE(PageLRU(page) || PageUnevictable(page), page);
1134 } 1135 }
1136 mem_cgroup_uncharge_end();
1135 1137
1136 free_hot_cold_page_list(&free_pages, true); 1138 free_hot_cold_page_list(&free_pages, true);
1137 1139
1138 list_splice(&ret_pages, page_list); 1140 list_splice(&ret_pages, page_list);
1139 count_vm_events(PGACTIVATE, pgactivate); 1141 count_vm_events(PGACTIVATE, pgactivate);
1140 mem_cgroup_uncharge_end(); 1142
1141 *ret_nr_dirty += nr_dirty; 1143 *ret_nr_dirty += nr_dirty;
1142 *ret_nr_congested += nr_congested; 1144 *ret_nr_congested += nr_congested;
1143 *ret_nr_unqueued_dirty += nr_unqueued_dirty; 1145 *ret_nr_unqueued_dirty += nr_unqueued_dirty;
@@ -1435,6 +1437,8 @@ putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
1435 __ClearPageActive(page); 1437 __ClearPageActive(page);
1436 del_page_from_lru_list(page, lruvec, lru); 1438 del_page_from_lru_list(page, lruvec, lru);
1437 1439
1440 mem_cgroup_uncharge(page);
1441
1438 if (unlikely(PageCompound(page))) { 1442 if (unlikely(PageCompound(page))) {
1439 spin_unlock_irq(&zone->lru_lock); 1443 spin_unlock_irq(&zone->lru_lock);
1440 (*get_compound_page_dtor(page))(page); 1444 (*get_compound_page_dtor(page))(page);
@@ -1656,6 +1660,8 @@ static void move_active_pages_to_lru(struct lruvec *lruvec,
1656 __ClearPageActive(page); 1660 __ClearPageActive(page);
1657 del_page_from_lru_list(page, lruvec, lru); 1661 del_page_from_lru_list(page, lruvec, lru);
1658 1662
1663 mem_cgroup_uncharge(page);
1664
1659 if (unlikely(PageCompound(page))) { 1665 if (unlikely(PageCompound(page))) {
1660 spin_unlock_irq(&zone->lru_lock); 1666 spin_unlock_irq(&zone->lru_lock);
1661 (*get_compound_page_dtor(page))(page); 1667 (*get_compound_page_dtor(page))(page);
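The mm/vmscan.c hunks are the reclaim-side counterpart: __remove_mapping() gains the new mem_cgroup_swapout() hook while the page is still in the swap cache and tree_lock is held, the page-cache branch stops uncharging by hand, and shrink_page_list() plus the LRU putback paths call mem_cgroup_uncharge() directly on pages they are about to free. The patched swap-cache branch, reconstructed from the hunk above with the diff markup stripped:

	if (PageSwapCache(page)) {
		swp_entry_t swap = { .val = page_private(page) };
		mem_cgroup_swapout(page, swap);	/* new memcg hook, under tree_lock */
		__delete_from_swap_cache(page);
		spin_unlock_irq(&mapping->tree_lock);
		swapcache_free(swap);		/* one-argument form, as above */
	}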
diff --git a/mm/zswap.c b/mm/zswap.c
index 032c21eeab2b..9da56af24df5 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -507,7 +507,7 @@ static int zswap_get_swap_cache_page(swp_entry_t entry,
507 * add_to_swap_cache() doesn't return -EEXIST, so we can safely 507 * add_to_swap_cache() doesn't return -EEXIST, so we can safely
508 * clear SWAP_HAS_CACHE flag. 508 * clear SWAP_HAS_CACHE flag.
509 */ 509 */
510 swapcache_free(entry, NULL); 510 swapcache_free(entry);
511 } while (err != -ENOMEM); 511 } while (err != -ENOMEM);
512 512
513 if (new_page) 513 if (new_page)