Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r-- | mm/memcontrol.c | 828
1 file changed, 321 insertions, 507 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 1cbe1e54ff5f..9106f1b12f56 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -754,9 +754,11 @@ static void __mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz,
754 | static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz, | 754 | static void mem_cgroup_remove_exceeded(struct mem_cgroup_per_zone *mz, |
755 | struct mem_cgroup_tree_per_zone *mctz) | 755 | struct mem_cgroup_tree_per_zone *mctz) |
756 | { | 756 | { |
757 | spin_lock(&mctz->lock); | 757 | unsigned long flags; |
758 | |||
759 | spin_lock_irqsave(&mctz->lock, flags); | ||
758 | __mem_cgroup_remove_exceeded(mz, mctz); | 760 | __mem_cgroup_remove_exceeded(mz, mctz); |
759 | spin_unlock(&mctz->lock); | 761 | spin_unlock_irqrestore(&mctz->lock, flags); |
760 | } | 762 | } |
761 | 763 | ||
762 | 764 | ||
@@ -779,7 +781,9 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
779 | * mem is over its softlimit. | 781 | * mem is over its softlimit. |
780 | */ | 782 | */ |
781 | if (excess || mz->on_tree) { | 783 | if (excess || mz->on_tree) { |
782 | spin_lock(&mctz->lock); | 784 | unsigned long flags; |
785 | |||
786 | spin_lock_irqsave(&mctz->lock, flags); | ||
783 | /* if on-tree, remove it */ | 787 | /* if on-tree, remove it */ |
784 | if (mz->on_tree) | 788 | if (mz->on_tree) |
785 | __mem_cgroup_remove_exceeded(mz, mctz); | 789 | __mem_cgroup_remove_exceeded(mz, mctz); |
@@ -788,7 +792,7 @@ static void mem_cgroup_update_tree(struct mem_cgroup *memcg, struct page *page)
788 | * If excess is 0, no tree ops. | 792 | * If excess is 0, no tree ops. |
789 | */ | 793 | */ |
790 | __mem_cgroup_insert_exceeded(mz, mctz, excess); | 794 | __mem_cgroup_insert_exceeded(mz, mctz, excess); |
791 | spin_unlock(&mctz->lock); | 795 | spin_unlock_irqrestore(&mctz->lock, flags); |
792 | } | 796 | } |
793 | } | 797 | } |
794 | } | 798 | } |
@@ -839,9 +843,9 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz)
839 | { | 843 | { |
840 | struct mem_cgroup_per_zone *mz; | 844 | struct mem_cgroup_per_zone *mz; |
841 | 845 | ||
842 | spin_lock(&mctz->lock); | 846 | spin_lock_irq(&mctz->lock); |
843 | mz = __mem_cgroup_largest_soft_limit_node(mctz); | 847 | mz = __mem_cgroup_largest_soft_limit_node(mctz); |
844 | spin_unlock(&mctz->lock); | 848 | spin_unlock_irq(&mctz->lock); |
845 | return mz; | 849 | return mz; |
846 | } | 850 | } |
847 | 851 | ||
@@ -882,13 +886,6 @@ static long mem_cgroup_read_stat(struct mem_cgroup *memcg,
882 | return val; | 886 | return val; |
883 | } | 887 | } |
884 | 888 | ||
885 | static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg, | ||
886 | bool charge) | ||
887 | { | ||
888 | int val = (charge) ? 1 : -1; | ||
889 | this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val); | ||
890 | } | ||
891 | |||
892 | static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg, | 889 | static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg, |
893 | enum mem_cgroup_events_index idx) | 890 | enum mem_cgroup_events_index idx) |
894 | { | 891 | { |
@@ -909,13 +906,13 @@ static unsigned long mem_cgroup_read_events(struct mem_cgroup *memcg,
909 | 906 | ||
910 | static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, | 907 | static void mem_cgroup_charge_statistics(struct mem_cgroup *memcg, |
911 | struct page *page, | 908 | struct page *page, |
912 | bool anon, int nr_pages) | 909 | int nr_pages) |
913 | { | 910 | { |
914 | /* | 911 | /* |
915 | * Here, RSS means 'mapped anon' and anon's SwapCache. Shmem/tmpfs is | 912 | * Here, RSS means 'mapped anon' and anon's SwapCache. Shmem/tmpfs is |
916 | * counted as CACHE even if it's on ANON LRU. | 913 | * counted as CACHE even if it's on ANON LRU. |
917 | */ | 914 | */ |
918 | if (anon) | 915 | if (PageAnon(page)) |
919 | __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS], | 916 | __this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_RSS], |
920 | nr_pages); | 917 | nr_pages); |
921 | else | 918 | else |
@@ -1013,7 +1010,6 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
1013 | */ | 1010 | */ |
1014 | static void memcg_check_events(struct mem_cgroup *memcg, struct page *page) | 1011 | static void memcg_check_events(struct mem_cgroup *memcg, struct page *page) |
1015 | { | 1012 | { |
1016 | preempt_disable(); | ||
1017 | /* threshold event is triggered in finer grain than soft limit */ | 1013 | /* threshold event is triggered in finer grain than soft limit */ |
1018 | if (unlikely(mem_cgroup_event_ratelimit(memcg, | 1014 | if (unlikely(mem_cgroup_event_ratelimit(memcg, |
1019 | MEM_CGROUP_TARGET_THRESH))) { | 1015 | MEM_CGROUP_TARGET_THRESH))) { |
@@ -1026,8 +1022,6 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
1026 | do_numainfo = mem_cgroup_event_ratelimit(memcg, | 1022 | do_numainfo = mem_cgroup_event_ratelimit(memcg, |
1027 | MEM_CGROUP_TARGET_NUMAINFO); | 1023 | MEM_CGROUP_TARGET_NUMAINFO); |
1028 | #endif | 1024 | #endif |
1029 | preempt_enable(); | ||
1030 | |||
1031 | mem_cgroup_threshold(memcg); | 1025 | mem_cgroup_threshold(memcg); |
1032 | if (unlikely(do_softlimit)) | 1026 | if (unlikely(do_softlimit)) |
1033 | mem_cgroup_update_tree(memcg, page); | 1027 | mem_cgroup_update_tree(memcg, page); |
@@ -1035,8 +1029,7 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
1035 | if (unlikely(do_numainfo)) | 1029 | if (unlikely(do_numainfo)) |
1036 | atomic_inc(&memcg->numainfo_events); | 1030 | atomic_inc(&memcg->numainfo_events); |
1037 | #endif | 1031 | #endif |
1038 | } else | 1032 | } |
1039 | preempt_enable(); | ||
1040 | } | 1033 | } |
1041 | 1034 | ||
1042 | struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) | 1035 | struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p) |
@@ -1347,20 +1340,6 @@ out:
1347 | return lruvec; | 1340 | return lruvec; |
1348 | } | 1341 | } |
1349 | 1342 | ||
1350 | /* | ||
1351 | * Following LRU functions are allowed to be used without PCG_LOCK. | ||
1352 | * Operations are called by routine of global LRU independently from memcg. | ||
1353 | * What we have to take care of here is validness of pc->mem_cgroup. | ||
1354 | * | ||
1355 | * Changes to pc->mem_cgroup happens when | ||
1356 | * 1. charge | ||
1357 | * 2. moving account | ||
1358 | * In typical case, "charge" is done before add-to-lru. Exception is SwapCache. | ||
1359 | * It is added to LRU before charge. | ||
1360 | * If PCG_USED bit is not set, page_cgroup is not added to this private LRU. | ||
1361 | * When moving account, the page is not on LRU. It's isolated. | ||
1362 | */ | ||
1363 | |||
1364 | /** | 1343 | /** |
1365 | * mem_cgroup_page_lruvec - return lruvec for adding an lru page | 1344 | * mem_cgroup_page_lruvec - return lruvec for adding an lru page |
1366 | * @page: the page | 1345 | * @page: the page |
@@ -2261,22 +2240,14 @@ cleanup:
2261 | * | 2240 | * |
2262 | * Notes: Race condition | 2241 | * Notes: Race condition |
2263 | * | 2242 | * |
2264 | * We usually use lock_page_cgroup() for accessing page_cgroup member but | 2243 | * Charging occurs during page instantiation, while the page is |
2265 | * it tends to be costly. But considering some conditions, we doesn't need | 2244 | * unmapped and locked in page migration, or while the page table is |
2266 | * to do so _always_. | 2245 | * locked in THP migration. No race is possible. |
2267 | * | ||
2268 | * Considering "charge", lock_page_cgroup() is not required because all | ||
2269 | * file-stat operations happen after a page is attached to radix-tree. There | ||
2270 | * are no race with "charge". | ||
2271 | * | 2246 | * |
2272 | * Considering "uncharge", we know that memcg doesn't clear pc->mem_cgroup | 2247 | * Uncharge happens to pages with zero references, no race possible. |
2273 | * at "uncharge" intentionally. So, we always see valid pc->mem_cgroup even | ||
2274 | * if there are race with "uncharge". Statistics itself is properly handled | ||
2275 | * by flags. | ||
2276 | * | 2248 | * |
2277 | * Considering "move", this is an only case we see a race. To make the race | 2249 | * Charge moving between groups is protected by checking mm->moving |
2278 | * small, we check memcg->moving_account and detect there are possibility | 2250 | * account and taking the move_lock in the slowpath. |
2279 | * of race or not. If there is, we take a lock. | ||
2280 | */ | 2251 | */ |
2281 | 2252 | ||
2282 | void __mem_cgroup_begin_update_page_stat(struct page *page, | 2253 | void __mem_cgroup_begin_update_page_stat(struct page *page, |
@@ -2689,6 +2660,16 @@ static struct mem_cgroup *mem_cgroup_lookup(unsigned short id)
2689 | return mem_cgroup_from_id(id); | 2660 | return mem_cgroup_from_id(id); |
2690 | } | 2661 | } |
2691 | 2662 | ||
2663 | /* | ||
2664 | * try_get_mem_cgroup_from_page - look up page's memcg association | ||
2665 | * @page: the page | ||
2666 | * | ||
2667 | * Look up, get a css reference, and return the memcg that owns @page. | ||
2668 | * | ||
2669 | * The page must be locked to prevent racing with swap-in and page | ||
2670 | * cache charges. If coming from an unlocked page table, the caller | ||
2671 | * must ensure the page is on the LRU or this can race with charging. | ||
2672 | */ | ||
2692 | struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) | 2673 | struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) |
2693 | { | 2674 | { |
2694 | struct mem_cgroup *memcg = NULL; | 2675 | struct mem_cgroup *memcg = NULL; |
@@ -2699,7 +2680,6 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
2699 | VM_BUG_ON_PAGE(!PageLocked(page), page); | 2680 | VM_BUG_ON_PAGE(!PageLocked(page), page); |
2700 | 2681 | ||
2701 | pc = lookup_page_cgroup(page); | 2682 | pc = lookup_page_cgroup(page); |
2702 | lock_page_cgroup(pc); | ||
2703 | if (PageCgroupUsed(pc)) { | 2683 | if (PageCgroupUsed(pc)) { |
2704 | memcg = pc->mem_cgroup; | 2684 | memcg = pc->mem_cgroup; |
2705 | if (memcg && !css_tryget_online(&memcg->css)) | 2685 | if (memcg && !css_tryget_online(&memcg->css)) |
@@ -2713,19 +2693,46 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
2713 | memcg = NULL; | 2693 | memcg = NULL; |
2714 | rcu_read_unlock(); | 2694 | rcu_read_unlock(); |
2715 | } | 2695 | } |
2716 | unlock_page_cgroup(pc); | ||
2717 | return memcg; | 2696 | return memcg; |
2718 | } | 2697 | } |
2719 | 2698 | ||
2699 | static void lock_page_lru(struct page *page, int *isolated) | ||
2700 | { | ||
2701 | struct zone *zone = page_zone(page); | ||
2702 | |||
2703 | spin_lock_irq(&zone->lru_lock); | ||
2704 | if (PageLRU(page)) { | ||
2705 | struct lruvec *lruvec; | ||
2706 | |||
2707 | lruvec = mem_cgroup_page_lruvec(page, zone); | ||
2708 | ClearPageLRU(page); | ||
2709 | del_page_from_lru_list(page, lruvec, page_lru(page)); | ||
2710 | *isolated = 1; | ||
2711 | } else | ||
2712 | *isolated = 0; | ||
2713 | } | ||
2714 | |||
2715 | static void unlock_page_lru(struct page *page, int isolated) | ||
2716 | { | ||
2717 | struct zone *zone = page_zone(page); | ||
2718 | |||
2719 | if (isolated) { | ||
2720 | struct lruvec *lruvec; | ||
2721 | |||
2722 | lruvec = mem_cgroup_page_lruvec(page, zone); | ||
2723 | VM_BUG_ON_PAGE(PageLRU(page), page); | ||
2724 | SetPageLRU(page); | ||
2725 | add_page_to_lru_list(page, lruvec, page_lru(page)); | ||
2726 | } | ||
2727 | spin_unlock_irq(&zone->lru_lock); | ||
2728 | } | ||
2729 | |||
2720 | static void commit_charge(struct page *page, struct mem_cgroup *memcg, | 2730 | static void commit_charge(struct page *page, struct mem_cgroup *memcg, |
2721 | unsigned int nr_pages, bool anon, bool lrucare) | 2731 | unsigned int nr_pages, bool lrucare) |
2722 | { | 2732 | { |
2723 | struct page_cgroup *pc = lookup_page_cgroup(page); | 2733 | struct page_cgroup *pc = lookup_page_cgroup(page); |
2724 | struct zone *uninitialized_var(zone); | 2734 | int isolated; |
2725 | struct lruvec *lruvec; | ||
2726 | bool was_on_lru = false; | ||
2727 | 2735 | ||
2728 | lock_page_cgroup(pc); | ||
2729 | VM_BUG_ON_PAGE(PageCgroupUsed(pc), page); | 2736 | VM_BUG_ON_PAGE(PageCgroupUsed(pc), page); |
2730 | /* | 2737 | /* |
2731 | * we don't need page_cgroup_lock about tail pages, becase they are not | 2738 | * we don't need page_cgroup_lock about tail pages, becase they are not |
@@ -2736,39 +2743,38 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg,
2736 | * In some cases, SwapCache and FUSE(splice_buf->radixtree), the page | 2743 | * In some cases, SwapCache and FUSE(splice_buf->radixtree), the page |
2737 | * may already be on some other mem_cgroup's LRU. Take care of it. | 2744 | * may already be on some other mem_cgroup's LRU. Take care of it. |
2738 | */ | 2745 | */ |
2739 | if (lrucare) { | 2746 | if (lrucare) |
2740 | zone = page_zone(page); | 2747 | lock_page_lru(page, &isolated); |
2741 | spin_lock_irq(&zone->lru_lock); | ||
2742 | if (PageLRU(page)) { | ||
2743 | lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup); | ||
2744 | ClearPageLRU(page); | ||
2745 | del_page_from_lru_list(page, lruvec, page_lru(page)); | ||
2746 | was_on_lru = true; | ||
2747 | } | ||
2748 | } | ||
2749 | 2748 | ||
2749 | /* | ||
2750 | * Nobody should be changing or seriously looking at | ||
2751 | * pc->mem_cgroup and pc->flags at this point: | ||
2752 | * | ||
2753 | * - the page is uncharged | ||
2754 | * | ||
2755 | * - the page is off-LRU | ||
2756 | * | ||
2757 | * - an anonymous fault has exclusive page access, except for | ||
2758 | * a locked page table | ||
2759 | * | ||
2760 | * - a page cache insertion, a swapin fault, or a migration | ||
2761 | * have the page locked | ||
2762 | */ | ||
2750 | pc->mem_cgroup = memcg; | 2763 | pc->mem_cgroup = memcg; |
2751 | SetPageCgroupUsed(pc); | 2764 | pc->flags = PCG_USED | PCG_MEM | (do_swap_account ? PCG_MEMSW : 0); |
2752 | |||
2753 | if (lrucare) { | ||
2754 | if (was_on_lru) { | ||
2755 | lruvec = mem_cgroup_zone_lruvec(zone, pc->mem_cgroup); | ||
2756 | VM_BUG_ON_PAGE(PageLRU(page), page); | ||
2757 | SetPageLRU(page); | ||
2758 | add_page_to_lru_list(page, lruvec, page_lru(page)); | ||
2759 | } | ||
2760 | spin_unlock_irq(&zone->lru_lock); | ||
2761 | } | ||
2762 | 2765 | ||
2763 | mem_cgroup_charge_statistics(memcg, page, anon, nr_pages); | 2766 | if (lrucare) |
2764 | unlock_page_cgroup(pc); | 2767 | unlock_page_lru(page, isolated); |
2765 | 2768 | ||
2769 | local_irq_disable(); | ||
2770 | mem_cgroup_charge_statistics(memcg, page, nr_pages); | ||
2766 | /* | 2771 | /* |
2767 | * "charge_statistics" updated event counter. Then, check it. | 2772 | * "charge_statistics" updated event counter. Then, check it. |
2768 | * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree. | 2773 | * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree. |
2769 | * if they exceeds softlimit. | 2774 | * if they exceeds softlimit. |
2770 | */ | 2775 | */ |
2771 | memcg_check_events(memcg, page); | 2776 | memcg_check_events(memcg, page); |
2777 | local_irq_enable(); | ||
2772 | } | 2778 | } |
2773 | 2779 | ||
2774 | static DEFINE_MUTEX(set_limit_mutex); | 2780 | static DEFINE_MUTEX(set_limit_mutex); |
@@ -3395,7 +3401,6 @@ static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg)
3395 | 3401 | ||
3396 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 3402 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
3397 | 3403 | ||
3398 | #define PCGF_NOCOPY_AT_SPLIT (1 << PCG_LOCK | 1 << PCG_MIGRATION) | ||
3399 | /* | 3404 | /* |
3400 | * Because tail pages are not marked as "used", set it. We're under | 3405 | * Because tail pages are not marked as "used", set it. We're under |
3401 | * zone->lru_lock, 'splitting on pmd' and compound_lock. | 3406 | * zone->lru_lock, 'splitting on pmd' and compound_lock. |
@@ -3416,7 +3421,7 @@ void mem_cgroup_split_huge_fixup(struct page *head)
3416 | for (i = 1; i < HPAGE_PMD_NR; i++) { | 3421 | for (i = 1; i < HPAGE_PMD_NR; i++) { |
3417 | pc = head_pc + i; | 3422 | pc = head_pc + i; |
3418 | pc->mem_cgroup = memcg; | 3423 | pc->mem_cgroup = memcg; |
3419 | pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT; | 3424 | pc->flags = head_pc->flags; |
3420 | } | 3425 | } |
3421 | __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], | 3426 | __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], |
3422 | HPAGE_PMD_NR); | 3427 | HPAGE_PMD_NR); |
@@ -3446,7 +3451,6 @@ static int mem_cgroup_move_account(struct page *page,
3446 | { | 3451 | { |
3447 | unsigned long flags; | 3452 | unsigned long flags; |
3448 | int ret; | 3453 | int ret; |
3449 | bool anon = PageAnon(page); | ||
3450 | 3454 | ||
3451 | VM_BUG_ON(from == to); | 3455 | VM_BUG_ON(from == to); |
3452 | VM_BUG_ON_PAGE(PageLRU(page), page); | 3456 | VM_BUG_ON_PAGE(PageLRU(page), page); |
@@ -3460,15 +3464,21 @@ static int mem_cgroup_move_account(struct page *page,
3460 | if (nr_pages > 1 && !PageTransHuge(page)) | 3464 | if (nr_pages > 1 && !PageTransHuge(page)) |
3461 | goto out; | 3465 | goto out; |
3462 | 3466 | ||
3463 | lock_page_cgroup(pc); | 3467 | /* |
3468 | * Prevent mem_cgroup_migrate() from looking at pc->mem_cgroup | ||
3469 | * of its source page while we change it: page migration takes | ||
3470 | * both pages off the LRU, but page cache replacement doesn't. | ||
3471 | */ | ||
3472 | if (!trylock_page(page)) | ||
3473 | goto out; | ||
3464 | 3474 | ||
3465 | ret = -EINVAL; | 3475 | ret = -EINVAL; |
3466 | if (!PageCgroupUsed(pc) || pc->mem_cgroup != from) | 3476 | if (!PageCgroupUsed(pc) || pc->mem_cgroup != from) |
3467 | goto unlock; | 3477 | goto out_unlock; |
3468 | 3478 | ||
3469 | move_lock_mem_cgroup(from, &flags); | 3479 | move_lock_mem_cgroup(from, &flags); |
3470 | 3480 | ||
3471 | if (!anon && page_mapped(page)) { | 3481 | if (!PageAnon(page) && page_mapped(page)) { |
3472 | __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], | 3482 | __this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], |
3473 | nr_pages); | 3483 | nr_pages); |
3474 | __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], | 3484 | __this_cpu_add(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED], |
@@ -3482,20 +3492,25 @@ static int mem_cgroup_move_account(struct page *page,
3482 | nr_pages); | 3492 | nr_pages); |
3483 | } | 3493 | } |
3484 | 3494 | ||
3485 | mem_cgroup_charge_statistics(from, page, anon, -nr_pages); | 3495 | /* |
3496 | * It is safe to change pc->mem_cgroup here because the page | ||
3497 | * is referenced, charged, and isolated - we can't race with | ||
3498 | * uncharging, charging, migration, or LRU putback. | ||
3499 | */ | ||
3486 | 3500 | ||
3487 | /* caller should have done css_get */ | 3501 | /* caller should have done css_get */ |
3488 | pc->mem_cgroup = to; | 3502 | pc->mem_cgroup = to; |
3489 | mem_cgroup_charge_statistics(to, page, anon, nr_pages); | ||
3490 | move_unlock_mem_cgroup(from, &flags); | 3503 | move_unlock_mem_cgroup(from, &flags); |
3491 | ret = 0; | 3504 | ret = 0; |
3492 | unlock: | 3505 | |
3493 | unlock_page_cgroup(pc); | 3506 | local_irq_disable(); |
3494 | /* | 3507 | mem_cgroup_charge_statistics(to, page, nr_pages); |
3495 | * check events | ||
3496 | */ | ||
3497 | memcg_check_events(to, page); | 3508 | memcg_check_events(to, page); |
3509 | mem_cgroup_charge_statistics(from, page, -nr_pages); | ||
3498 | memcg_check_events(from, page); | 3510 | memcg_check_events(from, page); |
3511 | local_irq_enable(); | ||
3512 | out_unlock: | ||
3513 | unlock_page(page); | ||
3499 | out: | 3514 | out: |
3500 | return ret; | 3515 | return ret; |
3501 | } | 3516 | } |
@@ -3566,193 +3581,6 @@ out:
3566 | return ret; | 3581 | return ret; |
3567 | } | 3582 | } |
3568 | 3583 | ||
3569 | static void mem_cgroup_do_uncharge(struct mem_cgroup *memcg, | ||
3570 | unsigned int nr_pages, | ||
3571 | const enum charge_type ctype) | ||
3572 | { | ||
3573 | struct memcg_batch_info *batch = NULL; | ||
3574 | bool uncharge_memsw = true; | ||
3575 | |||
3576 | /* If swapout, usage of swap doesn't decrease */ | ||
3577 | if (!do_swap_account || ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) | ||
3578 | uncharge_memsw = false; | ||
3579 | |||
3580 | batch = ¤t->memcg_batch; | ||
3581 | /* | ||
3582 | * In usual, we do css_get() when we remember memcg pointer. | ||
3583 | * But in this case, we keep res->usage until end of a series of | ||
3584 | * uncharges. Then, it's ok to ignore memcg's refcnt. | ||
3585 | */ | ||
3586 | if (!batch->memcg) | ||
3587 | batch->memcg = memcg; | ||
3588 | /* | ||
3589 | * do_batch > 0 when unmapping pages or inode invalidate/truncate. | ||
3590 | * In those cases, all pages freed continuously can be expected to be in | ||
3591 | * the same cgroup and we have chance to coalesce uncharges. | ||
3592 | * But we do uncharge one by one if this is killed by OOM(TIF_MEMDIE) | ||
3593 | * because we want to do uncharge as soon as possible. | ||
3594 | */ | ||
3595 | |||
3596 | if (!batch->do_batch || test_thread_flag(TIF_MEMDIE)) | ||
3597 | goto direct_uncharge; | ||
3598 | |||
3599 | if (nr_pages > 1) | ||
3600 | goto direct_uncharge; | ||
3601 | |||
3602 | /* | ||
3603 | * In typical case, batch->memcg == mem. This means we can | ||
3604 | * merge a series of uncharges to an uncharge of res_counter. | ||
3605 | * If not, we uncharge res_counter ony by one. | ||
3606 | */ | ||
3607 | if (batch->memcg != memcg) | ||
3608 | goto direct_uncharge; | ||
3609 | /* remember freed charge and uncharge it later */ | ||
3610 | batch->nr_pages++; | ||
3611 | if (uncharge_memsw) | ||
3612 | batch->memsw_nr_pages++; | ||
3613 | return; | ||
3614 | direct_uncharge: | ||
3615 | res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE); | ||
3616 | if (uncharge_memsw) | ||
3617 | res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE); | ||
3618 | if (unlikely(batch->memcg != memcg)) | ||
3619 | memcg_oom_recover(memcg); | ||
3620 | } | ||
3621 | |||
3622 | /* | ||
3623 | * uncharge if !page_mapped(page) | ||
3624 | */ | ||
3625 | static struct mem_cgroup * | ||
3626 | __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype, | ||
3627 | bool end_migration) | ||
3628 | { | ||
3629 | struct mem_cgroup *memcg = NULL; | ||
3630 | unsigned int nr_pages = 1; | ||
3631 | struct page_cgroup *pc; | ||
3632 | bool anon; | ||
3633 | |||
3634 | if (mem_cgroup_disabled()) | ||
3635 | return NULL; | ||
3636 | |||
3637 | if (PageTransHuge(page)) { | ||
3638 | nr_pages <<= compound_order(page); | ||
3639 | VM_BUG_ON_PAGE(!PageTransHuge(page), page); | ||
3640 | } | ||
3641 | /* | ||
3642 | * Check if our page_cgroup is valid | ||
3643 | */ | ||
3644 | pc = lookup_page_cgroup(page); | ||
3645 | if (unlikely(!PageCgroupUsed(pc))) | ||
3646 | return NULL; | ||
3647 | |||
3648 | lock_page_cgroup(pc); | ||
3649 | |||
3650 | memcg = pc->mem_cgroup; | ||
3651 | |||
3652 | if (!PageCgroupUsed(pc)) | ||
3653 | goto unlock_out; | ||
3654 | |||
3655 | anon = PageAnon(page); | ||
3656 | |||
3657 | switch (ctype) { | ||
3658 | case MEM_CGROUP_CHARGE_TYPE_ANON: | ||
3659 | /* | ||
3660 | * Generally PageAnon tells if it's the anon statistics to be | ||
3661 | * updated; but sometimes e.g. mem_cgroup_uncharge_page() is | ||
3662 | * used before page reached the stage of being marked PageAnon. | ||
3663 | */ | ||
3664 | anon = true; | ||
3665 | /* fallthrough */ | ||
3666 | case MEM_CGROUP_CHARGE_TYPE_DROP: | ||
3667 | /* See mem_cgroup_prepare_migration() */ | ||
3668 | if (page_mapped(page)) | ||
3669 | goto unlock_out; | ||
3670 | /* | ||
3671 | * Pages under migration may not be uncharged. But | ||
3672 | * end_migration() /must/ be the one uncharging the | ||
3673 | * unused post-migration page and so it has to call | ||
3674 | * here with the migration bit still set. See the | ||
3675 | * res_counter handling below. | ||
3676 | */ | ||
3677 | if (!end_migration && PageCgroupMigration(pc)) | ||
3678 | goto unlock_out; | ||
3679 | break; | ||
3680 | case MEM_CGROUP_CHARGE_TYPE_SWAPOUT: | ||
3681 | if (!PageAnon(page)) { /* Shared memory */ | ||
3682 | if (page->mapping && !page_is_file_cache(page)) | ||
3683 | goto unlock_out; | ||
3684 | } else if (page_mapped(page)) /* Anon */ | ||
3685 | goto unlock_out; | ||
3686 | break; | ||
3687 | default: | ||
3688 | break; | ||
3689 | } | ||
3690 | |||
3691 | mem_cgroup_charge_statistics(memcg, page, anon, -nr_pages); | ||
3692 | |||
3693 | ClearPageCgroupUsed(pc); | ||
3694 | /* | ||
3695 | * pc->mem_cgroup is not cleared here. It will be accessed when it's | ||
3696 | * freed from LRU. This is safe because uncharged page is expected not | ||
3697 | * to be reused (freed soon). Exception is SwapCache, it's handled by | ||
3698 | * special functions. | ||
3699 | */ | ||
3700 | |||
3701 | unlock_page_cgroup(pc); | ||
3702 | /* | ||
3703 | * even after unlock, we have memcg->res.usage here and this memcg | ||
3704 | * will never be freed, so it's safe to call css_get(). | ||
3705 | */ | ||
3706 | memcg_check_events(memcg, page); | ||
3707 | if (do_swap_account && ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) { | ||
3708 | mem_cgroup_swap_statistics(memcg, true); | ||
3709 | css_get(&memcg->css); | ||
3710 | } | ||
3711 | /* | ||
3712 | * Migration does not charge the res_counter for the | ||
3713 | * replacement page, so leave it alone when phasing out the | ||
3714 | * page that is unused after the migration. | ||
3715 | */ | ||
3716 | if (!end_migration) | ||
3717 | mem_cgroup_do_uncharge(memcg, nr_pages, ctype); | ||
3718 | |||
3719 | return memcg; | ||
3720 | |||
3721 | unlock_out: | ||
3722 | unlock_page_cgroup(pc); | ||
3723 | return NULL; | ||
3724 | } | ||
3725 | |||
3726 | void mem_cgroup_uncharge_page(struct page *page) | ||
3727 | { | ||
3728 | /* early check. */ | ||
3729 | if (page_mapped(page)) | ||
3730 | return; | ||
3731 | VM_BUG_ON_PAGE(page->mapping && !PageAnon(page), page); | ||
3732 | /* | ||
3733 | * If the page is in swap cache, uncharge should be deferred | ||
3734 | * to the swap path, which also properly accounts swap usage | ||
3735 | * and handles memcg lifetime. | ||
3736 | * | ||
3737 | * Note that this check is not stable and reclaim may add the | ||
3738 | * page to swap cache at any time after this. However, if the | ||
3739 | * page is not in swap cache by the time page->mapcount hits | ||
3740 | * 0, there won't be any page table references to the swap | ||
3741 | * slot, and reclaim will free it and not actually write the | ||
3742 | * page to disk. | ||
3743 | */ | ||
3744 | if (PageSwapCache(page)) | ||
3745 | return; | ||
3746 | __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_ANON, false); | ||
3747 | } | ||
3748 | |||
3749 | void mem_cgroup_uncharge_cache_page(struct page *page) | ||
3750 | { | ||
3751 | VM_BUG_ON_PAGE(page_mapped(page), page); | ||
3752 | VM_BUG_ON_PAGE(page->mapping, page); | ||
3753 | __mem_cgroup_uncharge_common(page, MEM_CGROUP_CHARGE_TYPE_CACHE, false); | ||
3754 | } | ||
3755 | |||
3756 | /* | 3584 | /* |
3757 | * Batch_start/batch_end is called in unmap_page_range/invlidate/trucate. | 3585 | * Batch_start/batch_end is called in unmap_page_range/invlidate/trucate. |
3758 | * In that cases, pages are freed continuously and we can expect pages | 3586 | * In that cases, pages are freed continuously and we can expect pages |
@@ -3763,6 +3591,9 @@ void mem_cgroup_uncharge_cache_page(struct page *page)
3763 | 3591 | ||
3764 | void mem_cgroup_uncharge_start(void) | 3592 | void mem_cgroup_uncharge_start(void) |
3765 | { | 3593 | { |
3594 | unsigned long flags; | ||
3595 | |||
3596 | local_irq_save(flags); | ||
3766 | current->memcg_batch.do_batch++; | 3597 | current->memcg_batch.do_batch++; |
3767 | /* We can do nest. */ | 3598 | /* We can do nest. */ |
3768 | if (current->memcg_batch.do_batch == 1) { | 3599 | if (current->memcg_batch.do_batch == 1) { |
@@ -3770,21 +3601,18 @@ void mem_cgroup_uncharge_start(void)
3770 | current->memcg_batch.nr_pages = 0; | 3601 | current->memcg_batch.nr_pages = 0; |
3771 | current->memcg_batch.memsw_nr_pages = 0; | 3602 | current->memcg_batch.memsw_nr_pages = 0; |
3772 | } | 3603 | } |
3604 | local_irq_restore(flags); | ||
3773 | } | 3605 | } |
3774 | 3606 | ||
3775 | void mem_cgroup_uncharge_end(void) | 3607 | void mem_cgroup_uncharge_end(void) |
3776 | { | 3608 | { |
3777 | struct memcg_batch_info *batch = ¤t->memcg_batch; | 3609 | struct memcg_batch_info *batch = ¤t->memcg_batch; |
3610 | unsigned long flags; | ||
3778 | 3611 | ||
3779 | if (!batch->do_batch) | 3612 | local_irq_save(flags); |
3780 | return; | 3613 | VM_BUG_ON(!batch->do_batch); |
3781 | 3614 | if (--batch->do_batch) /* If stacked, do nothing */ | |
3782 | batch->do_batch--; | 3615 | goto out; |
3783 | if (batch->do_batch) /* If stacked, do nothing. */ | ||
3784 | return; | ||
3785 | |||
3786 | if (!batch->memcg) | ||
3787 | return; | ||
3788 | /* | 3616 | /* |
3789 | * This "batch->memcg" is valid without any css_get/put etc... | 3617 | * This "batch->memcg" is valid without any css_get/put etc... |
3790 | * bacause we hide charges behind us. | 3618 | * bacause we hide charges behind us. |
@@ -3796,61 +3624,16 @@ void mem_cgroup_uncharge_end(void)
3796 | res_counter_uncharge(&batch->memcg->memsw, | 3624 | res_counter_uncharge(&batch->memcg->memsw, |
3797 | batch->memsw_nr_pages * PAGE_SIZE); | 3625 | batch->memsw_nr_pages * PAGE_SIZE); |
3798 | memcg_oom_recover(batch->memcg); | 3626 | memcg_oom_recover(batch->memcg); |
3799 | /* forget this pointer (for sanity check) */ | 3627 | out: |
3800 | batch->memcg = NULL; | 3628 | local_irq_restore(flags); |
3801 | } | ||
3802 | |||
3803 | #ifdef CONFIG_SWAP | ||
3804 | /* | ||
3805 | * called after __delete_from_swap_cache() and drop "page" account. | ||
3806 | * memcg information is recorded to swap_cgroup of "ent" | ||
3807 | */ | ||
3808 | void | ||
3809 | mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout) | ||
3810 | { | ||
3811 | struct mem_cgroup *memcg; | ||
3812 | int ctype = MEM_CGROUP_CHARGE_TYPE_SWAPOUT; | ||
3813 | |||
3814 | if (!swapout) /* this was a swap cache but the swap is unused ! */ | ||
3815 | ctype = MEM_CGROUP_CHARGE_TYPE_DROP; | ||
3816 | |||
3817 | memcg = __mem_cgroup_uncharge_common(page, ctype, false); | ||
3818 | |||
3819 | /* | ||
3820 | * record memcg information, if swapout && memcg != NULL, | ||
3821 | * css_get() was called in uncharge(). | ||
3822 | */ | ||
3823 | if (do_swap_account && swapout && memcg) | ||
3824 | swap_cgroup_record(ent, mem_cgroup_id(memcg)); | ||
3825 | } | 3629 | } |
3826 | #endif | ||
3827 | 3630 | ||
3828 | #ifdef CONFIG_MEMCG_SWAP | 3631 | #ifdef CONFIG_MEMCG_SWAP |
3829 | /* | 3632 | static void mem_cgroup_swap_statistics(struct mem_cgroup *memcg, |
3830 | * called from swap_entry_free(). remove record in swap_cgroup and | 3633 | bool charge) |
3831 | * uncharge "memsw" account. | ||
3832 | */ | ||
3833 | void mem_cgroup_uncharge_swap(swp_entry_t ent) | ||
3834 | { | 3634 | { |
3835 | struct mem_cgroup *memcg; | 3635 | int val = (charge) ? 1 : -1; |
3836 | unsigned short id; | 3636 | this_cpu_add(memcg->stat->count[MEM_CGROUP_STAT_SWAP], val); |
3837 | |||
3838 | if (!do_swap_account) | ||
3839 | return; | ||
3840 | |||
3841 | id = swap_cgroup_record(ent, 0); | ||
3842 | rcu_read_lock(); | ||
3843 | memcg = mem_cgroup_lookup(id); | ||
3844 | if (memcg) { | ||
3845 | /* | ||
3846 | * We uncharge this because swap is freed. This memcg can | ||
3847 | * be obsolete one. We avoid calling css_tryget_online(). | ||
3848 | */ | ||
3849 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE); | ||
3850 | mem_cgroup_swap_statistics(memcg, false); | ||
3851 | css_put(&memcg->css); | ||
3852 | } | ||
3853 | rcu_read_unlock(); | ||
3854 | } | 3637 | } |
3855 | 3638 | ||
3856 | /** | 3639 | /** |
@@ -3902,169 +3685,6 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry,
3902 | } | 3685 | } |
3903 | #endif | 3686 | #endif |
3904 | 3687 | ||
3905 | /* | ||
3906 | * Before starting migration, account PAGE_SIZE to mem_cgroup that the old | ||
3907 | * page belongs to. | ||
3908 | */ | ||
3909 | void mem_cgroup_prepare_migration(struct page *page, struct page *newpage, | ||
3910 | struct mem_cgroup **memcgp) | ||
3911 | { | ||
3912 | struct mem_cgroup *memcg = NULL; | ||
3913 | unsigned int nr_pages = 1; | ||
3914 | struct page_cgroup *pc; | ||
3915 | |||
3916 | *memcgp = NULL; | ||
3917 | |||
3918 | if (mem_cgroup_disabled()) | ||
3919 | return; | ||
3920 | |||
3921 | if (PageTransHuge(page)) | ||
3922 | nr_pages <<= compound_order(page); | ||
3923 | |||
3924 | pc = lookup_page_cgroup(page); | ||
3925 | lock_page_cgroup(pc); | ||
3926 | if (PageCgroupUsed(pc)) { | ||
3927 | memcg = pc->mem_cgroup; | ||
3928 | css_get(&memcg->css); | ||
3929 | /* | ||
3930 | * At migrating an anonymous page, its mapcount goes down | ||
3931 | * to 0 and uncharge() will be called. But, even if it's fully | ||
3932 | * unmapped, migration may fail and this page has to be | ||
3933 | * charged again. We set MIGRATION flag here and delay uncharge | ||
3934 | * until end_migration() is called | ||
3935 | * | ||
3936 | * Corner Case Thinking | ||
3937 | * A) | ||
3938 | * When the old page was mapped as Anon and it's unmap-and-freed | ||
3939 | * while migration was ongoing. | ||
3940 | * If unmap finds the old page, uncharge() of it will be delayed | ||
3941 | * until end_migration(). If unmap finds a new page, it's | ||
3942 | * uncharged when it make mapcount to be 1->0. If unmap code | ||
3943 | * finds swap_migration_entry, the new page will not be mapped | ||
3944 | * and end_migration() will find it(mapcount==0). | ||
3945 | * | ||
3946 | * B) | ||
3947 | * When the old page was mapped but migraion fails, the kernel | ||
3948 | * remaps it. A charge for it is kept by MIGRATION flag even | ||
3949 | * if mapcount goes down to 0. We can do remap successfully | ||
3950 | * without charging it again. | ||
3951 | * | ||
3952 | * C) | ||
3953 | * The "old" page is under lock_page() until the end of | ||
3954 | * migration, so, the old page itself will not be swapped-out. | ||
3955 | * If the new page is swapped out before end_migraton, our | ||
3956 | * hook to usual swap-out path will catch the event. | ||
3957 | */ | ||
3958 | if (PageAnon(page)) | ||
3959 | SetPageCgroupMigration(pc); | ||
3960 | } | ||
3961 | unlock_page_cgroup(pc); | ||
3962 | /* | ||
3963 | * If the page is not charged at this point, | ||
3964 | * we return here. | ||
3965 | */ | ||
3966 | if (!memcg) | ||
3967 | return; | ||
3968 | |||
3969 | *memcgp = memcg; | ||
3970 | /* | ||
3971 | * We charge new page before it's used/mapped. So, even if unlock_page() | ||
3972 | * is called before end_migration, we can catch all events on this new | ||
3973 | * page. In the case new page is migrated but not remapped, new page's | ||
3974 | * mapcount will be finally 0 and we call uncharge in end_migration(). | ||
3975 | */ | ||
3976 | /* | ||
3977 | * The page is committed to the memcg, but it's not actually | ||
3978 | * charged to the res_counter since we plan on replacing the | ||
3979 | * old one and only one page is going to be left afterwards. | ||
3980 | */ | ||
3981 | commit_charge(newpage, memcg, nr_pages, PageAnon(page), false); | ||
3982 | } | ||
3983 | |||
3984 | /* remove redundant charge if migration failed*/ | ||
3985 | void mem_cgroup_end_migration(struct mem_cgroup *memcg, | ||
3986 | struct page *oldpage, struct page *newpage, bool migration_ok) | ||
3987 | { | ||
3988 | struct page *used, *unused; | ||
3989 | struct page_cgroup *pc; | ||
3990 | bool anon; | ||
3991 | |||
3992 | if (!memcg) | ||
3993 | return; | ||
3994 | |||
3995 | if (!migration_ok) { | ||
3996 | used = oldpage; | ||
3997 | unused = newpage; | ||
3998 | } else { | ||
3999 | used = newpage; | ||
4000 | unused = oldpage; | ||
4001 | } | ||
4002 | anon = PageAnon(used); | ||
4003 | __mem_cgroup_uncharge_common(unused, | ||
4004 | anon ? MEM_CGROUP_CHARGE_TYPE_ANON | ||
4005 | : MEM_CGROUP_CHARGE_TYPE_CACHE, | ||
4006 | true); | ||
4007 | css_put(&memcg->css); | ||
4008 | /* | ||
4009 | * We disallowed uncharge of pages under migration because mapcount | ||
4010 | * of the page goes down to zero, temporarly. | ||
4011 | * Clear the flag and check the page should be charged. | ||
4012 | */ | ||
4013 | pc = lookup_page_cgroup(oldpage); | ||
4014 | lock_page_cgroup(pc); | ||
4015 | ClearPageCgroupMigration(pc); | ||
4016 | unlock_page_cgroup(pc); | ||
4017 | |||
4018 | /* | ||
4019 | * If a page is a file cache, radix-tree replacement is very atomic | ||
4020 | * and we can skip this check. When it was an Anon page, its mapcount | ||
4021 | * goes down to 0. But because we added MIGRATION flage, it's not | ||
4022 | * uncharged yet. There are several case but page->mapcount check | ||
4023 | * and USED bit check in mem_cgroup_uncharge_page() will do enough | ||
4024 | * check. (see prepare_charge() also) | ||
4025 | */ | ||
4026 | if (anon) | ||
4027 | mem_cgroup_uncharge_page(used); | ||
4028 | } | ||
4029 | |||
4030 | /* | ||
4031 | * At replace page cache, newpage is not under any memcg but it's on | ||
4032 | * LRU. So, this function doesn't touch res_counter but handles LRU | ||
4033 | * in correct way. Both pages are locked so we cannot race with uncharge. | ||
4034 | */ | ||
4035 | void mem_cgroup_replace_page_cache(struct page *oldpage, | ||
4036 | struct page *newpage) | ||
4037 | { | ||
4038 | struct mem_cgroup *memcg = NULL; | ||
4039 | struct page_cgroup *pc; | ||
4040 | |||
4041 | if (mem_cgroup_disabled()) | ||
4042 | return; | ||
4043 | |||
4044 | pc = lookup_page_cgroup(oldpage); | ||
4045 | /* fix accounting on old pages */ | ||
4046 | lock_page_cgroup(pc); | ||
4047 | if (PageCgroupUsed(pc)) { | ||
4048 | memcg = pc->mem_cgroup; | ||
4049 | mem_cgroup_charge_statistics(memcg, oldpage, false, -1); | ||
4050 | ClearPageCgroupUsed(pc); | ||
4051 | } | ||
4052 | unlock_page_cgroup(pc); | ||
4053 | |||
4054 | /* | ||
4055 | * When called from shmem_replace_page(), in some cases the | ||
4056 | * oldpage has already been charged, and in some cases not. | ||
4057 | */ | ||
4058 | if (!memcg) | ||
4059 | return; | ||
4060 | /* | ||
4061 | * Even if newpage->mapping was NULL before starting replacement, | ||
4062 | * the newpage may be on LRU(or pagevec for LRU) already. We lock | ||
4063 | * LRU while we overwrite pc->mem_cgroup. | ||
4064 | */ | ||
4065 | commit_charge(newpage, memcg, 1, false, true); | ||
4066 | } | ||
4067 | |||
4068 | #ifdef CONFIG_DEBUG_VM | 3688 | #ifdef CONFIG_DEBUG_VM |
4069 | static struct page_cgroup *lookup_page_cgroup_used(struct page *page) | 3689 | static struct page_cgroup *lookup_page_cgroup_used(struct page *page) |
4070 | { | 3690 | { |
@@ -4263,7 +3883,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
4263 | gfp_mask, &nr_scanned); | 3883 | gfp_mask, &nr_scanned); |
4264 | nr_reclaimed += reclaimed; | 3884 | nr_reclaimed += reclaimed; |
4265 | *total_scanned += nr_scanned; | 3885 | *total_scanned += nr_scanned; |
4266 | spin_lock(&mctz->lock); | 3886 | spin_lock_irq(&mctz->lock); |
4267 | 3887 | ||
4268 | /* | 3888 | /* |
4269 | * If we failed to reclaim anything from this memory cgroup | 3889 | * If we failed to reclaim anything from this memory cgroup |
@@ -4303,7 +3923,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
4303 | */ | 3923 | */ |
4304 | /* If excess == 0, no tree ops */ | 3924 | /* If excess == 0, no tree ops */ |
4305 | __mem_cgroup_insert_exceeded(mz, mctz, excess); | 3925 | __mem_cgroup_insert_exceeded(mz, mctz, excess); |
4306 | spin_unlock(&mctz->lock); | 3926 | spin_unlock_irq(&mctz->lock); |
4307 | css_put(&mz->memcg->css); | 3927 | css_put(&mz->memcg->css); |
4308 | loop++; | 3928 | loop++; |
4309 | /* | 3929 | /* |
@@ -6265,9 +5885,9 @@ static enum mc_target_type get_mctgt_type(struct vm_area_struct *vma,
6265 | if (page) { | 5885 | if (page) { |
6266 | pc = lookup_page_cgroup(page); | 5886 | pc = lookup_page_cgroup(page); |
6267 | /* | 5887 | /* |
6268 | * Do only loose check w/o page_cgroup lock. | 5888 | * Do only loose check w/o serialization. |
6269 | * mem_cgroup_move_account() checks the pc is valid or not under | 5889 | * mem_cgroup_move_account() checks the pc is valid or |
6270 | * the lock. | 5890 | * not under LRU exclusion. |
6271 | */ | 5891 | */ |
6272 | if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) { | 5892 | if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) { |
6273 | ret = MC_TARGET_PAGE; | 5893 | ret = MC_TARGET_PAGE; |
@@ -6729,6 +6349,67 @@ static void __init enable_swap_cgroup(void)
6729 | } | 6349 | } |
6730 | #endif | 6350 | #endif |
6731 | 6351 | ||
6352 | #ifdef CONFIG_MEMCG_SWAP | ||
6353 | /** | ||
6354 | * mem_cgroup_swapout - transfer a memsw charge to swap | ||
6355 | * @page: page whose memsw charge to transfer | ||
6356 | * @entry: swap entry to move the charge to | ||
6357 | * | ||
6358 | * Transfer the memsw charge of @page to @entry. | ||
6359 | */ | ||
6360 | void mem_cgroup_swapout(struct page *page, swp_entry_t entry) | ||
6361 | { | ||
6362 | struct page_cgroup *pc; | ||
6363 | unsigned short oldid; | ||
6364 | |||
6365 | VM_BUG_ON_PAGE(PageLRU(page), page); | ||
6366 | VM_BUG_ON_PAGE(page_count(page), page); | ||
6367 | |||
6368 | if (!do_swap_account) | ||
6369 | return; | ||
6370 | |||
6371 | pc = lookup_page_cgroup(page); | ||
6372 | |||
6373 | /* Readahead page, never charged */ | ||
6374 | if (!PageCgroupUsed(pc)) | ||
6375 | return; | ||
6376 | |||
6377 | VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page); | ||
6378 | |||
6379 | oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup)); | ||
6380 | VM_BUG_ON_PAGE(oldid, page); | ||
6381 | |||
6382 | pc->flags &= ~PCG_MEMSW; | ||
6383 | css_get(&pc->mem_cgroup->css); | ||
6384 | mem_cgroup_swap_statistics(pc->mem_cgroup, true); | ||
6385 | } | ||
6386 | |||
6387 | /** | ||
6388 | * mem_cgroup_uncharge_swap - uncharge a swap entry | ||
6389 | * @entry: swap entry to uncharge | ||
6390 | * | ||
6391 | * Drop the memsw charge associated with @entry. | ||
6392 | */ | ||
6393 | void mem_cgroup_uncharge_swap(swp_entry_t entry) | ||
6394 | { | ||
6395 | struct mem_cgroup *memcg; | ||
6396 | unsigned short id; | ||
6397 | |||
6398 | if (!do_swap_account) | ||
6399 | return; | ||
6400 | |||
6401 | id = swap_cgroup_record(entry, 0); | ||
6402 | rcu_read_lock(); | ||
6403 | memcg = mem_cgroup_lookup(id); | ||
6404 | if (memcg) { | ||
6405 | res_counter_uncharge(&memcg->memsw, PAGE_SIZE); | ||
6406 | mem_cgroup_swap_statistics(memcg, false); | ||
6407 | css_put(&memcg->css); | ||
6408 | } | ||
6409 | rcu_read_unlock(); | ||
6410 | } | ||
6411 | #endif | ||
6412 | |||
6732 | /** | 6413 | /** |
6733 | * mem_cgroup_try_charge - try charging a page | 6414 | * mem_cgroup_try_charge - try charging a page |
6734 | * @page: page to charge | 6415 | * @page: page to charge |
@@ -6831,7 +6512,7 @@ void mem_cgroup_commit_charge(struct page *page, struct mem_cgroup *memcg,
6831 | VM_BUG_ON_PAGE(!PageTransHuge(page), page); | 6512 | VM_BUG_ON_PAGE(!PageTransHuge(page), page); |
6832 | } | 6513 | } |
6833 | 6514 | ||
6834 | commit_charge(page, memcg, nr_pages, PageAnon(page), lrucare); | 6515 | commit_charge(page, memcg, nr_pages, lrucare); |
6835 | 6516 | ||
6836 | if (do_swap_account && PageSwapCache(page)) { | 6517 | if (do_swap_account && PageSwapCache(page)) { |
6837 | swp_entry_t entry = { .val = page_private(page) }; | 6518 | swp_entry_t entry = { .val = page_private(page) }; |
@@ -6873,6 +6554,139 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg)
6873 | cancel_charge(memcg, nr_pages); | 6554 | cancel_charge(memcg, nr_pages); |
6874 | } | 6555 | } |
6875 | 6556 | ||
6557 | /** | ||
6558 | * mem_cgroup_uncharge - uncharge a page | ||
6559 | * @page: page to uncharge | ||
6560 | * | ||
6561 | * Uncharge a page previously charged with mem_cgroup_try_charge() and | ||
6562 | * mem_cgroup_commit_charge(). | ||
6563 | */ | ||
6564 | void mem_cgroup_uncharge(struct page *page) | ||
6565 | { | ||
6566 | struct memcg_batch_info *batch; | ||
6567 | unsigned int nr_pages = 1; | ||
6568 | struct mem_cgroup *memcg; | ||
6569 | struct page_cgroup *pc; | ||
6570 | unsigned long pc_flags; | ||
6571 | unsigned long flags; | ||
6572 | |||
6573 | VM_BUG_ON_PAGE(PageLRU(page), page); | ||
6574 | VM_BUG_ON_PAGE(page_count(page), page); | ||
6575 | |||
6576 | if (mem_cgroup_disabled()) | ||
6577 | return; | ||
6578 | |||
6579 | pc = lookup_page_cgroup(page); | ||
6580 | |||
6581 | /* Every final put_page() ends up here */ | ||
6582 | if (!PageCgroupUsed(pc)) | ||
6583 | return; | ||
6584 | |||
6585 | if (PageTransHuge(page)) { | ||
6586 | nr_pages <<= compound_order(page); | ||
6587 | VM_BUG_ON_PAGE(!PageTransHuge(page), page); | ||
6588 | } | ||
6589 | /* | ||
6590 | * Nobody should be changing or seriously looking at | ||
6591 | * pc->mem_cgroup and pc->flags at this point, we have fully | ||
6592 | * exclusive access to the page. | ||
6593 | */ | ||
6594 | memcg = pc->mem_cgroup; | ||
6595 | pc_flags = pc->flags; | ||
6596 | pc->flags = 0; | ||
6597 | |||
6598 | local_irq_save(flags); | ||
6599 | |||
6600 | if (nr_pages > 1) | ||
6601 | goto direct; | ||
6602 | if (unlikely(test_thread_flag(TIF_MEMDIE))) | ||
6603 | goto direct; | ||
6604 | batch = ¤t->memcg_batch; | ||
6605 | if (!batch->do_batch) | ||
6606 | goto direct; | ||
6607 | if (batch->memcg && batch->memcg != memcg) | ||
6608 | goto direct; | ||
6609 | if (!batch->memcg) | ||
6610 | batch->memcg = memcg; | ||
6611 | if (pc_flags & PCG_MEM) | ||
6612 | batch->nr_pages++; | ||
6613 | if (pc_flags & PCG_MEMSW) | ||
6614 | batch->memsw_nr_pages++; | ||
6615 | goto out; | ||
6616 | direct: | ||
6617 | if (pc_flags & PCG_MEM) | ||
6618 | res_counter_uncharge(&memcg->res, nr_pages * PAGE_SIZE); | ||
6619 | if (pc_flags & PCG_MEMSW) | ||
6620 | res_counter_uncharge(&memcg->memsw, nr_pages * PAGE_SIZE); | ||
6621 | memcg_oom_recover(memcg); | ||
6622 | out: | ||
6623 | mem_cgroup_charge_statistics(memcg, page, -nr_pages); | ||
6624 | memcg_check_events(memcg, page); | ||
6625 | |||
6626 | local_irq_restore(flags); | ||
6627 | } | ||
6628 | |||
6629 | /** | ||
6630 | * mem_cgroup_migrate - migrate a charge to another page | ||
6631 | * @oldpage: currently charged page | ||
6632 | * @newpage: page to transfer the charge to | ||
6633 | * @lrucare: both pages might be on the LRU already | ||
6634 | * | ||
6635 | * Migrate the charge from @oldpage to @newpage. | ||
6636 | * | ||
6637 | * Both pages must be locked, @newpage->mapping must be set up. | ||
6638 | */ | ||
6639 | void mem_cgroup_migrate(struct page *oldpage, struct page *newpage, | ||
6640 | bool lrucare) | ||
6641 | { | ||
6642 | unsigned int nr_pages = 1; | ||
6643 | struct page_cgroup *pc; | ||
6644 | int isolated; | ||
6645 | |||
6646 | VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage); | ||
6647 | VM_BUG_ON_PAGE(!PageLocked(newpage), newpage); | ||
6648 | VM_BUG_ON_PAGE(!lrucare && PageLRU(oldpage), oldpage); | ||
6649 | VM_BUG_ON_PAGE(!lrucare && PageLRU(newpage), newpage); | ||
6650 | VM_BUG_ON_PAGE(PageAnon(oldpage) != PageAnon(newpage), newpage); | ||
6651 | |||
6652 | if (mem_cgroup_disabled()) | ||
6653 | return; | ||
6654 | |||
6655 | /* Page cache replacement: new page already charged? */ | ||
6656 | pc = lookup_page_cgroup(newpage); | ||
6657 | if (PageCgroupUsed(pc)) | ||
6658 | return; | ||
6659 | |||
6660 | /* Re-entrant migration: old page already uncharged? */ | ||
6661 | pc = lookup_page_cgroup(oldpage); | ||
6662 | if (!PageCgroupUsed(pc)) | ||
6663 | return; | ||
6664 | |||
6665 | VM_BUG_ON_PAGE(!(pc->flags & PCG_MEM), oldpage); | ||
6666 | VM_BUG_ON_PAGE(do_swap_account && !(pc->flags & PCG_MEMSW), oldpage); | ||
6667 | |||
6668 | if (PageTransHuge(oldpage)) { | ||
6669 | nr_pages <<= compound_order(oldpage); | ||
6670 | VM_BUG_ON_PAGE(!PageTransHuge(oldpage), oldpage); | ||
6671 | VM_BUG_ON_PAGE(!PageTransHuge(newpage), newpage); | ||
6672 | } | ||
6673 | |||
6674 | if (lrucare) | ||
6675 | lock_page_lru(oldpage, &isolated); | ||
6676 | |||
6677 | pc->flags = 0; | ||
6678 | |||
6679 | if (lrucare) | ||
6680 | unlock_page_lru(oldpage, isolated); | ||
6681 | |||
6682 | local_irq_disable(); | ||
6683 | mem_cgroup_charge_statistics(pc->mem_cgroup, oldpage, -nr_pages); | ||
6684 | memcg_check_events(pc->mem_cgroup, oldpage); | ||
6685 | local_irq_enable(); | ||
6686 | |||
6687 | commit_charge(newpage, pc->mem_cgroup, nr_pages, lrucare); | ||
6688 | } | ||
6689 | |||
6876 | /* | 6690 | /* |
6877 | * subsys_initcall() for memory controller. | 6691 | * subsys_initcall() for memory controller. |
6878 | * | 6692 | * |