Diffstat (limited to 'mm/memcontrol.c')
-rw-r--r--   mm/memcontrol.c   669
1 file changed, 389 insertions, 280 deletions
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index da53a252b259..010f9166fa6e 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -73,15 +73,6 @@ static int really_do_swap_account __initdata = 0; | |||
73 | #define do_swap_account (0) | 73 | #define do_swap_account (0) |
74 | #endif | 74 | #endif |
75 | 75 | ||
76 | /* | ||
77 | * Per memcg event counter is incremented at every pagein/pageout. This counter | ||
78 | * is used for trigger some periodic events. This is straightforward and better | ||
79 | * than using jiffies etc. to handle periodic memcg event. | ||
80 | * | ||
81 | * These values will be used as !((event) & ((1 <<(thresh)) - 1)) | ||
82 | */ | ||
83 | #define THRESHOLDS_EVENTS_THRESH (7) /* once in 128 */ | ||
84 | #define SOFTLIMIT_EVENTS_THRESH (10) /* once in 1024 */ | ||
85 | 76 | ||
86 | /* | 77 | /* |
87 | * Statistics for memory cgroup. | 78 | * Statistics for memory cgroup. |
@@ -93,19 +84,36 @@ enum mem_cgroup_stat_index { | |||
93 | MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */ | 84 | MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */ |
94 | MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */ | 85 | MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */ |
95 | MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */ | 86 | MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */ |
96 | MEM_CGROUP_STAT_PGPGIN_COUNT, /* # of pages paged in */ | ||
97 | MEM_CGROUP_STAT_PGPGOUT_COUNT, /* # of pages paged out */ | ||
98 | MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */ | 87 | MEM_CGROUP_STAT_SWAPOUT, /* # of pages, swapped out */ |
99 | MEM_CGROUP_STAT_DATA, /* end of data requires synchronization */ | 88 | MEM_CGROUP_STAT_DATA, /* end of data requires synchronization */ |
100 | /* incremented at every pagein/pageout */ | ||
101 | MEM_CGROUP_EVENTS = MEM_CGROUP_STAT_DATA, | ||
102 | MEM_CGROUP_ON_MOVE, /* someone is moving account between groups */ | 89 | MEM_CGROUP_ON_MOVE, /* someone is moving account between groups */ |
103 | |||
104 | MEM_CGROUP_STAT_NSTATS, | 90 | MEM_CGROUP_STAT_NSTATS, |
105 | }; | 91 | }; |
106 | 92 | ||
93 | enum mem_cgroup_events_index { | ||
94 | MEM_CGROUP_EVENTS_PGPGIN, /* # of pages paged in */ | ||
95 | MEM_CGROUP_EVENTS_PGPGOUT, /* # of pages paged out */ | ||
96 | MEM_CGROUP_EVENTS_COUNT, /* # of pages paged in/out */ | ||
97 | MEM_CGROUP_EVENTS_NSTATS, | ||
98 | }; | ||
99 | /* | ||
100 | * Per memcg event counter is incremented at every pagein/pageout. With THP, | ||
101 | * it will be incremated by the number of pages. This counter is used for | ||
102 | * for trigger some periodic events. This is straightforward and better | ||
103 | * than using jiffies etc. to handle periodic memcg event. | ||
104 | */ | ||
105 | enum mem_cgroup_events_target { | ||
106 | MEM_CGROUP_TARGET_THRESH, | ||
107 | MEM_CGROUP_TARGET_SOFTLIMIT, | ||
108 | MEM_CGROUP_NTARGETS, | ||
109 | }; | ||
110 | #define THRESHOLDS_EVENTS_TARGET (128) | ||
111 | #define SOFTLIMIT_EVENTS_TARGET (1024) | ||
112 | |||
107 | struct mem_cgroup_stat_cpu { | 113 | struct mem_cgroup_stat_cpu { |
108 | s64 count[MEM_CGROUP_STAT_NSTATS]; | 114 | long count[MEM_CGROUP_STAT_NSTATS]; |
115 | unsigned long events[MEM_CGROUP_EVENTS_NSTATS]; | ||
116 | unsigned long targets[MEM_CGROUP_NTARGETS]; | ||
109 | }; | 117 | }; |
110 | 118 | ||
111 | /* | 119 | /* |
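The hunk above replaces the old byte-based s64 counters (which doubled as the event counter via MEM_CGROUP_EVENTS) with page-based long counts, a separate unsigned events[] array, and per-target targets[] thresholds. Below is a minimal userspace sketch of that per-cpu layout, not kernel code: the percpu machinery is modeled as a plain array indexed by CPU, and the array sizes and CPU count are illustrative assumptions.

#include <stdio.h>

/* sizes and CPU count are illustrative; the real values come from the
 * enums introduced in the hunk above */
#define MEM_CGROUP_STAT_NSTATS    6
#define MEM_CGROUP_EVENTS_NSTATS  3   /* PGPGIN, PGPGOUT, COUNT */
#define MEM_CGROUP_NTARGETS       2   /* THRESH, SOFTLIMIT */
#define NR_CPUS                   4

struct mem_cgroup_stat_cpu {
	long count[MEM_CGROUP_STAT_NSTATS];             /* page counts, signed  */
	unsigned long events[MEM_CGROUP_EVENTS_NSTATS]; /* monotonic counters   */
	unsigned long targets[MEM_CGROUP_NTARGETS];     /* next event threshold */
};

static struct mem_cgroup_stat_cpu stat_cpu[NR_CPUS];

int main(void)
{
	/* a pagein of a 512-page THP on CPU 0, mirroring
	 * mem_cgroup_charge_statistics(): PGPGIN counts the operation once,
	 * while EVENTS_COUNT advances by the number of base pages */
	stat_cpu[0].events[0] += 1;     /* MEM_CGROUP_EVENTS_PGPGIN */
	stat_cpu[0].events[2] += 512;   /* MEM_CGROUP_EVENTS_COUNT  */
	printf("pgpgin=%lu events=%lu\n",
	       stat_cpu[0].events[0], stat_cpu[0].events[2]);
	return 0;
}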
@@ -218,12 +226,6 @@ struct mem_cgroup { | |||
218 | * per zone LRU lists. | 226 | * per zone LRU lists. |
219 | */ | 227 | */ |
220 | struct mem_cgroup_lru_info info; | 228 | struct mem_cgroup_lru_info info; |
221 | |||
222 | /* | ||
223 | protect against reclaim related member. | ||
224 | */ | ||
225 | spinlock_t reclaim_param_lock; | ||
226 | |||
227 | /* | 229 | /* |
228 | * While reclaiming in a hierarchy, we cache the last child we | 230 | * While reclaiming in a hierarchy, we cache the last child we |
229 | * reclaimed from. | 231 | * reclaimed from. |
@@ -327,13 +329,6 @@ enum charge_type { | |||
327 | NR_CHARGE_TYPE, | 329 | NR_CHARGE_TYPE, |
328 | }; | 330 | }; |
329 | 331 | ||
330 | /* only for here (for easy reading.) */ | ||
331 | #define PCGF_CACHE (1UL << PCG_CACHE) | ||
332 | #define PCGF_USED (1UL << PCG_USED) | ||
333 | #define PCGF_LOCK (1UL << PCG_LOCK) | ||
334 | /* Not used, but added here for completeness */ | ||
335 | #define PCGF_ACCT (1UL << PCG_ACCT) | ||
336 | |||
337 | /* for encoding cft->private value on file */ | 332 | /* for encoding cft->private value on file */ |
338 | #define _MEM (0) | 333 | #define _MEM (0) |
339 | #define _MEMSWAP (1) | 334 | #define _MEMSWAP (1) |
@@ -371,14 +366,10 @@ struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *mem) | |||
371 | } | 366 | } |
372 | 367 | ||
373 | static struct mem_cgroup_per_zone * | 368 | static struct mem_cgroup_per_zone * |
374 | page_cgroup_zoneinfo(struct page_cgroup *pc) | 369 | page_cgroup_zoneinfo(struct mem_cgroup *mem, struct page *page) |
375 | { | 370 | { |
376 | struct mem_cgroup *mem = pc->mem_cgroup; | 371 | int nid = page_to_nid(page); |
377 | int nid = page_cgroup_nid(pc); | 372 | int zid = page_zonenum(page); |
378 | int zid = page_cgroup_zid(pc); | ||
379 | |||
380 | if (!mem) | ||
381 | return NULL; | ||
382 | 373 | ||
383 | return mem_cgroup_zoneinfo(mem, nid, zid); | 374 | return mem_cgroup_zoneinfo(mem, nid, zid); |
384 | } | 375 | } |
@@ -504,11 +495,6 @@ static void mem_cgroup_remove_from_trees(struct mem_cgroup *mem) | |||
504 | } | 495 | } |
505 | } | 496 | } |
506 | 497 | ||
507 | static inline unsigned long mem_cgroup_get_excess(struct mem_cgroup *mem) | ||
508 | { | ||
509 | return res_counter_soft_limit_excess(&mem->res) >> PAGE_SHIFT; | ||
510 | } | ||
511 | |||
512 | static struct mem_cgroup_per_zone * | 498 | static struct mem_cgroup_per_zone * |
513 | __mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) | 499 | __mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) |
514 | { | 500 | { |
@@ -565,11 +551,11 @@ mem_cgroup_largest_soft_limit_node(struct mem_cgroup_tree_per_zone *mctz) | |||
565 | * common workload, threashold and synchonization as vmstat[] should be | 551 | * common workload, threashold and synchonization as vmstat[] should be |
566 | * implemented. | 552 | * implemented. |
567 | */ | 553 | */ |
568 | static s64 mem_cgroup_read_stat(struct mem_cgroup *mem, | 554 | static long mem_cgroup_read_stat(struct mem_cgroup *mem, |
569 | enum mem_cgroup_stat_index idx) | 555 | enum mem_cgroup_stat_index idx) |
570 | { | 556 | { |
557 | long val = 0; | ||
571 | int cpu; | 558 | int cpu; |
572 | s64 val = 0; | ||
573 | 559 | ||
574 | get_online_cpus(); | 560 | get_online_cpus(); |
575 | for_each_online_cpu(cpu) | 561 | for_each_online_cpu(cpu) |
@@ -583,9 +569,9 @@ static s64 mem_cgroup_read_stat(struct mem_cgroup *mem, | |||
583 | return val; | 569 | return val; |
584 | } | 570 | } |
585 | 571 | ||
586 | static s64 mem_cgroup_local_usage(struct mem_cgroup *mem) | 572 | static long mem_cgroup_local_usage(struct mem_cgroup *mem) |
587 | { | 573 | { |
588 | s64 ret; | 574 | long ret; |
589 | 575 | ||
590 | ret = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_RSS); | 576 | ret = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_RSS); |
591 | ret += mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_CACHE); | 577 | ret += mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_CACHE); |
@@ -599,6 +585,22 @@ static void mem_cgroup_swap_statistics(struct mem_cgroup *mem, | |||
599 | this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_SWAPOUT], val); | 585 | this_cpu_add(mem->stat->count[MEM_CGROUP_STAT_SWAPOUT], val); |
600 | } | 586 | } |
601 | 587 | ||
588 | static unsigned long mem_cgroup_read_events(struct mem_cgroup *mem, | ||
589 | enum mem_cgroup_events_index idx) | ||
590 | { | ||
591 | unsigned long val = 0; | ||
592 | int cpu; | ||
593 | |||
594 | for_each_online_cpu(cpu) | ||
595 | val += per_cpu(mem->stat->events[idx], cpu); | ||
596 | #ifdef CONFIG_HOTPLUG_CPU | ||
597 | spin_lock(&mem->pcp_counter_lock); | ||
598 | val += mem->nocpu_base.events[idx]; | ||
599 | spin_unlock(&mem->pcp_counter_lock); | ||
600 | #endif | ||
601 | return val; | ||
602 | } | ||
603 | |||
602 | static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, | 604 | static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, |
603 | bool file, int nr_pages) | 605 | bool file, int nr_pages) |
604 | { | 606 | { |
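The new mem_cgroup_read_events() above sums the per-cpu counters of online CPUs and then folds in nocpu_base, which collects the counters of CPUs that have gone offline (see the hotplug drain hunk further down). A rough userspace model of just that arithmetic, with plain arrays standing in for the percpu and locking APIs:

#include <stdio.h>

#define NR_ONLINE_CPUS 4   /* illustrative */

/* per-cpu event counters plus the accumulator that collects the
 * counters of CPUs taken offline (mem_cgroup_drain_pcp_counter()) */
static unsigned long percpu_events[NR_ONLINE_CPUS] = { 10, 3, 0, 7 };
static unsigned long nocpu_base_events = 5;

static unsigned long read_events(void)
{
	unsigned long val = 0;
	int cpu;

	for (cpu = 0; cpu < NR_ONLINE_CPUS; cpu++)
		val += percpu_events[cpu];
	/* the patch takes pcp_counter_lock around this part */
	val += nocpu_base_events;
	return val;
}

int main(void)
{
	printf("total events: %lu\n", read_events());   /* prints 25 */
	return 0;
}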
@@ -611,13 +613,13 @@ static void mem_cgroup_charge_statistics(struct mem_cgroup *mem, | |||
611 | 613 | ||
612 | /* pagein of a big page is an event. So, ignore page size */ | 614 | /* pagein of a big page is an event. So, ignore page size */ |
613 | if (nr_pages > 0) | 615 | if (nr_pages > 0) |
614 | __this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGIN_COUNT]); | 616 | __this_cpu_inc(mem->stat->events[MEM_CGROUP_EVENTS_PGPGIN]); |
615 | else { | 617 | else { |
616 | __this_cpu_inc(mem->stat->count[MEM_CGROUP_STAT_PGPGOUT_COUNT]); | 618 | __this_cpu_inc(mem->stat->events[MEM_CGROUP_EVENTS_PGPGOUT]); |
617 | nr_pages = -nr_pages; /* for event */ | 619 | nr_pages = -nr_pages; /* for event */ |
618 | } | 620 | } |
619 | 621 | ||
620 | __this_cpu_add(mem->stat->count[MEM_CGROUP_EVENTS], nr_pages); | 622 | __this_cpu_add(mem->stat->events[MEM_CGROUP_EVENTS_COUNT], nr_pages); |
621 | 623 | ||
622 | preempt_enable(); | 624 | preempt_enable(); |
623 | } | 625 | } |
@@ -637,13 +639,34 @@ static unsigned long mem_cgroup_get_local_zonestat(struct mem_cgroup *mem, | |||
637 | return total; | 639 | return total; |
638 | } | 640 | } |
639 | 641 | ||
640 | static bool __memcg_event_check(struct mem_cgroup *mem, int event_mask_shift) | 642 | static bool __memcg_event_check(struct mem_cgroup *mem, int target) |
641 | { | 643 | { |
642 | s64 val; | 644 | unsigned long val, next; |
645 | |||
646 | val = this_cpu_read(mem->stat->events[MEM_CGROUP_EVENTS_COUNT]); | ||
647 | next = this_cpu_read(mem->stat->targets[target]); | ||
648 | /* from time_after() in jiffies.h */ | ||
649 | return ((long)next - (long)val < 0); | ||
650 | } | ||
651 | |||
652 | static void __mem_cgroup_target_update(struct mem_cgroup *mem, int target) | ||
653 | { | ||
654 | unsigned long val, next; | ||
643 | 655 | ||
644 | val = this_cpu_read(mem->stat->count[MEM_CGROUP_EVENTS]); | 656 | val = this_cpu_read(mem->stat->events[MEM_CGROUP_EVENTS_COUNT]); |
645 | 657 | ||
646 | return !(val & ((1 << event_mask_shift) - 1)); | 658 | switch (target) { |
659 | case MEM_CGROUP_TARGET_THRESH: | ||
660 | next = val + THRESHOLDS_EVENTS_TARGET; | ||
661 | break; | ||
662 | case MEM_CGROUP_TARGET_SOFTLIMIT: | ||
663 | next = val + SOFTLIMIT_EVENTS_TARGET; | ||
664 | break; | ||
665 | default: | ||
666 | return; | ||
667 | } | ||
668 | |||
669 | this_cpu_write(mem->stat->targets[target], next); | ||
647 | } | 670 | } |
648 | 671 | ||
649 | /* | 672 | /* |
@@ -653,10 +676,15 @@ static bool __memcg_event_check(struct mem_cgroup *mem, int event_mask_shift) | |||
653 | static void memcg_check_events(struct mem_cgroup *mem, struct page *page) | 676 | static void memcg_check_events(struct mem_cgroup *mem, struct page *page) |
654 | { | 677 | { |
655 | /* threshold event is triggered in finer grain than soft limit */ | 678 | /* threshold event is triggered in finer grain than soft limit */ |
656 | if (unlikely(__memcg_event_check(mem, THRESHOLDS_EVENTS_THRESH))) { | 679 | if (unlikely(__memcg_event_check(mem, MEM_CGROUP_TARGET_THRESH))) { |
657 | mem_cgroup_threshold(mem); | 680 | mem_cgroup_threshold(mem); |
658 | if (unlikely(__memcg_event_check(mem, SOFTLIMIT_EVENTS_THRESH))) | 681 | __mem_cgroup_target_update(mem, MEM_CGROUP_TARGET_THRESH); |
682 | if (unlikely(__memcg_event_check(mem, | ||
683 | MEM_CGROUP_TARGET_SOFTLIMIT))){ | ||
659 | mem_cgroup_update_tree(mem, page); | 684 | mem_cgroup_update_tree(mem, page); |
685 | __mem_cgroup_target_update(mem, | ||
686 | MEM_CGROUP_TARGET_SOFTLIMIT); | ||
687 | } | ||
660 | } | 688 | } |
661 | } | 689 | } |
662 | 690 | ||
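Taken together, the hunks above replace the old power-of-two mask test with an explicit "next target" per event type: the counter keeps incrementing, and work fires whenever it passes the stored target, which is then pushed out by 128 (thresholds) or 1024 (soft limit) events. A single-CPU userspace sketch of the mechanism, with plain variables standing in for this_cpu_read()/this_cpu_write():

#include <stdio.h>

#define THRESHOLDS_EVENTS_TARGET 128

static unsigned long events;       /* MEM_CGROUP_EVENTS_COUNT on this CPU */
static unsigned long next_thresh;  /* targets[MEM_CGROUP_TARGET_THRESH]  */

/* same signed-difference trick as __memcg_event_check(); it keeps
 * working when the unsigned counter eventually wraps around */
static int event_check(unsigned long next, unsigned long val)
{
	return (long)next - (long)val < 0;
}

int main(void)
{
	unsigned long i, fired = 0;

	for (i = 0; i < 1000; i++) {
		events++;                               /* one pagein/pageout */
		if (event_check(next_thresh, events)) {
			fired++;                        /* mem_cgroup_threshold() would run */
			/* __mem_cgroup_target_update(): advance the target */
			next_thresh = events + THRESHOLDS_EVENTS_TARGET;
		}
	}
	printf("threshold work ran %lu times for %lu events\n", fired, i);
	return 0;
}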
@@ -815,7 +843,7 @@ void mem_cgroup_del_lru_list(struct page *page, enum lru_list lru) | |||
815 | * We don't check PCG_USED bit. It's cleared when the "page" is finally | 843 | * We don't check PCG_USED bit. It's cleared when the "page" is finally |
816 | * removed from global LRU. | 844 | * removed from global LRU. |
817 | */ | 845 | */ |
818 | mz = page_cgroup_zoneinfo(pc); | 846 | mz = page_cgroup_zoneinfo(pc->mem_cgroup, page); |
819 | /* huge page split is done under lru_lock. so, we have no races. */ | 847 | /* huge page split is done under lru_lock. so, we have no races. */ |
820 | MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page); | 848 | MEM_CGROUP_ZSTAT(mz, lru) -= 1 << compound_order(page); |
821 | if (mem_cgroup_is_root(pc->mem_cgroup)) | 849 | if (mem_cgroup_is_root(pc->mem_cgroup)) |
@@ -829,6 +857,32 @@ void mem_cgroup_del_lru(struct page *page) | |||
829 | mem_cgroup_del_lru_list(page, page_lru(page)); | 857 | mem_cgroup_del_lru_list(page, page_lru(page)); |
830 | } | 858 | } |
831 | 859 | ||
860 | /* | ||
861 | * Writeback is about to end against a page which has been marked for immediate | ||
862 | * reclaim. If it still appears to be reclaimable, move it to the tail of the | ||
863 | * inactive list. | ||
864 | */ | ||
865 | void mem_cgroup_rotate_reclaimable_page(struct page *page) | ||
866 | { | ||
867 | struct mem_cgroup_per_zone *mz; | ||
868 | struct page_cgroup *pc; | ||
869 | enum lru_list lru = page_lru(page); | ||
870 | |||
871 | if (mem_cgroup_disabled()) | ||
872 | return; | ||
873 | |||
874 | pc = lookup_page_cgroup(page); | ||
875 | /* unused or root page is not rotated. */ | ||
876 | if (!PageCgroupUsed(pc)) | ||
877 | return; | ||
878 | /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */ | ||
879 | smp_rmb(); | ||
880 | if (mem_cgroup_is_root(pc->mem_cgroup)) | ||
881 | return; | ||
882 | mz = page_cgroup_zoneinfo(pc->mem_cgroup, page); | ||
883 | list_move_tail(&pc->lru, &mz->lists[lru]); | ||
884 | } | ||
885 | |||
832 | void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru) | 886 | void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru) |
833 | { | 887 | { |
834 | struct mem_cgroup_per_zone *mz; | 888 | struct mem_cgroup_per_zone *mz; |
@@ -845,7 +899,7 @@ void mem_cgroup_rotate_lru_list(struct page *page, enum lru_list lru) | |||
845 | smp_rmb(); | 899 | smp_rmb(); |
846 | if (mem_cgroup_is_root(pc->mem_cgroup)) | 900 | if (mem_cgroup_is_root(pc->mem_cgroup)) |
847 | return; | 901 | return; |
848 | mz = page_cgroup_zoneinfo(pc); | 902 | mz = page_cgroup_zoneinfo(pc->mem_cgroup, page); |
849 | list_move(&pc->lru, &mz->lists[lru]); | 903 | list_move(&pc->lru, &mz->lists[lru]); |
850 | } | 904 | } |
851 | 905 | ||
@@ -862,7 +916,7 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru) | |||
862 | return; | 916 | return; |
863 | /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */ | 917 | /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */ |
864 | smp_rmb(); | 918 | smp_rmb(); |
865 | mz = page_cgroup_zoneinfo(pc); | 919 | mz = page_cgroup_zoneinfo(pc->mem_cgroup, page); |
866 | /* huge page split is done under lru_lock. so, we have no races. */ | 920 | /* huge page split is done under lru_lock. so, we have no races. */ |
867 | MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page); | 921 | MEM_CGROUP_ZSTAT(mz, lru) += 1 << compound_order(page); |
868 | SetPageCgroupAcctLRU(pc); | 922 | SetPageCgroupAcctLRU(pc); |
@@ -872,18 +926,28 @@ void mem_cgroup_add_lru_list(struct page *page, enum lru_list lru) | |||
872 | } | 926 | } |
873 | 927 | ||
874 | /* | 928 | /* |
875 | * At handling SwapCache, pc->mem_cgroup may be changed while it's linked to | 929 | * At handling SwapCache and other FUSE stuff, pc->mem_cgroup may be changed |
876 | * lru because the page may.be reused after it's fully uncharged (because of | 930 | * while it's linked to lru because the page may be reused after it's fully |
877 | * SwapCache behavior).To handle that, unlink page_cgroup from LRU when charge | 931 | * uncharged. To handle that, unlink page_cgroup from LRU when charge it again. |
878 | * it again. This function is only used to charge SwapCache. It's done under | 932 | * It's done under lock_page and expected that zone->lru_lock isnever held. |
879 | * lock_page and expected that zone->lru_lock is never held. | ||
880 | */ | 933 | */ |
881 | static void mem_cgroup_lru_del_before_commit_swapcache(struct page *page) | 934 | static void mem_cgroup_lru_del_before_commit(struct page *page) |
882 | { | 935 | { |
883 | unsigned long flags; | 936 | unsigned long flags; |
884 | struct zone *zone = page_zone(page); | 937 | struct zone *zone = page_zone(page); |
885 | struct page_cgroup *pc = lookup_page_cgroup(page); | 938 | struct page_cgroup *pc = lookup_page_cgroup(page); |
886 | 939 | ||
940 | /* | ||
941 | * Doing this check without taking ->lru_lock seems wrong but this | ||
942 | * is safe. Because if page_cgroup's USED bit is unset, the page | ||
943 | * will not be added to any memcg's LRU. If page_cgroup's USED bit is | ||
944 | * set, the commit after this will fail, anyway. | ||
945 | * This all charge/uncharge is done under some mutual execustion. | ||
946 | * So, we don't need to taking care of changes in USED bit. | ||
947 | */ | ||
948 | if (likely(!PageLRU(page))) | ||
949 | return; | ||
950 | |||
887 | spin_lock_irqsave(&zone->lru_lock, flags); | 951 | spin_lock_irqsave(&zone->lru_lock, flags); |
888 | /* | 952 | /* |
889 | * Forget old LRU when this page_cgroup is *not* used. This Used bit | 953 | * Forget old LRU when this page_cgroup is *not* used. This Used bit |
@@ -894,12 +958,15 @@ static void mem_cgroup_lru_del_before_commit_swapcache(struct page *page) | |||
894 | spin_unlock_irqrestore(&zone->lru_lock, flags); | 958 | spin_unlock_irqrestore(&zone->lru_lock, flags); |
895 | } | 959 | } |
896 | 960 | ||
897 | static void mem_cgroup_lru_add_after_commit_swapcache(struct page *page) | 961 | static void mem_cgroup_lru_add_after_commit(struct page *page) |
898 | { | 962 | { |
899 | unsigned long flags; | 963 | unsigned long flags; |
900 | struct zone *zone = page_zone(page); | 964 | struct zone *zone = page_zone(page); |
901 | struct page_cgroup *pc = lookup_page_cgroup(page); | 965 | struct page_cgroup *pc = lookup_page_cgroup(page); |
902 | 966 | ||
967 | /* taking care of that the page is added to LRU while we commit it */ | ||
968 | if (likely(!PageLRU(page))) | ||
969 | return; | ||
903 | spin_lock_irqsave(&zone->lru_lock, flags); | 970 | spin_lock_irqsave(&zone->lru_lock, flags); |
904 | /* link when the page is linked to LRU but page_cgroup isn't */ | 971 | /* link when the page is linked to LRU but page_cgroup isn't */ |
905 | if (PageLRU(page) && !PageCgroupAcctLRU(pc)) | 972 | if (PageLRU(page) && !PageCgroupAcctLRU(pc)) |
@@ -1032,10 +1099,7 @@ mem_cgroup_get_reclaim_stat_from_page(struct page *page) | |||
1032 | return NULL; | 1099 | return NULL; |
1033 | /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */ | 1100 | /* Ensure pc->mem_cgroup is visible after reading PCG_USED. */ |
1034 | smp_rmb(); | 1101 | smp_rmb(); |
1035 | mz = page_cgroup_zoneinfo(pc); | 1102 | mz = page_cgroup_zoneinfo(pc->mem_cgroup, page); |
1036 | if (!mz) | ||
1037 | return NULL; | ||
1038 | |||
1039 | return &mz->reclaim_stat; | 1103 | return &mz->reclaim_stat; |
1040 | } | 1104 | } |
1041 | 1105 | ||
@@ -1067,9 +1131,11 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, | |||
1067 | if (scan >= nr_to_scan) | 1131 | if (scan >= nr_to_scan) |
1068 | break; | 1132 | break; |
1069 | 1133 | ||
1070 | page = pc->page; | ||
1071 | if (unlikely(!PageCgroupUsed(pc))) | 1134 | if (unlikely(!PageCgroupUsed(pc))) |
1072 | continue; | 1135 | continue; |
1136 | |||
1137 | page = lookup_cgroup_page(pc); | ||
1138 | |||
1073 | if (unlikely(!PageLRU(page))) | 1139 | if (unlikely(!PageLRU(page))) |
1074 | continue; | 1140 | continue; |
1075 | 1141 | ||
@@ -1101,49 +1167,32 @@ unsigned long mem_cgroup_isolate_pages(unsigned long nr_to_scan, | |||
1101 | #define mem_cgroup_from_res_counter(counter, member) \ | 1167 | #define mem_cgroup_from_res_counter(counter, member) \ |
1102 | container_of(counter, struct mem_cgroup, member) | 1168 | container_of(counter, struct mem_cgroup, member) |
1103 | 1169 | ||
1104 | static bool mem_cgroup_check_under_limit(struct mem_cgroup *mem) | ||
1105 | { | ||
1106 | if (do_swap_account) { | ||
1107 | if (res_counter_check_under_limit(&mem->res) && | ||
1108 | res_counter_check_under_limit(&mem->memsw)) | ||
1109 | return true; | ||
1110 | } else | ||
1111 | if (res_counter_check_under_limit(&mem->res)) | ||
1112 | return true; | ||
1113 | return false; | ||
1114 | } | ||
1115 | |||
1116 | /** | 1170 | /** |
1117 | * mem_cgroup_check_margin - check if the memory cgroup allows charging | 1171 | * mem_cgroup_margin - calculate chargeable space of a memory cgroup |
1118 | * @mem: memory cgroup to check | 1172 | * @mem: the memory cgroup |
1119 | * @bytes: the number of bytes the caller intends to charge | ||
1120 | * | 1173 | * |
1121 | * Returns a boolean value on whether @mem can be charged @bytes or | 1174 | * Returns the maximum amount of memory @mem can be charged with, in |
1122 | * whether this would exceed the limit. | 1175 | * pages. |
1123 | */ | 1176 | */ |
1124 | static bool mem_cgroup_check_margin(struct mem_cgroup *mem, unsigned long bytes) | 1177 | static unsigned long mem_cgroup_margin(struct mem_cgroup *mem) |
1125 | { | 1178 | { |
1126 | if (!res_counter_check_margin(&mem->res, bytes)) | 1179 | unsigned long long margin; |
1127 | return false; | 1180 | |
1128 | if (do_swap_account && !res_counter_check_margin(&mem->memsw, bytes)) | 1181 | margin = res_counter_margin(&mem->res); |
1129 | return false; | 1182 | if (do_swap_account) |
1130 | return true; | 1183 | margin = min(margin, res_counter_margin(&mem->memsw)); |
1184 | return margin >> PAGE_SHIFT; | ||
1131 | } | 1185 | } |
1132 | 1186 | ||
1133 | static unsigned int get_swappiness(struct mem_cgroup *memcg) | 1187 | static unsigned int get_swappiness(struct mem_cgroup *memcg) |
1134 | { | 1188 | { |
1135 | struct cgroup *cgrp = memcg->css.cgroup; | 1189 | struct cgroup *cgrp = memcg->css.cgroup; |
1136 | unsigned int swappiness; | ||
1137 | 1190 | ||
1138 | /* root ? */ | 1191 | /* root ? */ |
1139 | if (cgrp->parent == NULL) | 1192 | if (cgrp->parent == NULL) |
1140 | return vm_swappiness; | 1193 | return vm_swappiness; |
1141 | 1194 | ||
1142 | spin_lock(&memcg->reclaim_param_lock); | 1195 | return memcg->swappiness; |
1143 | swappiness = memcg->swappiness; | ||
1144 | spin_unlock(&memcg->reclaim_param_lock); | ||
1145 | |||
1146 | return swappiness; | ||
1147 | } | 1196 | } |
1148 | 1197 | ||
1149 | static void mem_cgroup_start_move(struct mem_cgroup *mem) | 1198 | static void mem_cgroup_start_move(struct mem_cgroup *mem) |
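mem_cgroup_margin() above folds the old check_under_limit/check_margin pair into one helper that reports how many pages can still be charged: the smaller of the res and memsw margins, converted from bytes to pages. A userspace sketch of the same arithmetic, with res_counter reduced to a usage/limit pair (the struct and helper names here are placeholders, not the kernel API):

#include <stdio.h>

#define PAGE_SHIFT 12

struct counter { unsigned long long usage, limit; };   /* res_counter stand-in */

static unsigned long long margin_of(const struct counter *c)
{
	return c->usage < c->limit ? c->limit - c->usage : 0;
}

/* chargeable space in pages, constrained by both mem and mem+swap */
static unsigned long margin_pages(const struct counter *res,
				  const struct counter *memsw,
				  int do_swap_account)
{
	unsigned long long margin = margin_of(res);

	if (do_swap_account && margin_of(memsw) < margin)
		margin = margin_of(memsw);
	return (unsigned long)(margin >> PAGE_SHIFT);
}

int main(void)
{
	struct counter res   = { .usage = 90ULL << 20, .limit = 100ULL << 20 };
	struct counter memsw = { .usage = 98ULL << 20, .limit = 100ULL << 20 };

	/* memsw is the tighter bound: 2MB of headroom -> 512 pages */
	printf("margin: %lu pages\n", margin_pages(&res, &memsw, 1));
	return 0;
}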
@@ -1359,13 +1408,11 @@ mem_cgroup_select_victim(struct mem_cgroup *root_mem) | |||
1359 | 1408 | ||
1360 | rcu_read_unlock(); | 1409 | rcu_read_unlock(); |
1361 | /* Updates scanning parameter */ | 1410 | /* Updates scanning parameter */ |
1362 | spin_lock(&root_mem->reclaim_param_lock); | ||
1363 | if (!css) { | 1411 | if (!css) { |
1364 | /* this means start scan from ID:1 */ | 1412 | /* this means start scan from ID:1 */ |
1365 | root_mem->last_scanned_child = 0; | 1413 | root_mem->last_scanned_child = 0; |
1366 | } else | 1414 | } else |
1367 | root_mem->last_scanned_child = found; | 1415 | root_mem->last_scanned_child = found; |
1368 | spin_unlock(&root_mem->reclaim_param_lock); | ||
1369 | } | 1416 | } |
1370 | 1417 | ||
1371 | return ret; | 1418 | return ret; |
@@ -1394,7 +1441,9 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
1394 | bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP; | 1441 | bool noswap = reclaim_options & MEM_CGROUP_RECLAIM_NOSWAP; |
1395 | bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK; | 1442 | bool shrink = reclaim_options & MEM_CGROUP_RECLAIM_SHRINK; |
1396 | bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT; | 1443 | bool check_soft = reclaim_options & MEM_CGROUP_RECLAIM_SOFT; |
1397 | unsigned long excess = mem_cgroup_get_excess(root_mem); | 1444 | unsigned long excess; |
1445 | |||
1446 | excess = res_counter_soft_limit_excess(&root_mem->res) >> PAGE_SHIFT; | ||
1398 | 1447 | ||
1399 | /* If memsw_is_minimum==1, swap-out is of-no-use. */ | 1448 | /* If memsw_is_minimum==1, swap-out is of-no-use. */ |
1400 | if (root_mem->memsw_is_minimum) | 1449 | if (root_mem->memsw_is_minimum) |
@@ -1417,7 +1466,7 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
1417 | break; | 1466 | break; |
1418 | } | 1467 | } |
1419 | /* | 1468 | /* |
1420 | * We want to do more targetted reclaim. | 1469 | * We want to do more targeted reclaim. |
1421 | * excess >> 2 is not to excessive so as to | 1470 | * excess >> 2 is not to excessive so as to |
1422 | * reclaim too much, nor too less that we keep | 1471 | * reclaim too much, nor too less that we keep |
1423 | * coming back to reclaim from this cgroup | 1472 | * coming back to reclaim from this cgroup |
@@ -1451,9 +1500,9 @@ static int mem_cgroup_hierarchical_reclaim(struct mem_cgroup *root_mem, | |||
1451 | return ret; | 1500 | return ret; |
1452 | total += ret; | 1501 | total += ret; |
1453 | if (check_soft) { | 1502 | if (check_soft) { |
1454 | if (res_counter_check_under_soft_limit(&root_mem->res)) | 1503 | if (!res_counter_soft_limit_excess(&root_mem->res)) |
1455 | return total; | 1504 | return total; |
1456 | } else if (mem_cgroup_check_under_limit(root_mem)) | 1505 | } else if (mem_cgroup_margin(root_mem)) |
1457 | return 1 + total; | 1506 | return 1 + total; |
1458 | } | 1507 | } |
1459 | return total; | 1508 | return total; |
@@ -1661,17 +1710,17 @@ EXPORT_SYMBOL(mem_cgroup_update_page_stat); | |||
1661 | * size of first charge trial. "32" comes from vmscan.c's magic value. | 1710 | * size of first charge trial. "32" comes from vmscan.c's magic value. |
1662 | * TODO: maybe necessary to use big numbers in big irons. | 1711 | * TODO: maybe necessary to use big numbers in big irons. |
1663 | */ | 1712 | */ |
1664 | #define CHARGE_SIZE (32 * PAGE_SIZE) | 1713 | #define CHARGE_BATCH 32U |
1665 | struct memcg_stock_pcp { | 1714 | struct memcg_stock_pcp { |
1666 | struct mem_cgroup *cached; /* this never be root cgroup */ | 1715 | struct mem_cgroup *cached; /* this never be root cgroup */ |
1667 | int charge; | 1716 | unsigned int nr_pages; |
1668 | struct work_struct work; | 1717 | struct work_struct work; |
1669 | }; | 1718 | }; |
1670 | static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock); | 1719 | static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock); |
1671 | static atomic_t memcg_drain_count; | 1720 | static atomic_t memcg_drain_count; |
1672 | 1721 | ||
1673 | /* | 1722 | /* |
1674 | * Try to consume stocked charge on this cpu. If success, PAGE_SIZE is consumed | 1723 | * Try to consume stocked charge on this cpu. If success, one page is consumed |
1675 | * from local stock and true is returned. If the stock is 0 or charges from a | 1724 | * from local stock and true is returned. If the stock is 0 or charges from a |
1676 | * cgroup which is not current target, returns false. This stock will be | 1725 | * cgroup which is not current target, returns false. This stock will be |
1677 | * refilled. | 1726 | * refilled. |
@@ -1682,8 +1731,8 @@ static bool consume_stock(struct mem_cgroup *mem) | |||
1682 | bool ret = true; | 1731 | bool ret = true; |
1683 | 1732 | ||
1684 | stock = &get_cpu_var(memcg_stock); | 1733 | stock = &get_cpu_var(memcg_stock); |
1685 | if (mem == stock->cached && stock->charge) | 1734 | if (mem == stock->cached && stock->nr_pages) |
1686 | stock->charge -= PAGE_SIZE; | 1735 | stock->nr_pages--; |
1687 | else /* need to call res_counter_charge */ | 1736 | else /* need to call res_counter_charge */ |
1688 | ret = false; | 1737 | ret = false; |
1689 | put_cpu_var(memcg_stock); | 1738 | put_cpu_var(memcg_stock); |
@@ -1697,13 +1746,15 @@ static void drain_stock(struct memcg_stock_pcp *stock) | |||
1697 | { | 1746 | { |
1698 | struct mem_cgroup *old = stock->cached; | 1747 | struct mem_cgroup *old = stock->cached; |
1699 | 1748 | ||
1700 | if (stock->charge) { | 1749 | if (stock->nr_pages) { |
1701 | res_counter_uncharge(&old->res, stock->charge); | 1750 | unsigned long bytes = stock->nr_pages * PAGE_SIZE; |
1751 | |||
1752 | res_counter_uncharge(&old->res, bytes); | ||
1702 | if (do_swap_account) | 1753 | if (do_swap_account) |
1703 | res_counter_uncharge(&old->memsw, stock->charge); | 1754 | res_counter_uncharge(&old->memsw, bytes); |
1755 | stock->nr_pages = 0; | ||
1704 | } | 1756 | } |
1705 | stock->cached = NULL; | 1757 | stock->cached = NULL; |
1706 | stock->charge = 0; | ||
1707 | } | 1758 | } |
1708 | 1759 | ||
1709 | /* | 1760 | /* |
@@ -1720,7 +1771,7 @@ static void drain_local_stock(struct work_struct *dummy) | |||
1720 | * Cache charges(val) which is from res_counter, to local per_cpu area. | 1771 | * Cache charges(val) which is from res_counter, to local per_cpu area. |
1721 | * This will be consumed by consume_stock() function, later. | 1772 | * This will be consumed by consume_stock() function, later. |
1722 | */ | 1773 | */ |
1723 | static void refill_stock(struct mem_cgroup *mem, int val) | 1774 | static void refill_stock(struct mem_cgroup *mem, unsigned int nr_pages) |
1724 | { | 1775 | { |
1725 | struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock); | 1776 | struct memcg_stock_pcp *stock = &get_cpu_var(memcg_stock); |
1726 | 1777 | ||
@@ -1728,7 +1779,7 @@ static void refill_stock(struct mem_cgroup *mem, int val) | |||
1728 | drain_stock(stock); | 1779 | drain_stock(stock); |
1729 | stock->cached = mem; | 1780 | stock->cached = mem; |
1730 | } | 1781 | } |
1731 | stock->charge += val; | 1782 | stock->nr_pages += nr_pages; |
1732 | put_cpu_var(memcg_stock); | 1783 | put_cpu_var(memcg_stock); |
1733 | } | 1784 | } |
1734 | 1785 | ||
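The stock hunks above switch the per-cpu cache from bytes to pages: consume_stock() hands out one pre-charged page when the cached memcg matches, refill_stock() parks surplus pages (draining first if the owner changes), and drain_stock() converts back to bytes only when talking to the res_counter. A simplified single-CPU model of that bookkeeping, with a plain integer standing in for the res_counter and an opaque pointer for the memcg:

#include <stdio.h>

struct stock {
	void *cached;            /* which memcg the cached pages belong to */
	unsigned int nr_pages;   /* pre-charged pages kept on this CPU     */
};

static unsigned long res_usage;   /* stands in for mem->res, in pages */
static struct stock stock;

static int consume_stock(void *memcg)
{
	if (memcg == stock.cached && stock.nr_pages) {
		stock.nr_pages--;    /* one page, no res_counter round trip */
		return 1;
	}
	return 0;
}

static void refill_stock(void *memcg, unsigned int nr_pages)
{
	if (stock.cached != memcg) {
		/* drain_stock(): return the old owner's pages */
		res_usage -= stock.nr_pages;
		stock.nr_pages = 0;
		stock.cached = memcg;
	}
	stock.nr_pages += nr_pages;
}

int main(void)
{
	int memcg_a, memcg_b;    /* addresses used only as identities */

	res_usage += 32;                 /* batch charge of 32 pages for A */
	refill_stock(&memcg_a, 31);      /* cache the 31 unused pages */
	printf("hit=%d cached=%u\n", consume_stock(&memcg_a), stock.nr_pages);
	printf("other memcg hit=%d\n", consume_stock(&memcg_b));
	refill_stock(&memcg_b, 0);       /* switching owner drains A's pages */
	printf("usage=%lu pages\n", res_usage);
	return 0;
}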
@@ -1780,11 +1831,17 @@ static void mem_cgroup_drain_pcp_counter(struct mem_cgroup *mem, int cpu) | |||
1780 | 1831 | ||
1781 | spin_lock(&mem->pcp_counter_lock); | 1832 | spin_lock(&mem->pcp_counter_lock); |
1782 | for (i = 0; i < MEM_CGROUP_STAT_DATA; i++) { | 1833 | for (i = 0; i < MEM_CGROUP_STAT_DATA; i++) { |
1783 | s64 x = per_cpu(mem->stat->count[i], cpu); | 1834 | long x = per_cpu(mem->stat->count[i], cpu); |
1784 | 1835 | ||
1785 | per_cpu(mem->stat->count[i], cpu) = 0; | 1836 | per_cpu(mem->stat->count[i], cpu) = 0; |
1786 | mem->nocpu_base.count[i] += x; | 1837 | mem->nocpu_base.count[i] += x; |
1787 | } | 1838 | } |
1839 | for (i = 0; i < MEM_CGROUP_EVENTS_NSTATS; i++) { | ||
1840 | unsigned long x = per_cpu(mem->stat->events[i], cpu); | ||
1841 | |||
1842 | per_cpu(mem->stat->events[i], cpu) = 0; | ||
1843 | mem->nocpu_base.events[i] += x; | ||
1844 | } | ||
1788 | /* need to clear ON_MOVE value, works as a kind of lock. */ | 1845 | /* need to clear ON_MOVE value, works as a kind of lock. */ |
1789 | per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) = 0; | 1846 | per_cpu(mem->stat->count[MEM_CGROUP_ON_MOVE], cpu) = 0; |
1790 | spin_unlock(&mem->pcp_counter_lock); | 1847 | spin_unlock(&mem->pcp_counter_lock); |
@@ -1834,9 +1891,10 @@ enum { | |||
1834 | CHARGE_OOM_DIE, /* the current is killed because of OOM */ | 1891 | CHARGE_OOM_DIE, /* the current is killed because of OOM */ |
1835 | }; | 1892 | }; |
1836 | 1893 | ||
1837 | static int __mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask, | 1894 | static int mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask, |
1838 | int csize, bool oom_check) | 1895 | unsigned int nr_pages, bool oom_check) |
1839 | { | 1896 | { |
1897 | unsigned long csize = nr_pages * PAGE_SIZE; | ||
1840 | struct mem_cgroup *mem_over_limit; | 1898 | struct mem_cgroup *mem_over_limit; |
1841 | struct res_counter *fail_res; | 1899 | struct res_counter *fail_res; |
1842 | unsigned long flags = 0; | 1900 | unsigned long flags = 0; |
@@ -1857,14 +1915,13 @@ static int __mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask, | |||
1857 | } else | 1915 | } else |
1858 | mem_over_limit = mem_cgroup_from_res_counter(fail_res, res); | 1916 | mem_over_limit = mem_cgroup_from_res_counter(fail_res, res); |
1859 | /* | 1917 | /* |
1860 | * csize can be either a huge page (HPAGE_SIZE), a batch of | 1918 | * nr_pages can be either a huge page (HPAGE_PMD_NR), a batch |
1861 | * regular pages (CHARGE_SIZE), or a single regular page | 1919 | * of regular pages (CHARGE_BATCH), or a single regular page (1). |
1862 | * (PAGE_SIZE). | ||
1863 | * | 1920 | * |
1864 | * Never reclaim on behalf of optional batching, retry with a | 1921 | * Never reclaim on behalf of optional batching, retry with a |
1865 | * single page instead. | 1922 | * single page instead. |
1866 | */ | 1923 | */ |
1867 | if (csize == CHARGE_SIZE) | 1924 | if (nr_pages == CHARGE_BATCH) |
1868 | return CHARGE_RETRY; | 1925 | return CHARGE_RETRY; |
1869 | 1926 | ||
1870 | if (!(gfp_mask & __GFP_WAIT)) | 1927 | if (!(gfp_mask & __GFP_WAIT)) |
@@ -1872,7 +1929,7 @@ static int __mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask, | |||
1872 | 1929 | ||
1873 | ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL, | 1930 | ret = mem_cgroup_hierarchical_reclaim(mem_over_limit, NULL, |
1874 | gfp_mask, flags); | 1931 | gfp_mask, flags); |
1875 | if (mem_cgroup_check_margin(mem_over_limit, csize)) | 1932 | if (mem_cgroup_margin(mem_over_limit) >= nr_pages) |
1876 | return CHARGE_RETRY; | 1933 | return CHARGE_RETRY; |
1877 | /* | 1934 | /* |
1878 | * Even though the limit is exceeded at this point, reclaim | 1935 | * Even though the limit is exceeded at this point, reclaim |
@@ -1883,7 +1940,7 @@ static int __mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask, | |||
1883 | * unlikely to succeed so close to the limit, and we fall back | 1940 | * unlikely to succeed so close to the limit, and we fall back |
1884 | * to regular pages anyway in case of failure. | 1941 | * to regular pages anyway in case of failure. |
1885 | */ | 1942 | */ |
1886 | if (csize == PAGE_SIZE && ret) | 1943 | if (nr_pages == 1 && ret) |
1887 | return CHARGE_RETRY; | 1944 | return CHARGE_RETRY; |
1888 | 1945 | ||
1889 | /* | 1946 | /* |
@@ -1909,13 +1966,14 @@ static int __mem_cgroup_do_charge(struct mem_cgroup *mem, gfp_t gfp_mask, | |||
1909 | */ | 1966 | */ |
1910 | static int __mem_cgroup_try_charge(struct mm_struct *mm, | 1967 | static int __mem_cgroup_try_charge(struct mm_struct *mm, |
1911 | gfp_t gfp_mask, | 1968 | gfp_t gfp_mask, |
1912 | struct mem_cgroup **memcg, bool oom, | 1969 | unsigned int nr_pages, |
1913 | int page_size) | 1970 | struct mem_cgroup **memcg, |
1971 | bool oom) | ||
1914 | { | 1972 | { |
1973 | unsigned int batch = max(CHARGE_BATCH, nr_pages); | ||
1915 | int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES; | 1974 | int nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES; |
1916 | struct mem_cgroup *mem = NULL; | 1975 | struct mem_cgroup *mem = NULL; |
1917 | int ret; | 1976 | int ret; |
1918 | int csize = max(CHARGE_SIZE, (unsigned long) page_size); | ||
1919 | 1977 | ||
1920 | /* | 1978 | /* |
1921 | * Unlike gloval-vm's OOM-kill, we're not in memory shortage | 1979 | * Unlike gloval-vm's OOM-kill, we're not in memory shortage |
@@ -1940,7 +1998,7 @@ again: | |||
1940 | VM_BUG_ON(css_is_removed(&mem->css)); | 1998 | VM_BUG_ON(css_is_removed(&mem->css)); |
1941 | if (mem_cgroup_is_root(mem)) | 1999 | if (mem_cgroup_is_root(mem)) |
1942 | goto done; | 2000 | goto done; |
1943 | if (page_size == PAGE_SIZE && consume_stock(mem)) | 2001 | if (nr_pages == 1 && consume_stock(mem)) |
1944 | goto done; | 2002 | goto done; |
1945 | css_get(&mem->css); | 2003 | css_get(&mem->css); |
1946 | } else { | 2004 | } else { |
@@ -1963,7 +2021,7 @@ again: | |||
1963 | rcu_read_unlock(); | 2021 | rcu_read_unlock(); |
1964 | goto done; | 2022 | goto done; |
1965 | } | 2023 | } |
1966 | if (page_size == PAGE_SIZE && consume_stock(mem)) { | 2024 | if (nr_pages == 1 && consume_stock(mem)) { |
1967 | /* | 2025 | /* |
1968 | * It seems dagerous to access memcg without css_get(). | 2026 | * It seems dagerous to access memcg without css_get(). |
1969 | * But considering how consume_stok works, it's not | 2027 | * But considering how consume_stok works, it's not |
@@ -1998,13 +2056,12 @@ again: | |||
1998 | nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES; | 2056 | nr_oom_retries = MEM_CGROUP_RECLAIM_RETRIES; |
1999 | } | 2057 | } |
2000 | 2058 | ||
2001 | ret = __mem_cgroup_do_charge(mem, gfp_mask, csize, oom_check); | 2059 | ret = mem_cgroup_do_charge(mem, gfp_mask, batch, oom_check); |
2002 | |||
2003 | switch (ret) { | 2060 | switch (ret) { |
2004 | case CHARGE_OK: | 2061 | case CHARGE_OK: |
2005 | break; | 2062 | break; |
2006 | case CHARGE_RETRY: /* not in OOM situation but retry */ | 2063 | case CHARGE_RETRY: /* not in OOM situation but retry */ |
2007 | csize = page_size; | 2064 | batch = nr_pages; |
2008 | css_put(&mem->css); | 2065 | css_put(&mem->css); |
2009 | mem = NULL; | 2066 | mem = NULL; |
2010 | goto again; | 2067 | goto again; |
@@ -2025,8 +2082,8 @@ again: | |||
2025 | } | 2082 | } |
2026 | } while (ret != CHARGE_OK); | 2083 | } while (ret != CHARGE_OK); |
2027 | 2084 | ||
2028 | if (csize > page_size) | 2085 | if (batch > nr_pages) |
2029 | refill_stock(mem, csize - page_size); | 2086 | refill_stock(mem, batch - nr_pages); |
2030 | css_put(&mem->css); | 2087 | css_put(&mem->css); |
2031 | done: | 2088 | done: |
2032 | *memcg = mem; | 2089 | *memcg = mem; |
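With the conversion to pages, the retry loop above charges an optimistic batch of max(CHARGE_BATCH, nr_pages), falls back to exactly nr_pages on CHARGE_RETRY, and gives any surplus to refill_stock() once the charge succeeds. A compressed userspace sketch of only that accounting, with the charge reduced to a limit check and the reclaim/OOM paths omitted:

#include <stdio.h>

#define CHARGE_BATCH 32U

static unsigned long usage, limit = 1024;   /* pages */
static unsigned int stocked;                /* pages handed to refill_stock() */

/* stand-in for res_counter_charge(): all-or-nothing against the limit */
static int try_charge_pages(unsigned int nr)
{
	if (usage + nr > limit)
		return -1;
	usage += nr;
	return 0;
}

static int charge(unsigned int nr_pages)
{
	unsigned int batch = nr_pages > CHARGE_BATCH ? nr_pages : CHARGE_BATCH;

	if (try_charge_pages(batch)) {
		batch = nr_pages;   /* CHARGE_RETRY: no batching, retry exact size */
		if (try_charge_pages(batch))
			return -1;  /* reclaim/OOM handling omitted */
	}
	if (batch > nr_pages)
		stocked += batch - nr_pages;   /* refill_stock(mem, batch - nr_pages) */
	return 0;
}

int main(void)
{
	charge(1);     /* charges a batch of 32, keeps 31 in the stock */
	printf("usage=%lu stocked=%u\n", usage, stocked);
	charge(512);   /* a THP: batch == nr_pages, nothing left to stock */
	printf("usage=%lu stocked=%u\n", usage, stocked);
	return 0;
}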
@@ -2045,21 +2102,17 @@ bypass: | |||
2045 | * gotten by try_charge(). | 2102 | * gotten by try_charge(). |
2046 | */ | 2103 | */ |
2047 | static void __mem_cgroup_cancel_charge(struct mem_cgroup *mem, | 2104 | static void __mem_cgroup_cancel_charge(struct mem_cgroup *mem, |
2048 | unsigned long count) | 2105 | unsigned int nr_pages) |
2049 | { | 2106 | { |
2050 | if (!mem_cgroup_is_root(mem)) { | 2107 | if (!mem_cgroup_is_root(mem)) { |
2051 | res_counter_uncharge(&mem->res, PAGE_SIZE * count); | 2108 | unsigned long bytes = nr_pages * PAGE_SIZE; |
2109 | |||
2110 | res_counter_uncharge(&mem->res, bytes); | ||
2052 | if (do_swap_account) | 2111 | if (do_swap_account) |
2053 | res_counter_uncharge(&mem->memsw, PAGE_SIZE * count); | 2112 | res_counter_uncharge(&mem->memsw, bytes); |
2054 | } | 2113 | } |
2055 | } | 2114 | } |
2056 | 2115 | ||
2057 | static void mem_cgroup_cancel_charge(struct mem_cgroup *mem, | ||
2058 | int page_size) | ||
2059 | { | ||
2060 | __mem_cgroup_cancel_charge(mem, page_size >> PAGE_SHIFT); | ||
2061 | } | ||
2062 | |||
2063 | /* | 2116 | /* |
2064 | * A helper function to get mem_cgroup from ID. must be called under | 2117 | * A helper function to get mem_cgroup from ID. must be called under |
2065 | * rcu_read_lock(). The caller must check css_is_removed() or some if | 2118 | * rcu_read_lock(). The caller must check css_is_removed() or some if |
@@ -2108,20 +2161,15 @@ struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) | |||
2108 | } | 2161 | } |
2109 | 2162 | ||
2110 | static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, | 2163 | static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, |
2164 | struct page *page, | ||
2165 | unsigned int nr_pages, | ||
2111 | struct page_cgroup *pc, | 2166 | struct page_cgroup *pc, |
2112 | enum charge_type ctype, | 2167 | enum charge_type ctype) |
2113 | int page_size) | ||
2114 | { | 2168 | { |
2115 | int nr_pages = page_size >> PAGE_SHIFT; | ||
2116 | |||
2117 | /* try_charge() can return NULL to *memcg, taking care of it. */ | ||
2118 | if (!mem) | ||
2119 | return; | ||
2120 | |||
2121 | lock_page_cgroup(pc); | 2169 | lock_page_cgroup(pc); |
2122 | if (unlikely(PageCgroupUsed(pc))) { | 2170 | if (unlikely(PageCgroupUsed(pc))) { |
2123 | unlock_page_cgroup(pc); | 2171 | unlock_page_cgroup(pc); |
2124 | mem_cgroup_cancel_charge(mem, page_size); | 2172 | __mem_cgroup_cancel_charge(mem, nr_pages); |
2125 | return; | 2173 | return; |
2126 | } | 2174 | } |
2127 | /* | 2175 | /* |
@@ -2158,7 +2206,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *mem, | |||
2158 | * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree. | 2206 | * Insert ancestor (and ancestor's ancestors), to softlimit RB-tree. |
2159 | * if they exceeds softlimit. | 2207 | * if they exceeds softlimit. |
2160 | */ | 2208 | */ |
2161 | memcg_check_events(mem, pc->page); | 2209 | memcg_check_events(mem, page); |
2162 | } | 2210 | } |
2163 | 2211 | ||
2164 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE | 2212 | #ifdef CONFIG_TRANSPARENT_HUGEPAGE |
@@ -2195,7 +2243,7 @@ void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail) | |||
2195 | * We hold lru_lock, then, reduce counter directly. | 2243 | * We hold lru_lock, then, reduce counter directly. |
2196 | */ | 2244 | */ |
2197 | lru = page_lru(head); | 2245 | lru = page_lru(head); |
2198 | mz = page_cgroup_zoneinfo(head_pc); | 2246 | mz = page_cgroup_zoneinfo(head_pc->mem_cgroup, head); |
2199 | MEM_CGROUP_ZSTAT(mz, lru) -= 1; | 2247 | MEM_CGROUP_ZSTAT(mz, lru) -= 1; |
2200 | } | 2248 | } |
2201 | tail_pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT; | 2249 | tail_pc->flags = head_pc->flags & ~PCGF_NOCOPY_AT_SPLIT; |
@@ -2204,7 +2252,9 @@ void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail) | |||
2204 | #endif | 2252 | #endif |
2205 | 2253 | ||
2206 | /** | 2254 | /** |
2207 | * __mem_cgroup_move_account - move account of the page | 2255 | * mem_cgroup_move_account - move account of the page |
2256 | * @page: the page | ||
2257 | * @nr_pages: number of regular pages (>1 for huge pages) | ||
2208 | * @pc: page_cgroup of the page. | 2258 | * @pc: page_cgroup of the page. |
2209 | * @from: mem_cgroup which the page is moved from. | 2259 | * @from: mem_cgroup which the page is moved from. |
2210 | * @to: mem_cgroup which the page is moved to. @from != @to. | 2260 | * @to: mem_cgroup which the page is moved to. @from != @to. |
@@ -2212,25 +2262,42 @@ void mem_cgroup_split_huge_fixup(struct page *head, struct page *tail) | |||
2212 | * | 2262 | * |
2213 | * The caller must confirm following. | 2263 | * The caller must confirm following. |
2214 | * - page is not on LRU (isolate_page() is useful.) | 2264 | * - page is not on LRU (isolate_page() is useful.) |
2215 | * - the pc is locked, used, and ->mem_cgroup points to @from. | 2265 | * - compound_lock is held when nr_pages > 1 |
2216 | * | 2266 | * |
2217 | * This function doesn't do "charge" nor css_get to new cgroup. It should be | 2267 | * This function doesn't do "charge" nor css_get to new cgroup. It should be |
2218 | * done by a caller(__mem_cgroup_try_charge would be usefull). If @uncharge is | 2268 | * done by a caller(__mem_cgroup_try_charge would be useful). If @uncharge is |
2219 | * true, this function does "uncharge" from old cgroup, but it doesn't if | 2269 | * true, this function does "uncharge" from old cgroup, but it doesn't if |
2220 | * @uncharge is false, so a caller should do "uncharge". | 2270 | * @uncharge is false, so a caller should do "uncharge". |
2221 | */ | 2271 | */ |
2222 | 2272 | static int mem_cgroup_move_account(struct page *page, | |
2223 | static void __mem_cgroup_move_account(struct page_cgroup *pc, | 2273 | unsigned int nr_pages, |
2224 | struct mem_cgroup *from, struct mem_cgroup *to, bool uncharge, | 2274 | struct page_cgroup *pc, |
2225 | int charge_size) | 2275 | struct mem_cgroup *from, |
2276 | struct mem_cgroup *to, | ||
2277 | bool uncharge) | ||
2226 | { | 2278 | { |
2227 | int nr_pages = charge_size >> PAGE_SHIFT; | 2279 | unsigned long flags; |
2280 | int ret; | ||
2228 | 2281 | ||
2229 | VM_BUG_ON(from == to); | 2282 | VM_BUG_ON(from == to); |
2230 | VM_BUG_ON(PageLRU(pc->page)); | 2283 | VM_BUG_ON(PageLRU(page)); |
2231 | VM_BUG_ON(!page_is_cgroup_locked(pc)); | 2284 | /* |
2232 | VM_BUG_ON(!PageCgroupUsed(pc)); | 2285 | * The page is isolated from LRU. So, collapse function |
2233 | VM_BUG_ON(pc->mem_cgroup != from); | 2286 | * will not handle this page. But page splitting can happen. |
2287 | * Do this check under compound_page_lock(). The caller should | ||
2288 | * hold it. | ||
2289 | */ | ||
2290 | ret = -EBUSY; | ||
2291 | if (nr_pages > 1 && !PageTransHuge(page)) | ||
2292 | goto out; | ||
2293 | |||
2294 | lock_page_cgroup(pc); | ||
2295 | |||
2296 | ret = -EINVAL; | ||
2297 | if (!PageCgroupUsed(pc) || pc->mem_cgroup != from) | ||
2298 | goto unlock; | ||
2299 | |||
2300 | move_lock_page_cgroup(pc, &flags); | ||
2234 | 2301 | ||
2235 | if (PageCgroupFileMapped(pc)) { | 2302 | if (PageCgroupFileMapped(pc)) { |
2236 | /* Update mapped_file data for mem_cgroup */ | 2303 | /* Update mapped_file data for mem_cgroup */ |
@@ -2242,7 +2309,7 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc, | |||
2242 | mem_cgroup_charge_statistics(from, PageCgroupCache(pc), -nr_pages); | 2309 | mem_cgroup_charge_statistics(from, PageCgroupCache(pc), -nr_pages); |
2243 | if (uncharge) | 2310 | if (uncharge) |
2244 | /* This is not "cancel", but cancel_charge does all we need. */ | 2311 | /* This is not "cancel", but cancel_charge does all we need. */ |
2245 | mem_cgroup_cancel_charge(from, charge_size); | 2312 | __mem_cgroup_cancel_charge(from, nr_pages); |
2246 | 2313 | ||
2247 | /* caller should have done css_get */ | 2314 | /* caller should have done css_get */ |
2248 | pc->mem_cgroup = to; | 2315 | pc->mem_cgroup = to; |
@@ -2251,43 +2318,19 @@ static void __mem_cgroup_move_account(struct page_cgroup *pc, | |||
2251 | * We charges against "to" which may not have any tasks. Then, "to" | 2318 | * We charges against "to" which may not have any tasks. Then, "to" |
2252 | * can be under rmdir(). But in current implementation, caller of | 2319 | * can be under rmdir(). But in current implementation, caller of |
2253 | * this function is just force_empty() and move charge, so it's | 2320 | * this function is just force_empty() and move charge, so it's |
2254 | * garanteed that "to" is never removed. So, we don't check rmdir | 2321 | * guaranteed that "to" is never removed. So, we don't check rmdir |
2255 | * status here. | 2322 | * status here. |
2256 | */ | 2323 | */ |
2257 | } | 2324 | move_unlock_page_cgroup(pc, &flags); |
2258 | 2325 | ret = 0; | |
2259 | /* | 2326 | unlock: |
2260 | * check whether the @pc is valid for moving account and call | ||
2261 | * __mem_cgroup_move_account() | ||
2262 | */ | ||
2263 | static int mem_cgroup_move_account(struct page_cgroup *pc, | ||
2264 | struct mem_cgroup *from, struct mem_cgroup *to, | ||
2265 | bool uncharge, int charge_size) | ||
2266 | { | ||
2267 | int ret = -EINVAL; | ||
2268 | unsigned long flags; | ||
2269 | /* | ||
2270 | * The page is isolated from LRU. So, collapse function | ||
2271 | * will not handle this page. But page splitting can happen. | ||
2272 | * Do this check under compound_page_lock(). The caller should | ||
2273 | * hold it. | ||
2274 | */ | ||
2275 | if ((charge_size > PAGE_SIZE) && !PageTransHuge(pc->page)) | ||
2276 | return -EBUSY; | ||
2277 | |||
2278 | lock_page_cgroup(pc); | ||
2279 | if (PageCgroupUsed(pc) && pc->mem_cgroup == from) { | ||
2280 | move_lock_page_cgroup(pc, &flags); | ||
2281 | __mem_cgroup_move_account(pc, from, to, uncharge, charge_size); | ||
2282 | move_unlock_page_cgroup(pc, &flags); | ||
2283 | ret = 0; | ||
2284 | } | ||
2285 | unlock_page_cgroup(pc); | 2327 | unlock_page_cgroup(pc); |
2286 | /* | 2328 | /* |
2287 | * check events | 2329 | * check events |
2288 | */ | 2330 | */ |
2289 | memcg_check_events(to, pc->page); | 2331 | memcg_check_events(to, page); |
2290 | memcg_check_events(from, pc->page); | 2332 | memcg_check_events(from, page); |
2333 | out: | ||
2291 | return ret; | 2334 | return ret; |
2292 | } | 2335 | } |
2293 | 2336 | ||
@@ -2295,16 +2338,16 @@ static int mem_cgroup_move_account(struct page_cgroup *pc, | |||
2295 | * move charges to its parent. | 2338 | * move charges to its parent. |
2296 | */ | 2339 | */ |
2297 | 2340 | ||
2298 | static int mem_cgroup_move_parent(struct page_cgroup *pc, | 2341 | static int mem_cgroup_move_parent(struct page *page, |
2342 | struct page_cgroup *pc, | ||
2299 | struct mem_cgroup *child, | 2343 | struct mem_cgroup *child, |
2300 | gfp_t gfp_mask) | 2344 | gfp_t gfp_mask) |
2301 | { | 2345 | { |
2302 | struct page *page = pc->page; | ||
2303 | struct cgroup *cg = child->css.cgroup; | 2346 | struct cgroup *cg = child->css.cgroup; |
2304 | struct cgroup *pcg = cg->parent; | 2347 | struct cgroup *pcg = cg->parent; |
2305 | struct mem_cgroup *parent; | 2348 | struct mem_cgroup *parent; |
2306 | int page_size = PAGE_SIZE; | 2349 | unsigned int nr_pages; |
2307 | unsigned long flags; | 2350 | unsigned long uninitialized_var(flags); |
2308 | int ret; | 2351 | int ret; |
2309 | 2352 | ||
2310 | /* Is ROOT ? */ | 2353 | /* Is ROOT ? */ |
@@ -2317,23 +2360,21 @@ static int mem_cgroup_move_parent(struct page_cgroup *pc, | |||
2317 | if (isolate_lru_page(page)) | 2360 | if (isolate_lru_page(page)) |
2318 | goto put; | 2361 | goto put; |
2319 | 2362 | ||
2320 | if (PageTransHuge(page)) | 2363 | nr_pages = hpage_nr_pages(page); |
2321 | page_size = HPAGE_SIZE; | ||
2322 | 2364 | ||
2323 | parent = mem_cgroup_from_cont(pcg); | 2365 | parent = mem_cgroup_from_cont(pcg); |
2324 | ret = __mem_cgroup_try_charge(NULL, gfp_mask, | 2366 | ret = __mem_cgroup_try_charge(NULL, gfp_mask, nr_pages, &parent, false); |
2325 | &parent, false, page_size); | ||
2326 | if (ret || !parent) | 2367 | if (ret || !parent) |
2327 | goto put_back; | 2368 | goto put_back; |
2328 | 2369 | ||
2329 | if (page_size > PAGE_SIZE) | 2370 | if (nr_pages > 1) |
2330 | flags = compound_lock_irqsave(page); | 2371 | flags = compound_lock_irqsave(page); |
2331 | 2372 | ||
2332 | ret = mem_cgroup_move_account(pc, child, parent, true, page_size); | 2373 | ret = mem_cgroup_move_account(page, nr_pages, pc, child, parent, true); |
2333 | if (ret) | 2374 | if (ret) |
2334 | mem_cgroup_cancel_charge(parent, page_size); | 2375 | __mem_cgroup_cancel_charge(parent, nr_pages); |
2335 | 2376 | ||
2336 | if (page_size > PAGE_SIZE) | 2377 | if (nr_pages > 1) |
2337 | compound_unlock_irqrestore(page, flags); | 2378 | compound_unlock_irqrestore(page, flags); |
2338 | put_back: | 2379 | put_back: |
2339 | putback_lru_page(page); | 2380 | putback_lru_page(page); |
@@ -2353,13 +2394,13 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, | |||
2353 | gfp_t gfp_mask, enum charge_type ctype) | 2394 | gfp_t gfp_mask, enum charge_type ctype) |
2354 | { | 2395 | { |
2355 | struct mem_cgroup *mem = NULL; | 2396 | struct mem_cgroup *mem = NULL; |
2356 | int page_size = PAGE_SIZE; | 2397 | unsigned int nr_pages = 1; |
2357 | struct page_cgroup *pc; | 2398 | struct page_cgroup *pc; |
2358 | bool oom = true; | 2399 | bool oom = true; |
2359 | int ret; | 2400 | int ret; |
2360 | 2401 | ||
2361 | if (PageTransHuge(page)) { | 2402 | if (PageTransHuge(page)) { |
2362 | page_size <<= compound_order(page); | 2403 | nr_pages <<= compound_order(page); |
2363 | VM_BUG_ON(!PageTransHuge(page)); | 2404 | VM_BUG_ON(!PageTransHuge(page)); |
2364 | /* | 2405 | /* |
2365 | * Never OOM-kill a process for a huge page. The | 2406 | * Never OOM-kill a process for a huge page. The |
@@ -2369,16 +2410,13 @@ static int mem_cgroup_charge_common(struct page *page, struct mm_struct *mm, | |||
2369 | } | 2410 | } |
2370 | 2411 | ||
2371 | pc = lookup_page_cgroup(page); | 2412 | pc = lookup_page_cgroup(page); |
2372 | /* can happen at boot */ | 2413 | BUG_ON(!pc); /* XXX: remove this and move pc lookup into commit */ |
2373 | if (unlikely(!pc)) | ||
2374 | return 0; | ||
2375 | prefetchw(pc); | ||
2376 | 2414 | ||
2377 | ret = __mem_cgroup_try_charge(mm, gfp_mask, &mem, oom, page_size); | 2415 | ret = __mem_cgroup_try_charge(mm, gfp_mask, nr_pages, &mem, oom); |
2378 | if (ret || !mem) | 2416 | if (ret || !mem) |
2379 | return ret; | 2417 | return ret; |
2380 | 2418 | ||
2381 | __mem_cgroup_commit_charge(mem, pc, ctype, page_size); | 2419 | __mem_cgroup_commit_charge(mem, page, nr_pages, pc, ctype); |
2382 | return 0; | 2420 | return 0; |
2383 | } | 2421 | } |
2384 | 2422 | ||
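mem_cgroup_charge_common() above now derives a page count instead of a byte size: one page normally, or 1 << compound_order(page) base pages for a transparent huge page. A trivial userspace illustration of that computation, with compound_order() replaced by a plain parameter:

#include <stdio.h>

/* compound_order() replaced by a plain parameter for illustration */
static unsigned int charge_nr_pages(unsigned int compound_order)
{
	unsigned int nr_pages = 1;

	if (compound_order)                  /* PageTransHuge() case */
		nr_pages <<= compound_order; /* order 9 -> 512 base pages on x86 */
	return nr_pages;
}

int main(void)
{
	printf("base page: %u pages, 2MB THP: %u pages\n",
	       charge_nr_pages(0), charge_nr_pages(9));
	return 0;
}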
@@ -2406,9 +2444,26 @@ static void | |||
2406 | __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr, | 2444 | __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr, |
2407 | enum charge_type ctype); | 2445 | enum charge_type ctype); |
2408 | 2446 | ||
2447 | static void | ||
2448 | __mem_cgroup_commit_charge_lrucare(struct page *page, struct mem_cgroup *mem, | ||
2449 | enum charge_type ctype) | ||
2450 | { | ||
2451 | struct page_cgroup *pc = lookup_page_cgroup(page); | ||
2452 | /* | ||
2453 | * In some case, SwapCache, FUSE(splice_buf->radixtree), the page | ||
2454 | * is already on LRU. It means the page may on some other page_cgroup's | ||
2455 | * LRU. Take care of it. | ||
2456 | */ | ||
2457 | mem_cgroup_lru_del_before_commit(page); | ||
2458 | __mem_cgroup_commit_charge(mem, page, 1, pc, ctype); | ||
2459 | mem_cgroup_lru_add_after_commit(page); | ||
2460 | return; | ||
2461 | } | ||
2462 | |||
2409 | int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, | 2463 | int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, |
2410 | gfp_t gfp_mask) | 2464 | gfp_t gfp_mask) |
2411 | { | 2465 | { |
2466 | struct mem_cgroup *mem = NULL; | ||
2412 | int ret; | 2467 | int ret; |
2413 | 2468 | ||
2414 | if (mem_cgroup_disabled()) | 2469 | if (mem_cgroup_disabled()) |
@@ -2443,14 +2498,22 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, | |||
2443 | if (unlikely(!mm)) | 2498 | if (unlikely(!mm)) |
2444 | mm = &init_mm; | 2499 | mm = &init_mm; |
2445 | 2500 | ||
2446 | if (page_is_file_cache(page)) | 2501 | if (page_is_file_cache(page)) { |
2447 | return mem_cgroup_charge_common(page, mm, gfp_mask, | 2502 | ret = __mem_cgroup_try_charge(mm, gfp_mask, 1, &mem, true); |
2448 | MEM_CGROUP_CHARGE_TYPE_CACHE); | 2503 | if (ret || !mem) |
2504 | return ret; | ||
2449 | 2505 | ||
2506 | /* | ||
2507 | * FUSE reuses pages without going through the final | ||
2508 | * put that would remove them from the LRU list, make | ||
2509 | * sure that they get relinked properly. | ||
2510 | */ | ||
2511 | __mem_cgroup_commit_charge_lrucare(page, mem, | ||
2512 | MEM_CGROUP_CHARGE_TYPE_CACHE); | ||
2513 | return ret; | ||
2514 | } | ||
2450 | /* shmem */ | 2515 | /* shmem */ |
2451 | if (PageSwapCache(page)) { | 2516 | if (PageSwapCache(page)) { |
2452 | struct mem_cgroup *mem = NULL; | ||
2453 | |||
2454 | ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem); | 2517 | ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem); |
2455 | if (!ret) | 2518 | if (!ret) |
2456 | __mem_cgroup_commit_charge_swapin(page, mem, | 2519 | __mem_cgroup_commit_charge_swapin(page, mem, |
@@ -2475,6 +2538,8 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, | |||
2475 | struct mem_cgroup *mem; | 2538 | struct mem_cgroup *mem; |
2476 | int ret; | 2539 | int ret; |
2477 | 2540 | ||
2541 | *ptr = NULL; | ||
2542 | |||
2478 | if (mem_cgroup_disabled()) | 2543 | if (mem_cgroup_disabled()) |
2479 | return 0; | 2544 | return 0; |
2480 | 2545 | ||
@@ -2492,30 +2557,26 @@ int mem_cgroup_try_charge_swapin(struct mm_struct *mm, | |||
2492 | if (!mem) | 2557 | if (!mem) |
2493 | goto charge_cur_mm; | 2558 | goto charge_cur_mm; |
2494 | *ptr = mem; | 2559 | *ptr = mem; |
2495 | ret = __mem_cgroup_try_charge(NULL, mask, ptr, true, PAGE_SIZE); | 2560 | ret = __mem_cgroup_try_charge(NULL, mask, 1, ptr, true); |
2496 | css_put(&mem->css); | 2561 | css_put(&mem->css); |
2497 | return ret; | 2562 | return ret; |
2498 | charge_cur_mm: | 2563 | charge_cur_mm: |
2499 | if (unlikely(!mm)) | 2564 | if (unlikely(!mm)) |
2500 | mm = &init_mm; | 2565 | mm = &init_mm; |
2501 | return __mem_cgroup_try_charge(mm, mask, ptr, true, PAGE_SIZE); | 2566 | return __mem_cgroup_try_charge(mm, mask, 1, ptr, true); |
2502 | } | 2567 | } |
2503 | 2568 | ||
2504 | static void | 2569 | static void |
2505 | __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr, | 2570 | __mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *ptr, |
2506 | enum charge_type ctype) | 2571 | enum charge_type ctype) |
2507 | { | 2572 | { |
2508 | struct page_cgroup *pc; | ||
2509 | |||
2510 | if (mem_cgroup_disabled()) | 2573 | if (mem_cgroup_disabled()) |
2511 | return; | 2574 | return; |
2512 | if (!ptr) | 2575 | if (!ptr) |
2513 | return; | 2576 | return; |
2514 | cgroup_exclude_rmdir(&ptr->css); | 2577 | cgroup_exclude_rmdir(&ptr->css); |
2515 | pc = lookup_page_cgroup(page); | 2578 | |
2516 | mem_cgroup_lru_del_before_commit_swapcache(page); | 2579 | __mem_cgroup_commit_charge_lrucare(page, ptr, ctype); |
2517 | __mem_cgroup_commit_charge(ptr, pc, ctype, PAGE_SIZE); | ||
2518 | mem_cgroup_lru_add_after_commit_swapcache(page); | ||
2519 | /* | 2580 | /* |
2520 | * Now swap is on-memory. This means this page may be | 2581 | * Now swap is on-memory. This means this page may be |
2521 | * counted both as mem and swap....double count. | 2582 | * counted both as mem and swap....double count. |
@@ -2563,15 +2624,16 @@ void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *mem) | |||
2563 | return; | 2624 | return; |
2564 | if (!mem) | 2625 | if (!mem) |
2565 | return; | 2626 | return; |
2566 | mem_cgroup_cancel_charge(mem, PAGE_SIZE); | 2627 | __mem_cgroup_cancel_charge(mem, 1); |
2567 | } | 2628 | } |
2568 | 2629 | ||
2569 | static void | 2630 | static void mem_cgroup_do_uncharge(struct mem_cgroup *mem, |
2570 | __do_uncharge(struct mem_cgroup *mem, const enum charge_type ctype, | 2631 | unsigned int nr_pages, |
2571 | int page_size) | 2632 | const enum charge_type ctype) |
2572 | { | 2633 | { |
2573 | struct memcg_batch_info *batch = NULL; | 2634 | struct memcg_batch_info *batch = NULL; |
2574 | bool uncharge_memsw = true; | 2635 | bool uncharge_memsw = true; |
2636 | |||
2575 | /* If swapout, usage of swap doesn't decrease */ | 2637 | /* If swapout, usage of swap doesn't decrease */ |
2576 | if (!do_swap_account || ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) | 2638 | if (!do_swap_account || ctype == MEM_CGROUP_CHARGE_TYPE_SWAPOUT) |
2577 | uncharge_memsw = false; | 2639 | uncharge_memsw = false; |
@@ -2586,7 +2648,7 @@ __do_uncharge(struct mem_cgroup *mem, const enum charge_type ctype, | |||
2586 | batch->memcg = mem; | 2648 | batch->memcg = mem; |
2587 | /* | 2649 | /* |
2588 | * do_batch > 0 when unmapping pages or inode invalidate/truncate. | 2650 | * do_batch > 0 when unmapping pages or inode invalidate/truncate. |
2589 | * In those cases, all pages freed continously can be expected to be in | 2651 | * In those cases, all pages freed continuously can be expected to be in |
2590 | * the same cgroup and we have chance to coalesce uncharges. | 2652 | * the same cgroup and we have chance to coalesce uncharges. |
2591 | * But we do uncharge one by one if this is killed by OOM(TIF_MEMDIE) | 2653 | * But we do uncharge one by one if this is killed by OOM(TIF_MEMDIE) |
2592 | * because we want to do uncharge as soon as possible. | 2654 | * because we want to do uncharge as soon as possible. |
@@ -2595,7 +2657,7 @@ __do_uncharge(struct mem_cgroup *mem, const enum charge_type ctype, | |||
2595 | if (!batch->do_batch || test_thread_flag(TIF_MEMDIE)) | 2657 | if (!batch->do_batch || test_thread_flag(TIF_MEMDIE)) |
2596 | goto direct_uncharge; | 2658 | goto direct_uncharge; |
2597 | 2659 | ||
2598 | if (page_size != PAGE_SIZE) | 2660 | if (nr_pages > 1) |
2599 | goto direct_uncharge; | 2661 | goto direct_uncharge; |
2600 | 2662 | ||
2601 | /* | 2663 | /* |
@@ -2606,14 +2668,14 @@ __do_uncharge(struct mem_cgroup *mem, const enum charge_type ctype, | |||
2606 | if (batch->memcg != mem) | 2668 | if (batch->memcg != mem) |
2607 | goto direct_uncharge; | 2669 | goto direct_uncharge; |
2608 | /* remember freed charge and uncharge it later */ | 2670 | /* remember freed charge and uncharge it later */ |
2609 | batch->bytes += PAGE_SIZE; | 2671 | batch->nr_pages++; |
2610 | if (uncharge_memsw) | 2672 | if (uncharge_memsw) |
2611 | batch->memsw_bytes += PAGE_SIZE; | 2673 | batch->memsw_nr_pages++; |
2612 | return; | 2674 | return; |
2613 | direct_uncharge: | 2675 | direct_uncharge: |
2614 | res_counter_uncharge(&mem->res, page_size); | 2676 | res_counter_uncharge(&mem->res, nr_pages * PAGE_SIZE); |
2615 | if (uncharge_memsw) | 2677 | if (uncharge_memsw) |
2616 | res_counter_uncharge(&mem->memsw, page_size); | 2678 | res_counter_uncharge(&mem->memsw, nr_pages * PAGE_SIZE); |
2617 | if (unlikely(batch->memcg != mem)) | 2679 | if (unlikely(batch->memcg != mem)) |
2618 | memcg_oom_recover(mem); | 2680 | memcg_oom_recover(mem); |
2619 | return; | 2681 | return; |
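mem_cgroup_do_uncharge() above keeps the coalescing scheme described in the comment but accounts in pages: single-page uncharges issued between mem_cgroup_uncharge_start()/end() are only added to the per-task batch, while huge pages (nr_pages > 1), OOM-killed tasks and a memcg mismatch fall back to direct_uncharge, and the batched count is converted back to bytes only when it is flushed. A small self-contained model of that decision, with made-up names (uncharge_batch, counter_uncharge, batch_flush) in place of memcg_batch_info and res_counter:

#include <stdio.h>
#include <stdbool.h>

#define FAKE_PAGE_SIZE 4096UL

struct counter { unsigned long bytes; };

struct uncharge_batch {
	bool active;               /* between uncharge_start()/uncharge_end() */
	struct counter *memcg;     /* only one memcg may be batched at a time */
	unsigned long nr_pages;    /* coalesced single-page uncharges */
};

static void counter_uncharge(struct counter *c, unsigned long nr_pages)
{
	c->bytes -= nr_pages * FAKE_PAGE_SIZE;
}

/* Mirror of the batch-or-direct decision in mem_cgroup_do_uncharge(). */
static void do_uncharge(struct uncharge_batch *b, struct counter *memcg,
			unsigned int nr_pages)
{
	if (!b->active || nr_pages > 1 || (b->memcg && b->memcg != memcg)) {
		counter_uncharge(memcg, nr_pages);   /* direct_uncharge path */
		return;
	}
	if (!b->memcg)
		b->memcg = memcg;
	b->nr_pages += nr_pages;                     /* remember, flush later */
}

static void batch_flush(struct uncharge_batch *b)
{
	if (b->memcg && b->nr_pages)
		counter_uncharge(b->memcg, b->nr_pages);
	b->active = false;
	b->memcg = NULL;
	b->nr_pages = 0;
}

int main(void)
{
	struct counter memcg = { .bytes = 1024 * FAKE_PAGE_SIZE };
	struct uncharge_batch batch = { .active = true };

	do_uncharge(&batch, &memcg, 1);   /* coalesced */
	do_uncharge(&batch, &memcg, 1);   /* coalesced */
	do_uncharge(&batch, &memcg, 512); /* huge page: uncharged immediately */
	batch_flush(&batch);              /* like mem_cgroup_uncharge_end() */
	printf("bytes left: %lu\n", memcg.bytes);
	return 0;
}

In the sketch the two single pages collapse into one counter update at flush time, while the 512-page uncharge bypasses the batch, mirroring the nr_pages > 1 test above.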
@@ -2625,10 +2687,9 @@ direct_uncharge: | |||
2625 | static struct mem_cgroup * | 2687 | static struct mem_cgroup * |
2626 | __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | 2688 | __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) |
2627 | { | 2689 | { |
2628 | int count; | ||
2629 | struct page_cgroup *pc; | ||
2630 | struct mem_cgroup *mem = NULL; | 2690 | struct mem_cgroup *mem = NULL; |
2631 | int page_size = PAGE_SIZE; | 2691 | unsigned int nr_pages = 1; |
2692 | struct page_cgroup *pc; | ||
2632 | 2693 | ||
2633 | if (mem_cgroup_disabled()) | 2694 | if (mem_cgroup_disabled()) |
2634 | return NULL; | 2695 | return NULL; |
@@ -2637,11 +2698,9 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | |||
2637 | return NULL; | 2698 | return NULL; |
2638 | 2699 | ||
2639 | if (PageTransHuge(page)) { | 2700 | if (PageTransHuge(page)) { |
2640 | page_size <<= compound_order(page); | 2701 | nr_pages <<= compound_order(page); |
2641 | VM_BUG_ON(!PageTransHuge(page)); | 2702 | VM_BUG_ON(!PageTransHuge(page)); |
2642 | } | 2703 | } |
2643 | |||
2644 | count = page_size >> PAGE_SHIFT; | ||
2645 | /* | 2704 | /* |
2646 | * Check if our page_cgroup is valid | 2705 | * Check if our page_cgroup is valid |
2647 | */ | 2706 | */ |
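The nr_pages computation above replaces the old byte-sized page_size and the count that was derived from it; for transparent huge pages the count is simply 1 << compound_order(page). A quick worked example, assuming a 2MB THP on a system with 4KB base pages (order 9), values not stated by this patch:

#include <stdio.h>

int main(void)
{
	unsigned long page_shift = 12;           /* 4KB base pages (assumed) */
	unsigned int order = 9;                  /* 2MB transparent huge page */

	unsigned int nr_pages = 1U << order;                      /* new: 512 pages */
	unsigned long page_size = (1UL << page_shift) << order;   /* old: 2MB in bytes */

	printf("nr_pages=%u, old-style count=%lu\n", nr_pages, page_size >> page_shift);
	return 0;
}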
@@ -2674,7 +2733,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | |||
2674 | break; | 2733 | break; |
2675 | } | 2734 | } |
2676 | 2735 | ||
2677 | mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), -count); | 2736 | mem_cgroup_charge_statistics(mem, PageCgroupCache(pc), -nr_pages); |
2678 | 2737 | ||
2679 | ClearPageCgroupUsed(pc); | 2738 | ClearPageCgroupUsed(pc); |
2680 | /* | 2739 | /* |
@@ -2695,7 +2754,7 @@ __mem_cgroup_uncharge_common(struct page *page, enum charge_type ctype) | |||
2695 | mem_cgroup_get(mem); | 2754 | mem_cgroup_get(mem); |
2696 | } | 2755 | } |
2697 | if (!mem_cgroup_is_root(mem)) | 2756 | if (!mem_cgroup_is_root(mem)) |
2698 | __do_uncharge(mem, ctype, page_size); | 2757 | mem_cgroup_do_uncharge(mem, nr_pages, ctype); |
2699 | 2758 | ||
2700 | return mem; | 2759 | return mem; |
2701 | 2760 | ||
@@ -2735,8 +2794,8 @@ void mem_cgroup_uncharge_start(void) | |||
2735 | /* We can do nest. */ | 2794 | /* We can do nest. */ |
2736 | if (current->memcg_batch.do_batch == 1) { | 2795 | if (current->memcg_batch.do_batch == 1) { |
2737 | current->memcg_batch.memcg = NULL; | 2796 | current->memcg_batch.memcg = NULL; |
2738 | current->memcg_batch.bytes = 0; | 2797 | current->memcg_batch.nr_pages = 0; |
2739 | current->memcg_batch.memsw_bytes = 0; | 2798 | current->memcg_batch.memsw_nr_pages = 0; |
2740 | } | 2799 | } |
2741 | } | 2800 | } |
2742 | 2801 | ||
@@ -2757,10 +2816,12 @@ void mem_cgroup_uncharge_end(void) | |||
2757 | * This "batch->memcg" is valid without any css_get/put etc... | 2816 | * This "batch->memcg" is valid without any css_get/put etc... |
2758 | * because we hide charges behind us. | 2817 | * because we hide charges behind us. |
2759 | */ | 2818 | */ |
2760 | if (batch->bytes) | 2819 | if (batch->nr_pages) |
2761 | res_counter_uncharge(&batch->memcg->res, batch->bytes); | 2820 | res_counter_uncharge(&batch->memcg->res, |
2762 | if (batch->memsw_bytes) | 2821 | batch->nr_pages * PAGE_SIZE); |
2763 | res_counter_uncharge(&batch->memcg->memsw, batch->memsw_bytes); | 2822 | if (batch->memsw_nr_pages) |
2823 | res_counter_uncharge(&batch->memcg->memsw, | ||
2824 | batch->memsw_nr_pages * PAGE_SIZE); | ||
2764 | memcg_oom_recover(batch->memcg); | 2825 | memcg_oom_recover(batch->memcg); |
2765 | /* forget this pointer (for sanity check) */ | 2826 | /* forget this pointer (for sanity check) */ |
2766 | batch->memcg = NULL; | 2827 | batch->memcg = NULL; |
@@ -2883,13 +2944,15 @@ static inline int mem_cgroup_move_swap_account(swp_entry_t entry, | |||
2883 | * page belongs to. | 2944 | * page belongs to. |
2884 | */ | 2945 | */ |
2885 | int mem_cgroup_prepare_migration(struct page *page, | 2946 | int mem_cgroup_prepare_migration(struct page *page, |
2886 | struct page *newpage, struct mem_cgroup **ptr) | 2947 | struct page *newpage, struct mem_cgroup **ptr, gfp_t gfp_mask) |
2887 | { | 2948 | { |
2888 | struct page_cgroup *pc; | ||
2889 | struct mem_cgroup *mem = NULL; | 2949 | struct mem_cgroup *mem = NULL; |
2950 | struct page_cgroup *pc; | ||
2890 | enum charge_type ctype; | 2951 | enum charge_type ctype; |
2891 | int ret = 0; | 2952 | int ret = 0; |
2892 | 2953 | ||
2954 | *ptr = NULL; | ||
2955 | |||
2893 | VM_BUG_ON(PageTransHuge(page)); | 2956 | VM_BUG_ON(PageTransHuge(page)); |
2894 | if (mem_cgroup_disabled()) | 2957 | if (mem_cgroup_disabled()) |
2895 | return 0; | 2958 | return 0; |
@@ -2940,7 +3003,7 @@ int mem_cgroup_prepare_migration(struct page *page, | |||
2940 | return 0; | 3003 | return 0; |
2941 | 3004 | ||
2942 | *ptr = mem; | 3005 | *ptr = mem; |
2943 | ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, ptr, false, PAGE_SIZE); | 3006 | ret = __mem_cgroup_try_charge(NULL, gfp_mask, 1, ptr, false); |
2944 | css_put(&mem->css);/* drop extra refcnt */ | 3007 | css_put(&mem->css);/* drop extra refcnt */ |
2945 | if (ret || *ptr == NULL) { | 3008 | if (ret || *ptr == NULL) { |
2946 | if (PageAnon(page)) { | 3009 | if (PageAnon(page)) { |
@@ -2967,7 +3030,7 @@ int mem_cgroup_prepare_migration(struct page *page, | |||
2967 | ctype = MEM_CGROUP_CHARGE_TYPE_CACHE; | 3030 | ctype = MEM_CGROUP_CHARGE_TYPE_CACHE; |
2968 | else | 3031 | else |
2969 | ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM; | 3032 | ctype = MEM_CGROUP_CHARGE_TYPE_SHMEM; |
2970 | __mem_cgroup_commit_charge(mem, pc, ctype, PAGE_SIZE); | 3033 | __mem_cgroup_commit_charge(mem, page, 1, pc, ctype); |
2971 | return ret; | 3034 | return ret; |
2972 | } | 3035 | } |
2973 | 3036 | ||
@@ -3032,7 +3095,7 @@ int mem_cgroup_shmem_charge_fallback(struct page *page, | |||
3032 | struct mm_struct *mm, | 3095 | struct mm_struct *mm, |
3033 | gfp_t gfp_mask) | 3096 | gfp_t gfp_mask) |
3034 | { | 3097 | { |
3035 | struct mem_cgroup *mem = NULL; | 3098 | struct mem_cgroup *mem; |
3036 | int ret; | 3099 | int ret; |
3037 | 3100 | ||
3038 | if (mem_cgroup_disabled()) | 3101 | if (mem_cgroup_disabled()) |
@@ -3045,6 +3108,52 @@ int mem_cgroup_shmem_charge_fallback(struct page *page, | |||
3045 | return ret; | 3108 | return ret; |
3046 | } | 3109 | } |
3047 | 3110 | ||
3111 | #ifdef CONFIG_DEBUG_VM | ||
3112 | static struct page_cgroup *lookup_page_cgroup_used(struct page *page) | ||
3113 | { | ||
3114 | struct page_cgroup *pc; | ||
3115 | |||
3116 | pc = lookup_page_cgroup(page); | ||
3117 | if (likely(pc) && PageCgroupUsed(pc)) | ||
3118 | return pc; | ||
3119 | return NULL; | ||
3120 | } | ||
3121 | |||
3122 | bool mem_cgroup_bad_page_check(struct page *page) | ||
3123 | { | ||
3124 | if (mem_cgroup_disabled()) | ||
3125 | return false; | ||
3126 | |||
3127 | return lookup_page_cgroup_used(page) != NULL; | ||
3128 | } | ||
3129 | |||
3130 | void mem_cgroup_print_bad_page(struct page *page) | ||
3131 | { | ||
3132 | struct page_cgroup *pc; | ||
3133 | |||
3134 | pc = lookup_page_cgroup_used(page); | ||
3135 | if (pc) { | ||
3136 | int ret = -1; | ||
3137 | char *path; | ||
3138 | |||
3139 | printk(KERN_ALERT "pc:%p pc->flags:%lx pc->mem_cgroup:%p", | ||
3140 | pc, pc->flags, pc->mem_cgroup); | ||
3141 | |||
3142 | path = kmalloc(PATH_MAX, GFP_KERNEL); | ||
3143 | if (path) { | ||
3144 | rcu_read_lock(); | ||
3145 | ret = cgroup_path(pc->mem_cgroup->css.cgroup, | ||
3146 | path, PATH_MAX); | ||
3147 | rcu_read_unlock(); | ||
3148 | } | ||
3149 | |||
3150 | printk(KERN_CONT "(%s)\n", | ||
3151 | (ret < 0) ? "cannot get the path" : path); | ||
3152 | kfree(path); | ||
3153 | } | ||
3154 | } | ||
3155 | #endif | ||
3156 | |||
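The new CONFIG_DEBUG_VM helpers report a page whose page_cgroup still has the Used bit set, printing the page_cgroup pointer, its flags, the owning mem_cgroup and that cgroup's path. A rough, self-contained model of the check; fake_page_cgroup, PC_USED, lookup_used and the hard-coded path are illustrative stand-ins, not the kernel's page_cgroup or cgroup_path() APIs:

#include <stdbool.h>
#include <stdio.h>

#define PC_USED 0x1UL

struct fake_page_cgroup {
	unsigned long flags;
	const char *cgroup_path;   /* what cgroup_path() would resolve to */
};

/* Return the page's cgroup record only if it is still marked in use. */
static struct fake_page_cgroup *lookup_used(struct fake_page_cgroup *pc)
{
	if (pc && (pc->flags & PC_USED))
		return pc;
	return NULL;
}

static bool bad_page_check(struct fake_page_cgroup *pc)
{
	return lookup_used(pc) != NULL;   /* a freed page should not be charged */
}

static void print_bad_page(struct fake_page_cgroup *pc)
{
	pc = lookup_used(pc);
	if (pc)
		printf("pc:%p flags:%lx (%s)\n",
		       (void *)pc, pc->flags,
		       pc->cgroup_path ? pc->cgroup_path : "cannot get the path");
}

int main(void)
{
	struct fake_page_cgroup leaked = { .flags = PC_USED, .cgroup_path = "/mygroup" };

	if (bad_page_check(&leaked))
		print_bad_page(&leaked);
	return 0;
}

A page that reaches this point should normally have had its charge removed already, so a used page_cgroup here indicates a leaked or mismatched charge, and the printed path identifies which cgroup is still holding it.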
3048 | static DEFINE_MUTEX(set_limit_mutex); | 3157 | static DEFINE_MUTEX(set_limit_mutex); |
3049 | 3158 | ||
3050 | static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, | 3159 | static int mem_cgroup_resize_limit(struct mem_cgroup *memcg, |
@@ -3288,6 +3397,8 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem, | |||
3288 | loop += 256; | 3397 | loop += 256; |
3289 | busy = NULL; | 3398 | busy = NULL; |
3290 | while (loop--) { | 3399 | while (loop--) { |
3400 | struct page *page; | ||
3401 | |||
3291 | ret = 0; | 3402 | ret = 0; |
3292 | spin_lock_irqsave(&zone->lru_lock, flags); | 3403 | spin_lock_irqsave(&zone->lru_lock, flags); |
3293 | if (list_empty(list)) { | 3404 | if (list_empty(list)) { |
@@ -3303,7 +3414,9 @@ static int mem_cgroup_force_empty_list(struct mem_cgroup *mem, | |||
3303 | } | 3414 | } |
3304 | spin_unlock_irqrestore(&zone->lru_lock, flags); | 3415 | spin_unlock_irqrestore(&zone->lru_lock, flags); |
3305 | 3416 | ||
3306 | ret = mem_cgroup_move_parent(pc, mem, GFP_KERNEL); | 3417 | page = lookup_cgroup_page(pc); |
3418 | |||
3419 | ret = mem_cgroup_move_parent(page, pc, mem, GFP_KERNEL); | ||
3307 | if (ret == -ENOMEM) | 3420 | if (ret == -ENOMEM) |
3308 | break; | 3421 | break; |
3309 | 3422 | ||
@@ -3451,13 +3564,13 @@ static int mem_cgroup_hierarchy_write(struct cgroup *cont, struct cftype *cft, | |||
3451 | } | 3564 | } |
3452 | 3565 | ||
3453 | 3566 | ||
3454 | static u64 mem_cgroup_get_recursive_idx_stat(struct mem_cgroup *mem, | 3567 | static unsigned long mem_cgroup_recursive_stat(struct mem_cgroup *mem, |
3455 | enum mem_cgroup_stat_index idx) | 3568 | enum mem_cgroup_stat_index idx) |
3456 | { | 3569 | { |
3457 | struct mem_cgroup *iter; | 3570 | struct mem_cgroup *iter; |
3458 | s64 val = 0; | 3571 | long val = 0; |
3459 | 3572 | ||
3460 | /* each per cpu's value can be minus.Then, use s64 */ | 3573 | /* Per-cpu values can be negative, use a signed accumulator */ |
3461 | for_each_mem_cgroup_tree(iter, mem) | 3574 | for_each_mem_cgroup_tree(iter, mem) |
3462 | val += mem_cgroup_read_stat(iter, idx); | 3575 | val += mem_cgroup_read_stat(iter, idx); |
3463 | 3576 | ||
@@ -3477,12 +3590,11 @@ static inline u64 mem_cgroup_usage(struct mem_cgroup *mem, bool swap) | |||
3477 | return res_counter_read_u64(&mem->memsw, RES_USAGE); | 3590 | return res_counter_read_u64(&mem->memsw, RES_USAGE); |
3478 | } | 3591 | } |
3479 | 3592 | ||
3480 | val = mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_CACHE); | 3593 | val = mem_cgroup_recursive_stat(mem, MEM_CGROUP_STAT_CACHE); |
3481 | val += mem_cgroup_get_recursive_idx_stat(mem, MEM_CGROUP_STAT_RSS); | 3594 | val += mem_cgroup_recursive_stat(mem, MEM_CGROUP_STAT_RSS); |
3482 | 3595 | ||
3483 | if (swap) | 3596 | if (swap) |
3484 | val += mem_cgroup_get_recursive_idx_stat(mem, | 3597 | val += mem_cgroup_recursive_stat(mem, MEM_CGROUP_STAT_SWAPOUT); |
3485 | MEM_CGROUP_STAT_SWAPOUT); | ||
3486 | 3598 | ||
3487 | return val << PAGE_SHIFT; | 3599 | return val << PAGE_SHIFT; |
3488 | } | 3600 | } |
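mem_cgroup_recursive_stat() and mem_cgroup_usage() above now accumulate page counts and shift to bytes once at the end; the accumulator stays signed because an individual per-cpu counter can go negative, for example when a page is charged on one CPU and later uncharged on another, even though the sum over all CPUs and children is expected to be non-negative. A small sketch of that summation, with invented per-cpu arrays standing in for mem_cgroup_read_stat():

#include <stdio.h>

#define NR_FAKE_CPUS 4
#define FAKE_PAGE_SHIFT 12

/* Per-cpu deltas: CPU1 saw more uncharges than charges, so it is negative. */
static long cache_pages[NR_FAKE_CPUS] = {  40, -10, 25,  5 };
static long rss_pages[NR_FAKE_CPUS]   = { 100, -30, 20, 10 };

static long sum_stat(const long *percpu)
{
	long val = 0;                 /* signed: per-cpu values can be < 0 */
	for (int cpu = 0; cpu < NR_FAKE_CPUS; cpu++)
		val += percpu[cpu];
	return val;
}

int main(void)
{
	long usage_pages = sum_stat(cache_pages) + sum_stat(rss_pages);

	/* as in mem_cgroup_usage(): convert pages to bytes only at the end */
	printf("usage = %ld bytes\n", usage_pages << FAKE_PAGE_SHIFT);
	return 0;
}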
@@ -3702,9 +3814,9 @@ mem_cgroup_get_local_stat(struct mem_cgroup *mem, struct mcs_total_stat *s) | |||
3702 | s->stat[MCS_RSS] += val * PAGE_SIZE; | 3814 | s->stat[MCS_RSS] += val * PAGE_SIZE; |
3703 | val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_FILE_MAPPED); | 3815 | val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_FILE_MAPPED); |
3704 | s->stat[MCS_FILE_MAPPED] += val * PAGE_SIZE; | 3816 | s->stat[MCS_FILE_MAPPED] += val * PAGE_SIZE; |
3705 | val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_PGPGIN_COUNT); | 3817 | val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGPGIN); |
3706 | s->stat[MCS_PGPGIN] += val; | 3818 | s->stat[MCS_PGPGIN] += val; |
3707 | val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_PGPGOUT_COUNT); | 3819 | val = mem_cgroup_read_events(mem, MEM_CGROUP_EVENTS_PGPGOUT); |
3708 | s->stat[MCS_PGPGOUT] += val; | 3820 | s->stat[MCS_PGPGOUT] += val; |
3709 | if (do_swap_account) { | 3821 | if (do_swap_account) { |
3710 | val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_SWAPOUT); | 3822 | val = mem_cgroup_read_stat(mem, MEM_CGROUP_STAT_SWAPOUT); |
@@ -3828,9 +3940,7 @@ static int mem_cgroup_swappiness_write(struct cgroup *cgrp, struct cftype *cft, | |||
3828 | return -EINVAL; | 3940 | return -EINVAL; |
3829 | } | 3941 | } |
3830 | 3942 | ||
3831 | spin_lock(&memcg->reclaim_param_lock); | ||
3832 | memcg->swappiness = val; | 3943 | memcg->swappiness = val; |
3833 | spin_unlock(&memcg->reclaim_param_lock); | ||
3834 | 3944 | ||
3835 | cgroup_unlock(); | 3945 | cgroup_unlock(); |
3836 | 3946 | ||
@@ -4486,7 +4596,6 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) | |||
4486 | res_counter_init(&mem->memsw, NULL); | 4596 | res_counter_init(&mem->memsw, NULL); |
4487 | } | 4597 | } |
4488 | mem->last_scanned_child = 0; | 4598 | mem->last_scanned_child = 0; |
4489 | spin_lock_init(&mem->reclaim_param_lock); | ||
4490 | INIT_LIST_HEAD(&mem->oom_notify); | 4599 | INIT_LIST_HEAD(&mem->oom_notify); |
4491 | 4600 | ||
4492 | if (parent) | 4601 | if (parent) |
@@ -4574,8 +4683,7 @@ one_by_one: | |||
4574 | batch_count = PRECHARGE_COUNT_AT_ONCE; | 4683 | batch_count = PRECHARGE_COUNT_AT_ONCE; |
4575 | cond_resched(); | 4684 | cond_resched(); |
4576 | } | 4685 | } |
4577 | ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, &mem, false, | 4686 | ret = __mem_cgroup_try_charge(NULL, GFP_KERNEL, 1, &mem, false); |
4578 | PAGE_SIZE); | ||
4579 | if (ret || !mem) | 4687 | if (ret || !mem) |
4580 | /* mem_cgroup_clear_mc() will do uncharge later */ | 4688 | /* mem_cgroup_clear_mc() will do uncharge later */ |
4581 | return -ENOMEM; | 4689 | return -ENOMEM; |
@@ -4737,7 +4845,8 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd, | |||
4737 | pte_t *pte; | 4845 | pte_t *pte; |
4738 | spinlock_t *ptl; | 4846 | spinlock_t *ptl; |
4739 | 4847 | ||
4740 | VM_BUG_ON(pmd_trans_huge(*pmd)); | 4848 | split_huge_page_pmd(walk->mm, pmd); |
4849 | |||
4741 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | 4850 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); |
4742 | for (; addr != end; pte++, addr += PAGE_SIZE) | 4851 | for (; addr != end; pte++, addr += PAGE_SIZE) |
4743 | if (is_target_pte_for_mc(vma, addr, *pte, NULL)) | 4852 | if (is_target_pte_for_mc(vma, addr, *pte, NULL)) |
@@ -4899,8 +5008,8 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd, | |||
4899 | pte_t *pte; | 5008 | pte_t *pte; |
4900 | spinlock_t *ptl; | 5009 | spinlock_t *ptl; |
4901 | 5010 | ||
5011 | split_huge_page_pmd(walk->mm, pmd); | ||
4902 | retry: | 5012 | retry: |
4903 | VM_BUG_ON(pmd_trans_huge(*pmd)); | ||
4904 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); | 5013 | pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl); |
4905 | for (; addr != end; addr += PAGE_SIZE) { | 5014 | for (; addr != end; addr += PAGE_SIZE) { |
4906 | pte_t ptent = *(pte++); | 5015 | pte_t ptent = *(pte++); |
@@ -4920,8 +5029,8 @@ retry: | |||
4920 | if (isolate_lru_page(page)) | 5029 | if (isolate_lru_page(page)) |
4921 | goto put; | 5030 | goto put; |
4922 | pc = lookup_page_cgroup(page); | 5031 | pc = lookup_page_cgroup(page); |
4923 | if (!mem_cgroup_move_account(pc, | 5032 | if (!mem_cgroup_move_account(page, 1, pc, |
4924 | mc.from, mc.to, false, PAGE_SIZE)) { | 5033 | mc.from, mc.to, false)) { |
4925 | mc.precharge--; | 5034 | mc.precharge--; |
4926 | /* we uncharge from mc.from later. */ | 5035 | /* we uncharge from mc.from later. */ |
4927 | mc.moved_charge++; | 5036 | mc.moved_charge++; |
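The move-charge loop above spends one unit of mc.precharge for each page whose accounting is switched to the target group and defers the source group's uncharge by bumping mc.moved_charge; as the earlier comment notes, mem_cgroup_clear_mc() performs that uncharge later. A toy model of the precharge-then-move bookkeeping; move_ctx, charge_counter, move_account_one and move_finish are invented names, and the simplified cleanup stands in for what mem_cgroup_clear_mc() does:

#include <stdio.h>
#include <stdbool.h>

struct charge_counter { unsigned long charged_pages; };

struct move_ctx {
	struct charge_counter *from, *to;
	unsigned long precharge;      /* pages already charged to "to" up front */
	unsigned long moved_charge;   /* pages to uncharge from "from" later */
};

/* Re-account one page: "to" was precharged, "from" is uncharged at the end. */
static bool move_account_one(struct move_ctx *mc)
{
	if (!mc->precharge)
		return false;         /* caller would go back and precharge more */
	mc->precharge--;
	mc->moved_charge++;
	return true;
}

static void move_finish(struct move_ctx *mc)
{
	/* drop the deferred charges from the source group in one go */
	mc->from->charged_pages -= mc->moved_charge;
	/* give back any precharge that was never consumed */
	mc->to->charged_pages -= mc->precharge;
	mc->precharge = mc->moved_charge = 0;
}

int main(void)
{
	struct charge_counter from = { .charged_pages = 8 }, to = { .charged_pages = 0 };
	struct move_ctx mc = { .from = &from, .to = &to };

	mc.precharge = 4;             /* pretend 4 pages were precharged... */
	to.charged_pages += 4;        /* ...and already accounted to the target */

	for (int i = 0; i < 3; i++)   /* move 3 of the pages */
		move_account_one(&mc);
	move_finish(&mc);

	printf("from=%lu to=%lu\n", from.charged_pages, to.charged_pages);
	return 0;
}

The total number of charged pages is the same before and after the move (8 in the sketch); only the owner changes, which is why the loop can keep going without touching the counters until the deferred uncharge at the end.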