Diffstat (limited to 'mm/vmscan.c')
-rw-r--r--  mm/vmscan.c | 416
1 file changed, 216 insertions(+), 200 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index e656035d3406..d10d2f9a33f3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -78,7 +78,7 @@ struct scan_control { | |||
78 | unsigned long (*isolate_pages)(unsigned long nr, struct list_head *dst, | 78 | unsigned long (*isolate_pages)(unsigned long nr, struct list_head *dst, |
79 | unsigned long *scanned, int order, int mode, | 79 | unsigned long *scanned, int order, int mode, |
80 | struct zone *z, struct mem_cgroup *mem_cont, | 80 | struct zone *z, struct mem_cgroup *mem_cont, |
81 | int active); | 81 | int active, int file); |
82 | }; | 82 | }; |
83 | 83 | ||
84 | #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) | 84 | #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru)) |
@@ -680,7 +680,7 @@ keep: | |||
680 | * | 680 | * |
681 | * returns 0 on success, -ve errno on failure. | 681 | * returns 0 on success, -ve errno on failure. |
682 | */ | 682 | */ |
683 | int __isolate_lru_page(struct page *page, int mode) | 683 | int __isolate_lru_page(struct page *page, int mode, int file) |
684 | { | 684 | { |
685 | int ret = -EINVAL; | 685 | int ret = -EINVAL; |
686 | 686 | ||
@@ -696,6 +696,9 @@ int __isolate_lru_page(struct page *page, int mode) | |||
696 | if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode)) | 696 | if (mode != ISOLATE_BOTH && (!PageActive(page) != !mode)) |
697 | return ret; | 697 | return ret; |
698 | 698 | ||
699 | if (mode != ISOLATE_BOTH && (!page_is_file_cache(page) != !file)) | ||
700 | return ret; | ||
701 | |||
699 | ret = -EBUSY; | 702 | ret = -EBUSY; |
700 | if (likely(get_page_unless_zero(page))) { | 703 | if (likely(get_page_unless_zero(page))) { |
701 | /* | 704 | /* |
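A brief note on the type check added in the hunk above (the snippet below is an illustration, not part of the patch): `!a != !b` collapses both operands to 0 or 1 before comparing, so the test works even though page_is_file_cache() is used elsewhere in this patch as a direct list offset and need not return exactly 1. The isolation request is refused whenever the page's file/anon type disagrees with the caller's `file` argument, unless ISOLATE_BOTH was requested.

        /* Illustration only: hypothetical helper spelling out the !a != !b idiom. */
        static int lru_type_mismatch(int page_is_file, int want_file)
        {
                /* !x maps any non-zero encoding to 1, so e.g. 2 vs. 1 still matches */
                return !page_is_file != !want_file;
        }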
@@ -726,12 +729,13 @@ int __isolate_lru_page(struct page *page, int mode) | |||
726 | * @scanned: The number of pages that were scanned. | 729 | * @scanned: The number of pages that were scanned. |
727 | * @order: The caller's attempted allocation order | 730 | * @order: The caller's attempted allocation order |
728 | * @mode: One of the LRU isolation modes | 731 | * @mode: One of the LRU isolation modes |
732 | * @file: True [1] if isolating file [!anon] pages | ||
729 | * | 733 | * |
730 | * returns how many pages were moved onto *@dst. | 734 | * returns how many pages were moved onto *@dst. |
731 | */ | 735 | */ |
732 | static unsigned long isolate_lru_pages(unsigned long nr_to_scan, | 736 | static unsigned long isolate_lru_pages(unsigned long nr_to_scan, |
733 | struct list_head *src, struct list_head *dst, | 737 | struct list_head *src, struct list_head *dst, |
734 | unsigned long *scanned, int order, int mode) | 738 | unsigned long *scanned, int order, int mode, int file) |
735 | { | 739 | { |
736 | unsigned long nr_taken = 0; | 740 | unsigned long nr_taken = 0; |
737 | unsigned long scan; | 741 | unsigned long scan; |
@@ -748,7 +752,7 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, | |||
748 | 752 | ||
749 | VM_BUG_ON(!PageLRU(page)); | 753 | VM_BUG_ON(!PageLRU(page)); |
750 | 754 | ||
751 | switch (__isolate_lru_page(page, mode)) { | 755 | switch (__isolate_lru_page(page, mode, file)) { |
752 | case 0: | 756 | case 0: |
753 | list_move(&page->lru, dst); | 757 | list_move(&page->lru, dst); |
754 | nr_taken++; | 758 | nr_taken++; |
@@ -791,10 +795,11 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan, | |||
791 | break; | 795 | break; |
792 | 796 | ||
793 | cursor_page = pfn_to_page(pfn); | 797 | cursor_page = pfn_to_page(pfn); |
798 | |||
794 | /* Check that we have not crossed a zone boundary. */ | 799 | /* Check that we have not crossed a zone boundary. */ |
795 | if (unlikely(page_zone_id(cursor_page) != zone_id)) | 800 | if (unlikely(page_zone_id(cursor_page) != zone_id)) |
796 | continue; | 801 | continue; |
797 | switch (__isolate_lru_page(cursor_page, mode)) { | 802 | switch (__isolate_lru_page(cursor_page, mode, file)) { |
798 | case 0: | 803 | case 0: |
799 | list_move(&cursor_page->lru, dst); | 804 | list_move(&cursor_page->lru, dst); |
800 | nr_taken++; | 805 | nr_taken++; |
@@ -819,30 +824,37 @@ static unsigned long isolate_pages_global(unsigned long nr, | |||
819 | unsigned long *scanned, int order, | 824 | unsigned long *scanned, int order, |
820 | int mode, struct zone *z, | 825 | int mode, struct zone *z, |
821 | struct mem_cgroup *mem_cont, | 826 | struct mem_cgroup *mem_cont, |
822 | int active) | 827 | int active, int file) |
823 | { | 828 | { |
829 | int lru = LRU_BASE; | ||
824 | if (active) | 830 | if (active) |
825 | return isolate_lru_pages(nr, &z->lru[LRU_ACTIVE].list, dst, | 831 | lru += LRU_ACTIVE; |
826 | scanned, order, mode); | 832 | if (file) |
827 | else | 833 | lru += LRU_FILE; |
828 | return isolate_lru_pages(nr, &z->lru[LRU_INACTIVE].list, dst, | 834 | return isolate_lru_pages(nr, &z->lru[lru].list, dst, scanned, order, |
829 | scanned, order, mode); | 835 | mode, !!file); |
830 | } | 836 | } |
831 | 837 | ||
832 | /* | 838 | /* |
833 | * clear_active_flags() is a helper for shrink_active_list(), clearing | 839 | * clear_active_flags() is a helper for shrink_active_list(), clearing |
834 | * any active bits from the pages in the list. | 840 | * any active bits from the pages in the list. |
835 | */ | 841 | */ |
836 | static unsigned long clear_active_flags(struct list_head *page_list) | 842 | static unsigned long clear_active_flags(struct list_head *page_list, |
843 | unsigned int *count) | ||
837 | { | 844 | { |
838 | int nr_active = 0; | 845 | int nr_active = 0; |
846 | int lru; | ||
839 | struct page *page; | 847 | struct page *page; |
840 | 848 | ||
841 | list_for_each_entry(page, page_list, lru) | 849 | list_for_each_entry(page, page_list, lru) { |
850 | lru = page_is_file_cache(page); | ||
842 | if (PageActive(page)) { | 851 | if (PageActive(page)) { |
852 | lru += LRU_ACTIVE; | ||
843 | ClearPageActive(page); | 853 | ClearPageActive(page); |
844 | nr_active++; | 854 | nr_active++; |
845 | } | 855 | } |
856 | count[lru]++; | ||
857 | } | ||
846 | 858 | ||
847 | return nr_active; | 859 | return nr_active; |
848 | } | 860 | } |
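A short aside on the list-index arithmetic used in isolate_pages_global() and clear_active_flags() above, and in isolate_lru_page() below: zone->lru[] is now indexed by adding LRU_ACTIVE and LRU_FILE offsets to LRU_BASE. The sketch spells that out; the concrete values are assumptions taken from the companion mmzone.h/mm_inline.h changes in this series (not shown in this file), where LRU_ACTIVE is 1, LRU_FILE is 2 and page_is_file_cache() returns either 0 or LRU_FILE.

        /* Illustration only -- mirrors the index arithmetic, not the kernel headers. */
        #define SK_LRU_ACTIVE   1       /* assumed value of LRU_ACTIVE */
        #define SK_LRU_FILE     2       /* assumed value of LRU_FILE   */

        /* inactive anon = 0, active anon = 1, inactive file = 2, active file = 3 */
        static int lru_index(int active, int file)
        {
                return (file ? SK_LRU_FILE : 0) + (active ? SK_LRU_ACTIVE : 0);
        }

With that layout, `lru += page_is_file_cache(page) + !!PageActive(page)` in isolate_lru_page() lands on the right one of the four lists without any branching.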
@@ -880,12 +892,12 @@ int isolate_lru_page(struct page *page) | |||
880 | 892 | ||
881 | spin_lock_irq(&zone->lru_lock); | 893 | spin_lock_irq(&zone->lru_lock); |
882 | if (PageLRU(page) && get_page_unless_zero(page)) { | 894 | if (PageLRU(page) && get_page_unless_zero(page)) { |
895 | int lru = LRU_BASE; | ||
883 | ret = 0; | 896 | ret = 0; |
884 | ClearPageLRU(page); | 897 | ClearPageLRU(page); |
885 | if (PageActive(page)) | 898 | |
886 | del_page_from_active_list(zone, page); | 899 | lru += page_is_file_cache(page) + !!PageActive(page); |
887 | else | 900 | del_page_from_lru_list(zone, page, lru); |
888 | del_page_from_inactive_list(zone, page); | ||
889 | } | 901 | } |
890 | spin_unlock_irq(&zone->lru_lock); | 902 | spin_unlock_irq(&zone->lru_lock); |
891 | } | 903 | } |
@@ -897,7 +909,7 @@ int isolate_lru_page(struct page *page) | |||
897 | * of reclaimed pages | 909 | * of reclaimed pages |
898 | */ | 910 | */ |
899 | static unsigned long shrink_inactive_list(unsigned long max_scan, | 911 | static unsigned long shrink_inactive_list(unsigned long max_scan, |
900 | struct zone *zone, struct scan_control *sc) | 912 | struct zone *zone, struct scan_control *sc, int file) |
901 | { | 913 | { |
902 | LIST_HEAD(page_list); | 914 | LIST_HEAD(page_list); |
903 | struct pagevec pvec; | 915 | struct pagevec pvec; |
@@ -914,20 +926,32 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, | |||
914 | unsigned long nr_scan; | 926 | unsigned long nr_scan; |
915 | unsigned long nr_freed; | 927 | unsigned long nr_freed; |
916 | unsigned long nr_active; | 928 | unsigned long nr_active; |
929 | unsigned int count[NR_LRU_LISTS] = { 0, }; | ||
930 | int mode = (sc->order > PAGE_ALLOC_COSTLY_ORDER) ? | ||
931 | ISOLATE_BOTH : ISOLATE_INACTIVE; | ||
917 | 932 | ||
918 | nr_taken = sc->isolate_pages(sc->swap_cluster_max, | 933 | nr_taken = sc->isolate_pages(sc->swap_cluster_max, |
919 | &page_list, &nr_scan, sc->order, | 934 | &page_list, &nr_scan, sc->order, mode, |
920 | (sc->order > PAGE_ALLOC_COSTLY_ORDER)? | 935 | zone, sc->mem_cgroup, 0, file); |
921 | ISOLATE_BOTH : ISOLATE_INACTIVE, | 936 | nr_active = clear_active_flags(&page_list, count); |
922 | zone, sc->mem_cgroup, 0); | ||
923 | nr_active = clear_active_flags(&page_list); | ||
924 | __count_vm_events(PGDEACTIVATE, nr_active); | 937 | __count_vm_events(PGDEACTIVATE, nr_active); |
925 | 938 | ||
926 | __mod_zone_page_state(zone, NR_ACTIVE, -nr_active); | 939 | __mod_zone_page_state(zone, NR_ACTIVE_FILE, |
927 | __mod_zone_page_state(zone, NR_INACTIVE, | 940 | -count[LRU_ACTIVE_FILE]); |
928 | -(nr_taken - nr_active)); | 941 | __mod_zone_page_state(zone, NR_INACTIVE_FILE, |
929 | if (scan_global_lru(sc)) | 942 | -count[LRU_INACTIVE_FILE]); |
943 | __mod_zone_page_state(zone, NR_ACTIVE_ANON, | ||
944 | -count[LRU_ACTIVE_ANON]); | ||
945 | __mod_zone_page_state(zone, NR_INACTIVE_ANON, | ||
946 | -count[LRU_INACTIVE_ANON]); | ||
947 | |||
948 | if (scan_global_lru(sc)) { | ||
930 | zone->pages_scanned += nr_scan; | 949 | zone->pages_scanned += nr_scan; |
950 | zone->recent_scanned[0] += count[LRU_INACTIVE_ANON]; | ||
951 | zone->recent_scanned[0] += count[LRU_ACTIVE_ANON]; | ||
952 | zone->recent_scanned[1] += count[LRU_INACTIVE_FILE]; | ||
953 | zone->recent_scanned[1] += count[LRU_ACTIVE_FILE]; | ||
954 | } | ||
931 | spin_unlock_irq(&zone->lru_lock); | 955 | spin_unlock_irq(&zone->lru_lock); |
932 | 956 | ||
933 | nr_scanned += nr_scan; | 957 | nr_scanned += nr_scan; |
@@ -947,7 +971,7 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, | |||
947 | * The attempt at page out may have made some | 971 | * The attempt at page out may have made some |
948 | * of the pages active, mark them inactive again. | 972 | * of the pages active, mark them inactive again. |
949 | */ | 973 | */ |
950 | nr_active = clear_active_flags(&page_list); | 974 | nr_active = clear_active_flags(&page_list, count); |
951 | count_vm_events(PGDEACTIVATE, nr_active); | 975 | count_vm_events(PGDEACTIVATE, nr_active); |
952 | 976 | ||
953 | nr_freed += shrink_page_list(&page_list, sc, | 977 | nr_freed += shrink_page_list(&page_list, sc, |
@@ -977,6 +1001,10 @@ static unsigned long shrink_inactive_list(unsigned long max_scan, | |||
977 | SetPageLRU(page); | 1001 | SetPageLRU(page); |
978 | list_del(&page->lru); | 1002 | list_del(&page->lru); |
979 | add_page_to_lru_list(zone, page, page_lru(page)); | 1003 | add_page_to_lru_list(zone, page, page_lru(page)); |
1004 | if (PageActive(page) && scan_global_lru(sc)) { | ||
1005 | int file = !!page_is_file_cache(page); | ||
1006 | zone->recent_rotated[file]++; | ||
1007 | } | ||
980 | if (!pagevec_add(&pvec, page)) { | 1008 | if (!pagevec_add(&pvec, page)) { |
981 | spin_unlock_irq(&zone->lru_lock); | 1009 | spin_unlock_irq(&zone->lru_lock); |
982 | __pagevec_release(&pvec); | 1010 | __pagevec_release(&pvec); |
@@ -1007,115 +1035,7 @@ static inline void note_zone_scanning_priority(struct zone *zone, int priority) | |||
1007 | 1035 | ||
1008 | static inline int zone_is_near_oom(struct zone *zone) | 1036 | static inline int zone_is_near_oom(struct zone *zone) |
1009 | { | 1037 | { |
1010 | return zone->pages_scanned >= (zone_page_state(zone, NR_ACTIVE) | 1038 | return zone->pages_scanned >= (zone_lru_pages(zone) * 3); |
1011 | + zone_page_state(zone, NR_INACTIVE))*3; | ||
1012 | } | ||
1013 | |||
1014 | /* | ||
1015 | * Determine we should try to reclaim mapped pages. | ||
1016 | * This is called only when sc->mem_cgroup is NULL. | ||
1017 | */ | ||
1018 | static int calc_reclaim_mapped(struct scan_control *sc, struct zone *zone, | ||
1019 | int priority) | ||
1020 | { | ||
1021 | long mapped_ratio; | ||
1022 | long distress; | ||
1023 | long swap_tendency; | ||
1024 | long imbalance; | ||
1025 | int reclaim_mapped = 0; | ||
1026 | int prev_priority; | ||
1027 | |||
1028 | if (scan_global_lru(sc) && zone_is_near_oom(zone)) | ||
1029 | return 1; | ||
1030 | /* | ||
1031 | * `distress' is a measure of how much trouble we're having | ||
1032 | * reclaiming pages. 0 -> no problems. 100 -> great trouble. | ||
1033 | */ | ||
1034 | if (scan_global_lru(sc)) | ||
1035 | prev_priority = zone->prev_priority; | ||
1036 | else | ||
1037 | prev_priority = mem_cgroup_get_reclaim_priority(sc->mem_cgroup); | ||
1038 | |||
1039 | distress = 100 >> min(prev_priority, priority); | ||
1040 | |||
1041 | /* | ||
1042 | * The point of this algorithm is to decide when to start | ||
1043 | * reclaiming mapped memory instead of just pagecache. Work out | ||
1044 | * how much memory | ||
1045 | * is mapped. | ||
1046 | */ | ||
1047 | if (scan_global_lru(sc)) | ||
1048 | mapped_ratio = ((global_page_state(NR_FILE_MAPPED) + | ||
1049 | global_page_state(NR_ANON_PAGES)) * 100) / | ||
1050 | vm_total_pages; | ||
1051 | else | ||
1052 | mapped_ratio = mem_cgroup_calc_mapped_ratio(sc->mem_cgroup); | ||
1053 | |||
1054 | /* | ||
1055 | * Now decide how much we really want to unmap some pages. The | ||
1056 | * mapped ratio is downgraded - just because there's a lot of | ||
1057 | * mapped memory doesn't necessarily mean that page reclaim | ||
1058 | * isn't succeeding. | ||
1059 | * | ||
1060 | * The distress ratio is important - we don't want to start | ||
1061 | * going oom. | ||
1062 | * | ||
1063 | * A 100% value of vm_swappiness overrides this algorithm | ||
1064 | * altogether. | ||
1065 | */ | ||
1066 | swap_tendency = mapped_ratio / 2 + distress + sc->swappiness; | ||
1067 | |||
1068 | /* | ||
1069 | * If there's huge imbalance between active and inactive | ||
1070 | * (think active 100 times larger than inactive) we should | ||
1071 | * become more permissive, or the system will take too much | ||
1072 | * cpu before it start swapping during memory pressure. | ||
1073 | * Distress is about avoiding early-oom, this is about | ||
1074 | * making swappiness graceful despite setting it to low | ||
1075 | * values. | ||
1076 | * | ||
1077 | * Avoid div by zero with nr_inactive+1, and max resulting | ||
1078 | * value is vm_total_pages. | ||
1079 | */ | ||
1080 | if (scan_global_lru(sc)) { | ||
1081 | imbalance = zone_page_state(zone, NR_ACTIVE); | ||
1082 | imbalance /= zone_page_state(zone, NR_INACTIVE) + 1; | ||
1083 | } else | ||
1084 | imbalance = mem_cgroup_reclaim_imbalance(sc->mem_cgroup); | ||
1085 | |||
1086 | /* | ||
1087 | * Reduce the effect of imbalance if swappiness is low, | ||
1088 | * this means for a swappiness very low, the imbalance | ||
1089 | * must be much higher than 100 for this logic to make | ||
1090 | * the difference. | ||
1091 | * | ||
1092 | * Max temporary value is vm_total_pages*100. | ||
1093 | */ | ||
1094 | imbalance *= (vm_swappiness + 1); | ||
1095 | imbalance /= 100; | ||
1096 | |||
1097 | /* | ||
1098 | * If not much of the ram is mapped, makes the imbalance | ||
1099 | * less relevant, it's high priority we refill the inactive | ||
1100 | * list with mapped pages only in presence of high ratio of | ||
1101 | * mapped pages. | ||
1102 | * | ||
1103 | * Max temporary value is vm_total_pages*100. | ||
1104 | */ | ||
1105 | imbalance *= mapped_ratio; | ||
1106 | imbalance /= 100; | ||
1107 | |||
1108 | /* apply imbalance feedback to swap_tendency */ | ||
1109 | swap_tendency += imbalance; | ||
1110 | |||
1111 | /* | ||
1112 | * Now use this metric to decide whether to start moving mapped | ||
1113 | * memory onto the inactive list. | ||
1114 | */ | ||
1115 | if (swap_tendency >= 100) | ||
1116 | reclaim_mapped = 1; | ||
1117 | |||
1118 | return reclaim_mapped; | ||
1119 | } | 1039 | } |
1120 | 1040 | ||
1121 | /* | 1041 | /* |
@@ -1138,7 +1058,7 @@ static int calc_reclaim_mapped(struct scan_control *sc, struct zone *zone, | |||
1138 | 1058 | ||
1139 | 1059 | ||
1140 | static void shrink_active_list(unsigned long nr_pages, struct zone *zone, | 1060 | static void shrink_active_list(unsigned long nr_pages, struct zone *zone, |
1141 | struct scan_control *sc, int priority) | 1061 | struct scan_control *sc, int priority, int file) |
1142 | { | 1062 | { |
1143 | unsigned long pgmoved; | 1063 | unsigned long pgmoved; |
1144 | int pgdeactivate = 0; | 1064 | int pgdeactivate = 0; |
@@ -1148,43 +1068,42 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, | |||
1148 | LIST_HEAD(l_inactive); | 1068 | LIST_HEAD(l_inactive); |
1149 | struct page *page; | 1069 | struct page *page; |
1150 | struct pagevec pvec; | 1070 | struct pagevec pvec; |
1151 | int reclaim_mapped = 0; | 1071 | enum lru_list lru; |
1152 | |||
1153 | if (sc->may_swap) | ||
1154 | reclaim_mapped = calc_reclaim_mapped(sc, zone, priority); | ||
1155 | 1072 | ||
1156 | lru_add_drain(); | 1073 | lru_add_drain(); |
1157 | spin_lock_irq(&zone->lru_lock); | 1074 | spin_lock_irq(&zone->lru_lock); |
1158 | pgmoved = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order, | 1075 | pgmoved = sc->isolate_pages(nr_pages, &l_hold, &pgscanned, sc->order, |
1159 | ISOLATE_ACTIVE, zone, | 1076 | ISOLATE_ACTIVE, zone, |
1160 | sc->mem_cgroup, 1); | 1077 | sc->mem_cgroup, 1, file); |
1161 | /* | 1078 | /* |
1162 | * zone->pages_scanned is used for detect zone's oom | 1079 | * zone->pages_scanned is used for detect zone's oom |
1163 | * mem_cgroup remembers nr_scan by itself. | 1080 | * mem_cgroup remembers nr_scan by itself. |
1164 | */ | 1081 | */ |
1165 | if (scan_global_lru(sc)) | 1082 | if (scan_global_lru(sc)) { |
1166 | zone->pages_scanned += pgscanned; | 1083 | zone->pages_scanned += pgscanned; |
1084 | zone->recent_scanned[!!file] += pgmoved; | ||
1085 | } | ||
1167 | 1086 | ||
1168 | __mod_zone_page_state(zone, NR_ACTIVE, -pgmoved); | 1087 | if (file) |
1088 | __mod_zone_page_state(zone, NR_ACTIVE_FILE, -pgmoved); | ||
1089 | else | ||
1090 | __mod_zone_page_state(zone, NR_ACTIVE_ANON, -pgmoved); | ||
1169 | spin_unlock_irq(&zone->lru_lock); | 1091 | spin_unlock_irq(&zone->lru_lock); |
1170 | 1092 | ||
1171 | while (!list_empty(&l_hold)) { | 1093 | while (!list_empty(&l_hold)) { |
1172 | cond_resched(); | 1094 | cond_resched(); |
1173 | page = lru_to_page(&l_hold); | 1095 | page = lru_to_page(&l_hold); |
1174 | list_del(&page->lru); | 1096 | list_del(&page->lru); |
1175 | if (page_mapped(page)) { | ||
1176 | if (!reclaim_mapped || | ||
1177 | (total_swap_pages == 0 && PageAnon(page)) || | ||
1178 | page_referenced(page, 0, sc->mem_cgroup)) { | ||
1179 | list_add(&page->lru, &l_active); | ||
1180 | continue; | ||
1181 | } | ||
1182 | } | ||
1183 | list_add(&page->lru, &l_inactive); | 1097 | list_add(&page->lru, &l_inactive); |
1184 | } | 1098 | } |
1185 | 1099 | ||
1100 | /* | ||
1101 | * Now put the pages back on the appropriate [file or anon] inactive | ||
1102 | * and active lists. | ||
1103 | */ | ||
1186 | pagevec_init(&pvec, 1); | 1104 | pagevec_init(&pvec, 1); |
1187 | pgmoved = 0; | 1105 | pgmoved = 0; |
1106 | lru = LRU_BASE + file * LRU_FILE; | ||
1188 | spin_lock_irq(&zone->lru_lock); | 1107 | spin_lock_irq(&zone->lru_lock); |
1189 | while (!list_empty(&l_inactive)) { | 1108 | while (!list_empty(&l_inactive)) { |
1190 | page = lru_to_page(&l_inactive); | 1109 | page = lru_to_page(&l_inactive); |
@@ -1194,11 +1113,11 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, | |||
1194 | VM_BUG_ON(!PageActive(page)); | 1113 | VM_BUG_ON(!PageActive(page)); |
1195 | ClearPageActive(page); | 1114 | ClearPageActive(page); |
1196 | 1115 | ||
1197 | list_move(&page->lru, &zone->lru[LRU_INACTIVE].list); | 1116 | list_move(&page->lru, &zone->lru[lru].list); |
1198 | mem_cgroup_move_lists(page, false); | 1117 | mem_cgroup_move_lists(page, false); |
1199 | pgmoved++; | 1118 | pgmoved++; |
1200 | if (!pagevec_add(&pvec, page)) { | 1119 | if (!pagevec_add(&pvec, page)) { |
1201 | __mod_zone_page_state(zone, NR_INACTIVE, pgmoved); | 1120 | __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved); |
1202 | spin_unlock_irq(&zone->lru_lock); | 1121 | spin_unlock_irq(&zone->lru_lock); |
1203 | pgdeactivate += pgmoved; | 1122 | pgdeactivate += pgmoved; |
1204 | pgmoved = 0; | 1123 | pgmoved = 0; |
@@ -1208,7 +1127,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, | |||
1208 | spin_lock_irq(&zone->lru_lock); | 1127 | spin_lock_irq(&zone->lru_lock); |
1209 | } | 1128 | } |
1210 | } | 1129 | } |
1211 | __mod_zone_page_state(zone, NR_INACTIVE, pgmoved); | 1130 | __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved); |
1212 | pgdeactivate += pgmoved; | 1131 | pgdeactivate += pgmoved; |
1213 | if (buffer_heads_over_limit) { | 1132 | if (buffer_heads_over_limit) { |
1214 | spin_unlock_irq(&zone->lru_lock); | 1133 | spin_unlock_irq(&zone->lru_lock); |
@@ -1217,6 +1136,7 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, | |||
1217 | } | 1136 | } |
1218 | 1137 | ||
1219 | pgmoved = 0; | 1138 | pgmoved = 0; |
1139 | lru = LRU_ACTIVE + file * LRU_FILE; | ||
1220 | while (!list_empty(&l_active)) { | 1140 | while (!list_empty(&l_active)) { |
1221 | page = lru_to_page(&l_active); | 1141 | page = lru_to_page(&l_active); |
1222 | prefetchw_prev_lru_page(page, &l_active, flags); | 1142 | prefetchw_prev_lru_page(page, &l_active, flags); |
@@ -1224,11 +1144,11 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, | |||
1224 | SetPageLRU(page); | 1144 | SetPageLRU(page); |
1225 | VM_BUG_ON(!PageActive(page)); | 1145 | VM_BUG_ON(!PageActive(page)); |
1226 | 1146 | ||
1227 | list_move(&page->lru, &zone->lru[LRU_ACTIVE].list); | 1147 | list_move(&page->lru, &zone->lru[lru].list); |
1228 | mem_cgroup_move_lists(page, true); | 1148 | mem_cgroup_move_lists(page, true); |
1229 | pgmoved++; | 1149 | pgmoved++; |
1230 | if (!pagevec_add(&pvec, page)) { | 1150 | if (!pagevec_add(&pvec, page)) { |
1231 | __mod_zone_page_state(zone, NR_ACTIVE, pgmoved); | 1151 | __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved); |
1232 | pgmoved = 0; | 1152 | pgmoved = 0; |
1233 | spin_unlock_irq(&zone->lru_lock); | 1153 | spin_unlock_irq(&zone->lru_lock); |
1234 | if (vm_swap_full()) | 1154 | if (vm_swap_full()) |
@@ -1237,7 +1157,8 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, | |||
1237 | spin_lock_irq(&zone->lru_lock); | 1157 | spin_lock_irq(&zone->lru_lock); |
1238 | } | 1158 | } |
1239 | } | 1159 | } |
1240 | __mod_zone_page_state(zone, NR_ACTIVE, pgmoved); | 1160 | __mod_zone_page_state(zone, NR_LRU_BASE + lru, pgmoved); |
1161 | zone->recent_rotated[!!file] += pgmoved; | ||
1241 | 1162 | ||
1242 | __count_zone_vm_events(PGREFILL, zone, pgscanned); | 1163 | __count_zone_vm_events(PGREFILL, zone, pgscanned); |
1243 | __count_vm_events(PGDEACTIVATE, pgdeactivate); | 1164 | __count_vm_events(PGDEACTIVATE, pgdeactivate); |
@@ -1248,16 +1169,103 @@ static void shrink_active_list(unsigned long nr_pages, struct zone *zone, | |||
1248 | pagevec_release(&pvec); | 1169 | pagevec_release(&pvec); |
1249 | } | 1170 | } |
1250 | 1171 | ||
1251 | static unsigned long shrink_list(enum lru_list l, unsigned long nr_to_scan, | 1172 | static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan, |
1252 | struct zone *zone, struct scan_control *sc, int priority) | 1173 | struct zone *zone, struct scan_control *sc, int priority) |
1253 | { | 1174 | { |
1254 | if (l == LRU_ACTIVE) { | 1175 | int file = is_file_lru(lru); |
1255 | shrink_active_list(nr_to_scan, zone, sc, priority); | 1176 | |
1177 | if (lru == LRU_ACTIVE_ANON || lru == LRU_ACTIVE_FILE) { | ||
1178 | shrink_active_list(nr_to_scan, zone, sc, priority, file); | ||
1256 | return 0; | 1179 | return 0; |
1257 | } | 1180 | } |
1258 | return shrink_inactive_list(nr_to_scan, zone, sc); | 1181 | return shrink_inactive_list(nr_to_scan, zone, sc, file); |
1182 | } | ||
1183 | |||
1184 | /* | ||
1185 | * Determine how aggressively the anon and file LRU lists should be | ||
1186 | * scanned. The relative value of each set of LRU lists is determined | ||
1187 | * by looking at the fraction of the pages scanned we did rotate back | ||
1188 | * onto the active list instead of evict. | ||
1189 | * | ||
1190 | * percent[0] specifies how much pressure to put on ram/swap backed | ||
1191 | * memory, while percent[1] determines pressure on the file LRUs. | ||
1192 | */ | ||
1193 | static void get_scan_ratio(struct zone *zone, struct scan_control *sc, | ||
1194 | unsigned long *percent) | ||
1195 | { | ||
1196 | unsigned long anon, file, free; | ||
1197 | unsigned long anon_prio, file_prio; | ||
1198 | unsigned long ap, fp; | ||
1199 | |||
1200 | anon = zone_page_state(zone, NR_ACTIVE_ANON) + | ||
1201 | zone_page_state(zone, NR_INACTIVE_ANON); | ||
1202 | file = zone_page_state(zone, NR_ACTIVE_FILE) + | ||
1203 | zone_page_state(zone, NR_INACTIVE_FILE); | ||
1204 | free = zone_page_state(zone, NR_FREE_PAGES); | ||
1205 | |||
1206 | /* If we have no swap space, do not bother scanning anon pages. */ | ||
1207 | if (nr_swap_pages <= 0) { | ||
1208 | percent[0] = 0; | ||
1209 | percent[1] = 100; | ||
1210 | return; | ||
1211 | } | ||
1212 | |||
1213 | /* If we have very few page cache pages, force-scan anon pages. */ | ||
1214 | if (unlikely(file + free <= zone->pages_high)) { | ||
1215 | percent[0] = 100; | ||
1216 | percent[1] = 0; | ||
1217 | return; | ||
1218 | } | ||
1219 | |||
1220 | /* | ||
1221 | * OK, so we have swap space and a fair amount of page cache | ||
1222 | * pages. We use the recently rotated / recently scanned | ||
1223 | * ratios to determine how valuable each cache is. | ||
1224 | * | ||
1225 | * Because workloads change over time (and to avoid overflow) | ||
1226 | * we keep these statistics as a floating average, which ends | ||
1227 | * up weighing recent references more than old ones. | ||
1228 | * | ||
1229 | * anon in [0], file in [1] | ||
1230 | */ | ||
1231 | if (unlikely(zone->recent_scanned[0] > anon / 4)) { | ||
1232 | spin_lock_irq(&zone->lru_lock); | ||
1233 | zone->recent_scanned[0] /= 2; | ||
1234 | zone->recent_rotated[0] /= 2; | ||
1235 | spin_unlock_irq(&zone->lru_lock); | ||
1236 | } | ||
1237 | |||
1238 | if (unlikely(zone->recent_scanned[1] > file / 4)) { | ||
1239 | spin_lock_irq(&zone->lru_lock); | ||
1240 | zone->recent_scanned[1] /= 2; | ||
1241 | zone->recent_rotated[1] /= 2; | ||
1242 | spin_unlock_irq(&zone->lru_lock); | ||
1243 | } | ||
1244 | |||
1245 | /* | ||
1246 | * With swappiness at 100, anonymous and file have the same priority. | ||
1247 | * This scanning priority is essentially the inverse of IO cost. | ||
1248 | */ | ||
1249 | anon_prio = sc->swappiness; | ||
1250 | file_prio = 200 - sc->swappiness; | ||
1251 | |||
1252 | /* | ||
1253 | * anon recent_rotated[0] | ||
1254 | * %anon = 100 * ----------- / ----------------- * IO cost | ||
1255 | * anon + file rotate_sum | ||
1256 | */ | ||
1257 | ap = (anon_prio + 1) * (zone->recent_scanned[0] + 1); | ||
1258 | ap /= zone->recent_rotated[0] + 1; | ||
1259 | |||
1260 | fp = (file_prio + 1) * (zone->recent_scanned[1] + 1); | ||
1261 | fp /= zone->recent_rotated[1] + 1; | ||
1262 | |||
1263 | /* Normalize to percentages */ | ||
1264 | percent[0] = 100 * ap / (ap + fp + 1); | ||
1265 | percent[1] = 100 - percent[0]; | ||
1259 | } | 1266 | } |
1260 | 1267 | ||
1268 | |||
1261 | /* | 1269 | /* |
1262 | * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. | 1270 | * This is a basic per-zone page freer. Used by both kswapd and direct reclaim. |
1263 | */ | 1271 | */ |
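To get a feel for the new balancing heuristic, here is a stand-alone rendition of the percentage computation with made-up counter values (swappiness 60). It follows the arithmetic of get_scan_ratio() above but is illustration only; the inputs stand in for zone->recent_scanned[] and zone->recent_rotated[].

        /* Illustration only: same arithmetic as get_scan_ratio(), made-up inputs. */
        static void scan_ratio_example(unsigned long *percent)
        {
                unsigned long swappiness = 60;
                unsigned long anon_prio = swappiness;           /* 60  */
                unsigned long file_prio = 200 - swappiness;     /* 140 */
                /* [0] = anon, [1] = file; pretend recent_scanned/recent_rotated */
                unsigned long recent_scanned[2] = { 1000, 4000 };
                unsigned long recent_rotated[2] = {  800,  500 };
                unsigned long ap, fp;

                ap = (anon_prio + 1) * (recent_scanned[0] + 1);
                ap /= recent_rotated[0] + 1;                    /* ~76   */
                fp = (file_prio + 1) * (recent_scanned[1] + 1);
                fp /= recent_rotated[1] + 1;                    /* ~1126 */

                percent[0] = 100 * ap / (ap + fp + 1);          /* ~6%  anon */
                percent[1] = 100 - percent[0];                  /* ~94% file */
        }

In this example most scanned anon pages were rotated back onto the active list (they are hot), while scanned file pages mostly were not, so the file LRUs absorb nearly all of the reclaim pressure.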
@@ -1267,36 +1275,43 @@ static unsigned long shrink_zone(int priority, struct zone *zone, | |||
1267 | unsigned long nr[NR_LRU_LISTS]; | 1275 | unsigned long nr[NR_LRU_LISTS]; |
1268 | unsigned long nr_to_scan; | 1276 | unsigned long nr_to_scan; |
1269 | unsigned long nr_reclaimed = 0; | 1277 | unsigned long nr_reclaimed = 0; |
1278 | unsigned long percent[2]; /* anon @ 0; file @ 1 */ | ||
1270 | enum lru_list l; | 1279 | enum lru_list l; |
1271 | 1280 | ||
1272 | if (scan_global_lru(sc)) { | 1281 | get_scan_ratio(zone, sc, percent); |
1273 | /* | 1282 | |
1274 | * Add one to nr_to_scan just to make sure that the kernel | 1283 | for_each_lru(l) { |
1275 | * will slowly sift through the active list. | 1284 | if (scan_global_lru(sc)) { |
1276 | */ | 1285 | int file = is_file_lru(l); |
1277 | for_each_lru(l) { | 1286 | int scan; |
1278 | zone->lru[l].nr_scan += (zone_page_state(zone, | 1287 | /* |
1279 | NR_LRU_BASE + l) >> priority) + 1; | 1288 | * Add one to nr_to_scan just to make sure that the |
1289 | * kernel will slowly sift through each list. | ||
1290 | */ | ||
1291 | scan = zone_page_state(zone, NR_LRU_BASE + l); | ||
1292 | if (priority) { | ||
1293 | scan >>= priority; | ||
1294 | scan = (scan * percent[file]) / 100; | ||
1295 | } | ||
1296 | zone->lru[l].nr_scan += scan + 1; | ||
1280 | nr[l] = zone->lru[l].nr_scan; | 1297 | nr[l] = zone->lru[l].nr_scan; |
1281 | if (nr[l] >= sc->swap_cluster_max) | 1298 | if (nr[l] >= sc->swap_cluster_max) |
1282 | zone->lru[l].nr_scan = 0; | 1299 | zone->lru[l].nr_scan = 0; |
1283 | else | 1300 | else |
1284 | nr[l] = 0; | 1301 | nr[l] = 0; |
1302 | } else { | ||
1303 | /* | ||
1304 | * This reclaim occurs not because zone memory shortage | ||
1305 | * but because memory controller hits its limit. | ||
1306 | * Don't modify zone reclaim related data. | ||
1307 | */ | ||
1308 | nr[l] = mem_cgroup_calc_reclaim(sc->mem_cgroup, zone, | ||
1309 | priority, l); | ||
1285 | } | 1310 | } |
1286 | } else { | ||
1287 | /* | ||
1288 | * This reclaim occurs not because zone memory shortage but | ||
1289 | * because memory controller hits its limit. | ||
1290 | * Then, don't modify zone reclaim related data. | ||
1291 | */ | ||
1292 | nr[LRU_ACTIVE] = mem_cgroup_calc_reclaim(sc->mem_cgroup, | ||
1293 | zone, priority, LRU_ACTIVE); | ||
1294 | |||
1295 | nr[LRU_INACTIVE] = mem_cgroup_calc_reclaim(sc->mem_cgroup, | ||
1296 | zone, priority, LRU_INACTIVE); | ||
1297 | } | 1311 | } |
1298 | 1312 | ||
1299 | while (nr[LRU_ACTIVE] || nr[LRU_INACTIVE]) { | 1313 | while (nr[LRU_ACTIVE_ANON] || nr[LRU_INACTIVE_ANON] || |
1314 | nr[LRU_ACTIVE_FILE] || nr[LRU_INACTIVE_FILE]) { | ||
1300 | for_each_lru(l) { | 1315 | for_each_lru(l) { |
1301 | if (nr[l]) { | 1316 | if (nr[l]) { |
1302 | nr_to_scan = min(nr[l], | 1317 | nr_to_scan = min(nr[l], |
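Stepping back from the hunk above: for the global LRU case, each list's scan target now accumulates across calls until it reaches swap_cluster_max, scaled both by the reclaim priority and by the percentage from get_scan_ratio(). A stand-alone sketch of that accumulation (illustration only, hypothetical helper name):

        /* Sketch (not from the patch) of how a per-list scan target builds up
         * across calls, following the shrink_zone() logic above. */
        static unsigned long update_nr_scan(unsigned long *nr_scan_accum,
                                            unsigned long lru_size, int priority,
                                            unsigned long percent,
                                            unsigned long swap_cluster_max)
        {
                unsigned long scan = lru_size;

                if (priority) {
                        scan >>= priority;
                        scan = scan * percent / 100;
                }
                *nr_scan_accum += scan + 1;
                if (*nr_scan_accum >= swap_cluster_max) {
                        unsigned long nr = *nr_scan_accum;
                        *nr_scan_accum = 0;     /* batch is big enough: consume it */
                        return nr;
                }
                return 0;                       /* keep accumulating */
        }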
@@ -1369,7 +1384,7 @@ static unsigned long shrink_zones(int priority, struct zonelist *zonelist, | |||
1369 | 1384 | ||
1370 | return nr_reclaimed; | 1385 | return nr_reclaimed; |
1371 | } | 1386 | } |
1372 | 1387 | ||
1373 | /* | 1388 | /* |
1374 | * This is the main entry point to direct page reclaim. | 1389 | * This is the main entry point to direct page reclaim. |
1375 | * | 1390 | * |
@@ -1412,8 +1427,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist, | |||
1412 | if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) | 1427 | if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL)) |
1413 | continue; | 1428 | continue; |
1414 | 1429 | ||
1415 | lru_pages += zone_page_state(zone, NR_ACTIVE) | 1430 | lru_pages += zone_lru_pages(zone); |
1416 | + zone_page_state(zone, NR_INACTIVE); | ||
1417 | } | 1431 | } |
1418 | } | 1432 | } |
1419 | 1433 | ||
@@ -1615,8 +1629,7 @@ loop_again: | |||
1615 | for (i = 0; i <= end_zone; i++) { | 1629 | for (i = 0; i <= end_zone; i++) { |
1616 | struct zone *zone = pgdat->node_zones + i; | 1630 | struct zone *zone = pgdat->node_zones + i; |
1617 | 1631 | ||
1618 | lru_pages += zone_page_state(zone, NR_ACTIVE) | 1632 | lru_pages += zone_lru_pages(zone); |
1619 | + zone_page_state(zone, NR_INACTIVE); | ||
1620 | } | 1633 | } |
1621 | 1634 | ||
1622 | /* | 1635 | /* |
@@ -1660,8 +1673,7 @@ loop_again: | |||
1660 | if (zone_is_all_unreclaimable(zone)) | 1673 | if (zone_is_all_unreclaimable(zone)) |
1661 | continue; | 1674 | continue; |
1662 | if (nr_slab == 0 && zone->pages_scanned >= | 1675 | if (nr_slab == 0 && zone->pages_scanned >= |
1663 | (zone_page_state(zone, NR_ACTIVE) | 1676 | (zone_lru_pages(zone) * 6)) |
1664 | + zone_page_state(zone, NR_INACTIVE)) * 6) | ||
1665 | zone_set_flag(zone, | 1677 | zone_set_flag(zone, |
1666 | ZONE_ALL_UNRECLAIMABLE); | 1678 | ZONE_ALL_UNRECLAIMABLE); |
1667 | /* | 1679 | /* |
@@ -1715,7 +1727,7 @@ out: | |||
1715 | 1727 | ||
1716 | /* | 1728 | /* |
1717 | * The background pageout daemon, started as a kernel thread | 1729 | * The background pageout daemon, started as a kernel thread |
1718 | * from the init process. | 1730 | * from the init process. |
1719 | * | 1731 | * |
1720 | * This basically trickles out pages so that we have _some_ | 1732 | * This basically trickles out pages so that we have _some_ |
1721 | * free memory available even if there is no other activity | 1733 | * free memory available even if there is no other activity |
@@ -1809,6 +1821,14 @@ void wakeup_kswapd(struct zone *zone, int order) | |||
1809 | wake_up_interruptible(&pgdat->kswapd_wait); | 1821 | wake_up_interruptible(&pgdat->kswapd_wait); |
1810 | } | 1822 | } |
1811 | 1823 | ||
1824 | unsigned long global_lru_pages(void) | ||
1825 | { | ||
1826 | return global_page_state(NR_ACTIVE_ANON) | ||
1827 | + global_page_state(NR_ACTIVE_FILE) | ||
1828 | + global_page_state(NR_INACTIVE_ANON) | ||
1829 | + global_page_state(NR_INACTIVE_FILE); | ||
1830 | } | ||
1831 | |||
1812 | #ifdef CONFIG_PM | 1832 | #ifdef CONFIG_PM |
1813 | /* | 1833 | /* |
1814 | * Helper function for shrink_all_memory(). Tries to reclaim 'nr_pages' pages | 1834 | * Helper function for shrink_all_memory(). Tries to reclaim 'nr_pages' pages |
@@ -1834,7 +1854,8 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio, | |||
1834 | 1854 | ||
1835 | for_each_lru(l) { | 1855 | for_each_lru(l) { |
1836 | /* For pass = 0 we don't shrink the active list */ | 1856 | /* For pass = 0 we don't shrink the active list */ |
1837 | if (pass == 0 && l == LRU_ACTIVE) | 1857 | if (pass == 0 && |
1858 | (l == LRU_ACTIVE || l == LRU_ACTIVE_FILE)) | ||
1838 | continue; | 1859 | continue; |
1839 | 1860 | ||
1840 | zone->lru[l].nr_scan += | 1861 | zone->lru[l].nr_scan += |
@@ -1856,11 +1877,6 @@ static unsigned long shrink_all_zones(unsigned long nr_pages, int prio, | |||
1856 | return ret; | 1877 | return ret; |
1857 | } | 1878 | } |
1858 | 1879 | ||
1859 | static unsigned long count_lru_pages(void) | ||
1860 | { | ||
1861 | return global_page_state(NR_ACTIVE) + global_page_state(NR_INACTIVE); | ||
1862 | } | ||
1863 | |||
1864 | /* | 1880 | /* |
1865 | * Try to free `nr_pages' of memory, system-wide, and return the number of | 1881 | * Try to free `nr_pages' of memory, system-wide, and return the number of |
1866 | * freed pages. | 1882 | * freed pages. |
@@ -1886,7 +1902,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages) | |||
1886 | 1902 | ||
1887 | current->reclaim_state = &reclaim_state; | 1903 | current->reclaim_state = &reclaim_state; |
1888 | 1904 | ||
1889 | lru_pages = count_lru_pages(); | 1905 | lru_pages = global_lru_pages(); |
1890 | nr_slab = global_page_state(NR_SLAB_RECLAIMABLE); | 1906 | nr_slab = global_page_state(NR_SLAB_RECLAIMABLE); |
1891 | /* If slab caches are huge, it's better to hit them first */ | 1907 | /* If slab caches are huge, it's better to hit them first */ |
1892 | while (nr_slab >= lru_pages) { | 1908 | while (nr_slab >= lru_pages) { |
@@ -1929,7 +1945,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages) | |||
1929 | 1945 | ||
1930 | reclaim_state.reclaimed_slab = 0; | 1946 | reclaim_state.reclaimed_slab = 0; |
1931 | shrink_slab(sc.nr_scanned, sc.gfp_mask, | 1947 | shrink_slab(sc.nr_scanned, sc.gfp_mask, |
1932 | count_lru_pages()); | 1948 | global_lru_pages()); |
1933 | ret += reclaim_state.reclaimed_slab; | 1949 | ret += reclaim_state.reclaimed_slab; |
1934 | if (ret >= nr_pages) | 1950 | if (ret >= nr_pages) |
1935 | goto out; | 1951 | goto out; |
@@ -1946,7 +1962,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages) | |||
1946 | if (!ret) { | 1962 | if (!ret) { |
1947 | do { | 1963 | do { |
1948 | reclaim_state.reclaimed_slab = 0; | 1964 | reclaim_state.reclaimed_slab = 0; |
1949 | shrink_slab(nr_pages, sc.gfp_mask, count_lru_pages()); | 1965 | shrink_slab(nr_pages, sc.gfp_mask, global_lru_pages()); |
1950 | ret += reclaim_state.reclaimed_slab; | 1966 | ret += reclaim_state.reclaimed_slab; |
1951 | } while (ret < nr_pages && reclaim_state.reclaimed_slab > 0); | 1967 | } while (ret < nr_pages && reclaim_state.reclaimed_slab > 0); |
1952 | } | 1968 | } |