Diffstat (limited to 'mm/vmscan.c')

 mm/vmscan.c | 462 ++++++++++++++++++++-----------------------------------
 1 file changed, 184 insertions(+), 278 deletions(-)
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8deb5f4da4d9..347b3ff2a478 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -78,6 +78,9 @@ struct scan_control {
 
         int order;
 
+        /* Scan (total_size >> priority) pages at once */
+        int priority;
+
         /*
          * The memory cgroup that hit its limit and as a result is the
          * primary target of this reclaim invocation.
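The new field replaces the `int priority` argument that was previously threaded through almost every reclaim function below. Callers seed it once in their scan_control and the reclaim loop decrements it, as the try_to_free_pages() and balance_pgdat() hunks later in this diff show; a condensed sketch of the pattern (not a literal excerpt):

        struct scan_control sc = {
                .order = order,
                .priority = DEF_PRIORITY,   /* first pass scans lru_size >> 12 pages */
        };

        do {
                sc.nr_scanned = 0;
                /* shrink zones; each list is scanned size >> sc.priority */
        } while (--sc.priority >= 0);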
@@ -91,11 +94,6 @@ struct scan_control {
         nodemask_t *nodemask;
 };
 
-struct mem_cgroup_zone {
-        struct mem_cgroup *mem_cgroup;
-        struct zone *zone;
-};
-
 #define lru_to_page(_head) (list_entry((_head)->prev, struct page, lru))
 
 #ifdef ARCH_HAS_PREFETCH
@@ -147,24 +145,14 @@ static bool global_reclaim(struct scan_control *sc)
 }
 #endif
 
-static struct zone_reclaim_stat *get_reclaim_stat(struct mem_cgroup_zone *mz)
-{
-        return &mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup)->reclaim_stat;
-}
-
-static unsigned long zone_nr_lru_pages(struct mem_cgroup_zone *mz,
-                                       enum lru_list lru)
+static unsigned long get_lru_size(struct lruvec *lruvec, enum lru_list lru)
 {
         if (!mem_cgroup_disabled())
-                return mem_cgroup_zone_nr_lru_pages(mz->mem_cgroup,
-                                                    zone_to_nid(mz->zone),
-                                                    zone_idx(mz->zone),
-                                                    BIT(lru));
+                return mem_cgroup_get_lru_size(lruvec, lru);
 
-        return zone_page_state(mz->zone, NR_LRU_BASE + lru);
+        return zone_page_state(lruvec_zone(lruvec), NR_LRU_BASE + lru);
 }
 
-
 /*
  * Add a shrinker callback to be called from the vm
  */
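The two mem_cgroup_zone helpers collapse into a single lruvec accessor: with memcg enabled it reads the per-lruvec size, otherwise it falls back to the zone counters, where NR_LRU_BASE + lru maps each enum lru_list onto its zone_stat_item. Callers simply sum the lists they care about, as get_scan_count() does further down in this diff:

        anon = get_lru_size(lruvec, LRU_ACTIVE_ANON) +
               get_lru_size(lruvec, LRU_INACTIVE_ANON);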
@@ -626,7 +614,6 @@ enum page_references {
 };
 
 static enum page_references page_check_references(struct page *page,
-                                                  struct mem_cgroup_zone *mz,
                                                   struct scan_control *sc)
 {
         int referenced_ptes, referenced_page;
@@ -685,9 +672,8 @@ static enum page_references page_check_references(struct page *page,
  * shrink_page_list() returns the number of reclaimed pages
  */
 static unsigned long shrink_page_list(struct list_head *page_list,
-                                      struct mem_cgroup_zone *mz,
+                                      struct zone *zone,
                                       struct scan_control *sc,
-                                      int priority,
                                       unsigned long *ret_nr_dirty,
                                       unsigned long *ret_nr_writeback)
 {
@@ -716,7 +702,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                         goto keep;
 
                 VM_BUG_ON(PageActive(page));
-                VM_BUG_ON(page_zone(page) != mz->zone);
+                VM_BUG_ON(page_zone(page) != zone);
 
                 sc->nr_scanned++;
 
@@ -739,7 +725,7 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                         goto keep;
                 }
 
-                references = page_check_references(page, mz, sc);
+                references = page_check_references(page, sc);
                 switch (references) {
                 case PAGEREF_ACTIVATE:
                         goto activate_locked;
@@ -790,7 +776,8 @@ static unsigned long shrink_page_list(struct list_head *page_list,
                          * unless under significant pressure.
                          */
                         if (page_is_file_cache(page) &&
-                                        (!current_is_kswapd() || priority >= DEF_PRIORITY - 2)) {
+                                        (!current_is_kswapd() ||
+                                         sc->priority >= DEF_PRIORITY - 2)) {
                                 /*
                                  * Immediately reclaim when written back.
                                  * Similar in principal to deactivate_page()
@@ -928,7 +915,7 @@ keep:
          * will encounter the same problem
          */
         if (nr_dirty && nr_dirty == nr_congested && global_reclaim(sc))
-                zone_set_flag(mz->zone, ZONE_CONGESTED);
+                zone_set_flag(zone, ZONE_CONGESTED);
 
         free_hot_cold_page_list(&free_pages, 1);
 
@@ -949,29 +936,14 @@ keep:
  *
  * returns 0 on success, -ve errno on failure.
  */
-int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
+int __isolate_lru_page(struct page *page, isolate_mode_t mode)
 {
-        bool all_lru_mode;
         int ret = -EINVAL;
 
         /* Only take pages on the LRU. */
         if (!PageLRU(page))
                 return ret;
 
-        all_lru_mode = (mode & (ISOLATE_ACTIVE|ISOLATE_INACTIVE)) ==
-                (ISOLATE_ACTIVE|ISOLATE_INACTIVE);
-
-        /*
-         * When checking the active state, we need to be sure we are
-         * dealing with comparible boolean values.  Take the logical not
-         * of each.
-         */
-        if (!all_lru_mode && !PageActive(page) != !(mode & ISOLATE_ACTIVE))
-                return ret;
-
-        if (!all_lru_mode && !!page_is_file_cache(page) != file)
-                return ret;
-
         /* Do not give back unevictable pages for compaction */
         if (PageUnevictable(page))
                 return ret;
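The dropped checks were only needed because callers could hand __isolate_lru_page() a mix of lists. Now that every caller passes the exact LRU list down to isolate_lru_pages(), the function only ever sees pages from that one list, and the ISOLATE_ACTIVE/ISOLATE_INACTIVE mode bits become dead weight (both shrink paths below now start from isolate_mode = 0). The new call shape, as it appears in the shrink_active_list() hunk later in this diff:

        nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
                                     &nr_scanned, sc, isolate_mode, lru);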
@@ -1039,47 +1011,39 @@ int __isolate_lru_page(struct page *page, isolate_mode_t mode, int file)
  * Appropriate locks must be held before calling this function.
  *
  * @nr_to_scan: The number of pages to look through on the list.
- * @mz:         The mem_cgroup_zone to pull pages from.
+ * @lruvec:     The LRU vector to pull pages from.
  * @dst:        The temp list to put pages on to.
  * @nr_scanned: The number of pages that were scanned.
  * @sc:         The scan_control struct for this reclaim session
  * @mode:       One of the LRU isolation modes
- * @active:     True [1] if isolating active pages
- * @file:       True [1] if isolating file [!anon] pages
+ * @lru:        LRU list id for isolating
  *
  * returns how many pages were moved onto *@dst.
  */
 static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
-                struct mem_cgroup_zone *mz, struct list_head *dst,
+                struct lruvec *lruvec, struct list_head *dst,
                 unsigned long *nr_scanned, struct scan_control *sc,
-                isolate_mode_t mode, int active, int file)
+                isolate_mode_t mode, enum lru_list lru)
 {
-        struct lruvec *lruvec;
-        struct list_head *src;
+        struct list_head *src = &lruvec->lists[lru];
         unsigned long nr_taken = 0;
         unsigned long scan;
-        int lru = LRU_BASE;
-
-        lruvec = mem_cgroup_zone_lruvec(mz->zone, mz->mem_cgroup);
-        if (active)
-                lru += LRU_ACTIVE;
-        if (file)
-                lru += LRU_FILE;
-        src = &lruvec->lists[lru];
 
         for (scan = 0; scan < nr_to_scan && !list_empty(src); scan++) {
                 struct page *page;
+                int nr_pages;
 
                 page = lru_to_page(src);
                 prefetchw_prev_lru_page(page, src, flags);
 
                 VM_BUG_ON(!PageLRU(page));
 
-                switch (__isolate_lru_page(page, mode, file)) {
+                switch (__isolate_lru_page(page, mode)) {
                 case 0:
-                        mem_cgroup_lru_del(page);
+                        nr_pages = hpage_nr_pages(page);
+                        mem_cgroup_update_lru_size(lruvec, lru, -nr_pages);
                         list_move(&page->lru, dst);
-                        nr_taken += hpage_nr_pages(page);
+                        nr_taken += nr_pages;
                         break;
 
                 case -EBUSY:
@@ -1093,11 +1057,8 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
         }
 
         *nr_scanned = scan;
-
-        trace_mm_vmscan_lru_isolate(sc->order,
-                        nr_to_scan, scan,
-                        nr_taken,
-                        mode, file);
+        trace_mm_vmscan_lru_isolate(sc->order, nr_to_scan, scan,
+                                    nr_taken, mode, is_file_lru(lru));
         return nr_taken;
 }
 
@@ -1134,15 +1095,16 @@ int isolate_lru_page(struct page *page)
 
         if (PageLRU(page)) {
                 struct zone *zone = page_zone(page);
+                struct lruvec *lruvec;
 
                 spin_lock_irq(&zone->lru_lock);
+                lruvec = mem_cgroup_page_lruvec(page, zone);
                 if (PageLRU(page)) {
                         int lru = page_lru(page);
-                        ret = 0;
                         get_page(page);
                         ClearPageLRU(page);
-
-                        del_page_from_lru_list(zone, page, lru);
+                        del_page_from_lru_list(page, lruvec, lru);
+                        ret = 0;
                 }
                 spin_unlock_irq(&zone->lru_lock);
         }
@@ -1175,11 +1137,10 @@ static int too_many_isolated(struct zone *zone, int file,
 }
 
 static noinline_for_stack void
-putback_inactive_pages(struct mem_cgroup_zone *mz,
-                       struct list_head *page_list)
+putback_inactive_pages(struct lruvec *lruvec, struct list_head *page_list)
 {
-        struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
-        struct zone *zone = mz->zone;
+        struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
+        struct zone *zone = lruvec_zone(lruvec);
         LIST_HEAD(pages_to_free);
 
         /*
@@ -1197,9 +1158,13 @@ putback_inactive_pages(struct mem_cgroup_zone *mz,
                         spin_lock_irq(&zone->lru_lock);
                         continue;
                 }
+
+                lruvec = mem_cgroup_page_lruvec(page, zone);
+
                 SetPageLRU(page);
                 lru = page_lru(page);
-                add_page_to_lru_list(zone, page, lru);
+                add_page_to_lru_list(page, lruvec, lru);
+
                 if (is_active_lru(lru)) {
                         int file = is_file_lru(lru);
                         int numpages = hpage_nr_pages(page);
@@ -1208,7 +1173,7 @@ putback_inactive_pages(struct mem_cgroup_zone *mz,
                 if (put_page_testzero(page)) {
                         __ClearPageLRU(page);
                         __ClearPageActive(page);
-                        del_page_from_lru_list(zone, page, lru);
+                        del_page_from_lru_list(page, lruvec, lru);
 
                         if (unlikely(PageCompound(page))) {
                                 spin_unlock_irq(&zone->lru_lock);
@@ -1225,71 +1190,24 @@ putback_inactive_pages(struct mem_cgroup_zone *mz,
         list_splice(&pages_to_free, page_list);
 }
 
-static noinline_for_stack void
-update_isolated_counts(struct mem_cgroup_zone *mz,
-                       struct list_head *page_list,
-                       unsigned long *nr_anon,
-                       unsigned long *nr_file)
-{
-        struct zone *zone = mz->zone;
-        unsigned int count[NR_LRU_LISTS] = { 0, };
-        unsigned long nr_active = 0;
-        struct page *page;
-        int lru;
-
-        /*
-         * Count pages and clear active flags
-         */
-        list_for_each_entry(page, page_list, lru) {
-                int numpages = hpage_nr_pages(page);
-                lru = page_lru_base_type(page);
-                if (PageActive(page)) {
-                        lru += LRU_ACTIVE;
-                        ClearPageActive(page);
-                        nr_active += numpages;
-                }
-                count[lru] += numpages;
-        }
-
-        preempt_disable();
-        __count_vm_events(PGDEACTIVATE, nr_active);
-
-        __mod_zone_page_state(zone, NR_ACTIVE_FILE,
-                              -count[LRU_ACTIVE_FILE]);
-        __mod_zone_page_state(zone, NR_INACTIVE_FILE,
-                              -count[LRU_INACTIVE_FILE]);
-        __mod_zone_page_state(zone, NR_ACTIVE_ANON,
-                              -count[LRU_ACTIVE_ANON]);
-        __mod_zone_page_state(zone, NR_INACTIVE_ANON,
-                              -count[LRU_INACTIVE_ANON]);
-
-        *nr_anon = count[LRU_ACTIVE_ANON] + count[LRU_INACTIVE_ANON];
-        *nr_file = count[LRU_ACTIVE_FILE] + count[LRU_INACTIVE_FILE];
-
-        __mod_zone_page_state(zone, NR_ISOLATED_ANON, *nr_anon);
-        __mod_zone_page_state(zone, NR_ISOLATED_FILE, *nr_file);
-        preempt_enable();
-}
-
 /*
  * shrink_inactive_list() is a helper for shrink_zone().  It returns the number
  * of reclaimed pages
  */
 static noinline_for_stack unsigned long
-shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
-                     struct scan_control *sc, int priority, int file)
+shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
+                     struct scan_control *sc, enum lru_list lru)
 {
         LIST_HEAD(page_list);
         unsigned long nr_scanned;
         unsigned long nr_reclaimed = 0;
         unsigned long nr_taken;
-        unsigned long nr_anon;
-        unsigned long nr_file;
         unsigned long nr_dirty = 0;
         unsigned long nr_writeback = 0;
-        isolate_mode_t isolate_mode = ISOLATE_INACTIVE;
-        struct zone *zone = mz->zone;
-        struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
+        isolate_mode_t isolate_mode = 0;
+        int file = is_file_lru(lru);
+        struct zone *zone = lruvec_zone(lruvec);
+        struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
 
         while (unlikely(too_many_isolated(zone, file, sc))) {
                 congestion_wait(BLK_RW_ASYNC, HZ/10);
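update_isolated_counts() walked the freshly isolated pages a second time purely to split the statistics by anon/file and active/inactive. Since the pages now all come from one known list, that walk reduces to the two counter updates visible in the next hunk; this works because NR_LRU_BASE + lru selects the matching per-list zone counter, and NR_ISOLATED_ANON + 1 == NR_ISOLATED_FILE so `+ file` picks the right isolated counter:

        __mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken);
        __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);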
@@ -1308,31 +1226,30 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
 
         spin_lock_irq(&zone->lru_lock);
 
-        nr_taken = isolate_lru_pages(nr_to_scan, mz, &page_list, &nr_scanned,
-                                     sc, isolate_mode, 0, file);
+        nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &page_list,
+                                     &nr_scanned, sc, isolate_mode, lru);
+
+        __mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken);
+        __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
+
         if (global_reclaim(sc)) {
                 zone->pages_scanned += nr_scanned;
                 if (current_is_kswapd())
-                        __count_zone_vm_events(PGSCAN_KSWAPD, zone,
-                                               nr_scanned);
+                        __count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scanned);
                 else
-                        __count_zone_vm_events(PGSCAN_DIRECT, zone,
-                                               nr_scanned);
+                        __count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scanned);
         }
         spin_unlock_irq(&zone->lru_lock);
 
         if (nr_taken == 0)
                 return 0;
 
-        update_isolated_counts(mz, &page_list, &nr_anon, &nr_file);
-
-        nr_reclaimed = shrink_page_list(&page_list, mz, sc, priority,
+        nr_reclaimed = shrink_page_list(&page_list, zone, sc,
                                         &nr_dirty, &nr_writeback);
 
         spin_lock_irq(&zone->lru_lock);
 
-        reclaim_stat->recent_scanned[0] += nr_anon;
-        reclaim_stat->recent_scanned[1] += nr_file;
+        reclaim_stat->recent_scanned[file] += nr_taken;
 
         if (global_reclaim(sc)) {
                 if (current_is_kswapd())
@@ -1343,10 +1260,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
                                                nr_reclaimed);
         }
 
-        putback_inactive_pages(mz, &page_list);
+        putback_inactive_pages(lruvec, &page_list);
 
-        __mod_zone_page_state(zone, NR_ISOLATED_ANON, -nr_anon);
-        __mod_zone_page_state(zone, NR_ISOLATED_FILE, -nr_file);
+        __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
 
         spin_unlock_irq(&zone->lru_lock);
 
@@ -1375,13 +1291,14 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
          * DEF_PRIORITY-6 For SWAP_CLUSTER_MAX isolated pages, throttle if any
          *                     isolated page is PageWriteback
          */
-        if (nr_writeback && nr_writeback >= (nr_taken >> (DEF_PRIORITY-priority)))
+        if (nr_writeback && nr_writeback >=
+                        (nr_taken >> (DEF_PRIORITY - sc->priority)))
                 wait_iff_congested(zone, BLK_RW_ASYNC, HZ/10);
 
         trace_mm_vmscan_lru_shrink_inactive(zone->zone_pgdat->node_id,
                 zone_idx(zone),
                 nr_scanned, nr_reclaimed,
-                priority,
+                sc->priority,
                 trace_shrink_flags(file));
         return nr_reclaimed;
 }
@@ -1404,30 +1321,32 @@ shrink_inactive_list(unsigned long nr_to_scan, struct mem_cgroup_zone *mz,
  * But we had to alter page->flags anyway.
  */
 
-static void move_active_pages_to_lru(struct zone *zone,
+static void move_active_pages_to_lru(struct lruvec *lruvec,
                                      struct list_head *list,
                                      struct list_head *pages_to_free,
                                      enum lru_list lru)
 {
+        struct zone *zone = lruvec_zone(lruvec);
         unsigned long pgmoved = 0;
         struct page *page;
+        int nr_pages;
 
         while (!list_empty(list)) {
-                struct lruvec *lruvec;
-
                 page = lru_to_page(list);
+                lruvec = mem_cgroup_page_lruvec(page, zone);
 
                 VM_BUG_ON(PageLRU(page));
                 SetPageLRU(page);
 
-                lruvec = mem_cgroup_lru_add_list(zone, page, lru);
+                nr_pages = hpage_nr_pages(page);
+                mem_cgroup_update_lru_size(lruvec, lru, nr_pages);
                 list_move(&page->lru, &lruvec->lists[lru]);
-                pgmoved += hpage_nr_pages(page);
+                pgmoved += nr_pages;
 
                 if (put_page_testzero(page)) {
                         __ClearPageLRU(page);
                         __ClearPageActive(page);
-                        del_page_from_lru_list(zone, page, lru);
+                        del_page_from_lru_list(page, lruvec, lru);
 
                         if (unlikely(PageCompound(page))) {
                                 spin_unlock_irq(&zone->lru_lock);
@@ -1443,9 +1362,9 @@ static void move_active_pages_to_lru(struct zone *zone,
 }
 
 static void shrink_active_list(unsigned long nr_to_scan,
-                               struct mem_cgroup_zone *mz,
+                               struct lruvec *lruvec,
                                struct scan_control *sc,
-                               int priority, int file)
+                               enum lru_list lru)
 {
         unsigned long nr_taken;
         unsigned long nr_scanned;
@@ -1454,10 +1373,11 @@ static void shrink_active_list(unsigned long nr_to_scan,
         LIST_HEAD(l_active);
         LIST_HEAD(l_inactive);
         struct page *page;
-        struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
+        struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
         unsigned long nr_rotated = 0;
-        isolate_mode_t isolate_mode = ISOLATE_ACTIVE;
-        struct zone *zone = mz->zone;
+        isolate_mode_t isolate_mode = 0;
+        int file = is_file_lru(lru);
+        struct zone *zone = lruvec_zone(lruvec);
 
         lru_add_drain();
 
@@ -1468,18 +1388,15 @@ static void shrink_active_list(unsigned long nr_to_scan,
 
         spin_lock_irq(&zone->lru_lock);
 
-        nr_taken = isolate_lru_pages(nr_to_scan, mz, &l_hold, &nr_scanned, sc,
-                                     isolate_mode, 1, file);
+        nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
+                                     &nr_scanned, sc, isolate_mode, lru);
         if (global_reclaim(sc))
                 zone->pages_scanned += nr_scanned;
 
         reclaim_stat->recent_scanned[file] += nr_taken;
 
         __count_zone_vm_events(PGREFILL, zone, nr_scanned);
-        if (file)
-                __mod_zone_page_state(zone, NR_ACTIVE_FILE, -nr_taken);
-        else
-                __mod_zone_page_state(zone, NR_ACTIVE_ANON, -nr_taken);
+        __mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken);
         __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
         spin_unlock_irq(&zone->lru_lock);
 
@@ -1535,10 +1452,8 @@ static void shrink_active_list(unsigned long nr_to_scan,
          */
         reclaim_stat->recent_rotated[file] += nr_rotated;
 
-        move_active_pages_to_lru(zone, &l_active, &l_hold,
-                                                LRU_ACTIVE + file * LRU_FILE);
-        move_active_pages_to_lru(zone, &l_inactive, &l_hold,
-                                                LRU_BASE + file * LRU_FILE);
+        move_active_pages_to_lru(lruvec, &l_active, &l_hold, lru);
+        move_active_pages_to_lru(lruvec, &l_inactive, &l_hold, lru - LRU_ACTIVE);
         __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
         spin_unlock_irq(&zone->lru_lock);
 
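`lru` here names the active list being shrunk, so `lru - LRU_ACTIVE` is its inactive counterpart; this replaces the old `LRU_ACTIVE + file * LRU_FILE` / `LRU_BASE + file * LRU_FILE` arithmetic. Both encodings lean on the enum layout from include/linux/mmzone.h, reproduced here for reference as it stood at the time:

        enum lru_list {
                LRU_INACTIVE_ANON = LRU_BASE,
                LRU_ACTIVE_ANON = LRU_BASE + LRU_ACTIVE,
                LRU_INACTIVE_FILE = LRU_BASE + LRU_FILE,
                LRU_ACTIVE_FILE = LRU_BASE + LRU_FILE + LRU_ACTIVE,
                LRU_UNEVICTABLE,
                NR_LRU_LISTS
        };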
@@ -1561,13 +1476,12 @@ static int inactive_anon_is_low_global(struct zone *zone)
 
 /**
  * inactive_anon_is_low - check if anonymous pages need to be deactivated
- * @zone: zone to check
- * @sc:   scan control of this context
+ * @lruvec: LRU vector to check
  *
  * Returns true if the zone does not have enough inactive anon pages,
  * meaning some active anon pages need to be deactivated.
  */
-static int inactive_anon_is_low(struct mem_cgroup_zone *mz)
+static int inactive_anon_is_low(struct lruvec *lruvec)
 {
         /*
          * If we don't have swap space, anonymous page deactivation
@@ -1577,13 +1491,12 @@ static int inactive_anon_is_low(struct mem_cgroup_zone *mz)
                 return 0;
 
         if (!mem_cgroup_disabled())
-                return mem_cgroup_inactive_anon_is_low(mz->mem_cgroup,
-                                                       mz->zone);
+                return mem_cgroup_inactive_anon_is_low(lruvec);
 
-        return inactive_anon_is_low_global(mz->zone);
+        return inactive_anon_is_low_global(lruvec_zone(lruvec));
 }
 #else
-static inline int inactive_anon_is_low(struct mem_cgroup_zone *mz)
+static inline int inactive_anon_is_low(struct lruvec *lruvec)
 {
         return 0;
 }
@@ -1601,7 +1514,7 @@ static int inactive_file_is_low_global(struct zone *zone)
 
 /**
  * inactive_file_is_low - check if file pages need to be deactivated
- * @mz: memory cgroup and zone to check
+ * @lruvec: LRU vector to check
  *
  * When the system is doing streaming IO, memory pressure here
  * ensures that active file pages get deactivated, until more
@@ -1613,44 +1526,39 @@ static int inactive_file_is_low_global(struct zone *zone)
  * This uses a different ratio than the anonymous pages, because
  * the page cache uses a use-once replacement algorithm.
  */
-static int inactive_file_is_low(struct mem_cgroup_zone *mz)
+static int inactive_file_is_low(struct lruvec *lruvec)
 {
         if (!mem_cgroup_disabled())
-                return mem_cgroup_inactive_file_is_low(mz->mem_cgroup,
-                                                       mz->zone);
+                return mem_cgroup_inactive_file_is_low(lruvec);
 
-        return inactive_file_is_low_global(mz->zone);
+        return inactive_file_is_low_global(lruvec_zone(lruvec));
 }
 
-static int inactive_list_is_low(struct mem_cgroup_zone *mz, int file)
+static int inactive_list_is_low(struct lruvec *lruvec, enum lru_list lru)
 {
-        if (file)
-                return inactive_file_is_low(mz);
+        if (is_file_lru(lru))
+                return inactive_file_is_low(lruvec);
         else
-                return inactive_anon_is_low(mz);
+                return inactive_anon_is_low(lruvec);
 }
 
 static unsigned long shrink_list(enum lru_list lru, unsigned long nr_to_scan,
-                                 struct mem_cgroup_zone *mz,
-                                 struct scan_control *sc, int priority)
+                                 struct lruvec *lruvec, struct scan_control *sc)
 {
-        int file = is_file_lru(lru);
-
         if (is_active_lru(lru)) {
-                if (inactive_list_is_low(mz, file))
-                        shrink_active_list(nr_to_scan, mz, sc, priority, file);
+                if (inactive_list_is_low(lruvec, lru))
+                        shrink_active_list(nr_to_scan, lruvec, sc, lru);
                 return 0;
         }
 
-        return shrink_inactive_list(nr_to_scan, mz, sc, priority, file);
+        return shrink_inactive_list(nr_to_scan, lruvec, sc, lru);
 }
 
-static int vmscan_swappiness(struct mem_cgroup_zone *mz,
-                             struct scan_control *sc)
+static int vmscan_swappiness(struct scan_control *sc)
 {
         if (global_reclaim(sc))
                 return vm_swappiness;
-        return mem_cgroup_swappiness(mz->mem_cgroup);
+        return mem_cgroup_swappiness(sc->target_mem_cgroup);
 }
 
 /*
@@ -1662,17 +1570,18 @@ static int vmscan_swappiness(struct mem_cgroup_zone *mz,
  * nr[0] = anon inactive pages to scan; nr[1] = anon active pages to scan
  * nr[2] = file inactive pages to scan; nr[3] = file active pages to scan
  */
-static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
-                           unsigned long *nr, int priority)
+static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
+                           unsigned long *nr)
 {
         unsigned long anon, file, free;
         unsigned long anon_prio, file_prio;
         unsigned long ap, fp;
-        struct zone_reclaim_stat *reclaim_stat = get_reclaim_stat(mz);
+        struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
         u64 fraction[2], denominator;
         enum lru_list lru;
         int noswap = 0;
         bool force_scan = false;
+        struct zone *zone = lruvec_zone(lruvec);
 
         /*
          * If the zone or memcg is small, nr[l] can be 0.  This
@@ -1684,7 +1593,7 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
          * latencies, so it's better to scan a minimum amount there as
          * well.
          */
-        if (current_is_kswapd() && mz->zone->all_unreclaimable)
+        if (current_is_kswapd() && zone->all_unreclaimable)
                 force_scan = true;
         if (!global_reclaim(sc))
                 force_scan = true;
@@ -1698,16 +1607,16 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
                 goto out;
         }
 
-        anon  = zone_nr_lru_pages(mz, LRU_ACTIVE_ANON) +
-                zone_nr_lru_pages(mz, LRU_INACTIVE_ANON);
-        file  = zone_nr_lru_pages(mz, LRU_ACTIVE_FILE) +
-                zone_nr_lru_pages(mz, LRU_INACTIVE_FILE);
+        anon  = get_lru_size(lruvec, LRU_ACTIVE_ANON) +
+                get_lru_size(lruvec, LRU_INACTIVE_ANON);
+        file  = get_lru_size(lruvec, LRU_ACTIVE_FILE) +
+                get_lru_size(lruvec, LRU_INACTIVE_FILE);
 
         if (global_reclaim(sc)) {
-                free  = zone_page_state(mz->zone, NR_FREE_PAGES);
+                free  = zone_page_state(zone, NR_FREE_PAGES);
                 /* If we have very few page cache pages,
                    force-scan anon pages. */
-                if (unlikely(file + free <= high_wmark_pages(mz->zone))) {
+                if (unlikely(file + free <= high_wmark_pages(zone))) {
                         fraction[0] = 1;
                         fraction[1] = 0;
                         denominator = 1;
@@ -1719,8 +1628,8 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
          * With swappiness at 100, anonymous and file have the same priority.
          * This scanning priority is essentially the inverse of IO cost.
          */
-        anon_prio = vmscan_swappiness(mz, sc);
-        file_prio = 200 - vmscan_swappiness(mz, sc);
+        anon_prio = vmscan_swappiness(sc);
+        file_prio = 200 - anon_prio;
 
         /*
          * OK, so we have swap space and a fair amount of page cache
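Calling vmscan_swappiness() once and deriving file_prio from the result is equivalent to the old pair of calls, just cheaper. With the default vm_swappiness of 60 the weights come out as:

        anon_prio = 60;
        file_prio = 200 - 60;   /* = 140, so file lists weigh roughly 2.3x anon */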
@@ -1733,7 +1642,7 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
          *
          * anon in [0], file in [1]
          */
-        spin_lock_irq(&mz->zone->lru_lock);
+        spin_lock_irq(&zone->lru_lock);
         if (unlikely(reclaim_stat->recent_scanned[0] > anon / 4)) {
                 reclaim_stat->recent_scanned[0] /= 2;
                 reclaim_stat->recent_rotated[0] /= 2;
@@ -1754,7 +1663,7 @@ static void get_scan_count(struct mem_cgroup_zone *mz, struct scan_control *sc,
 
         fp = file_prio * (reclaim_stat->recent_scanned[1] + 1);
         fp /= reclaim_stat->recent_rotated[1] + 1;
-        spin_unlock_irq(&mz->zone->lru_lock);
+        spin_unlock_irq(&zone->lru_lock);
 
         fraction[0] = ap;
         fraction[1] = fp;
@@ -1764,9 +1673,9 @@ out:
                 int file = is_file_lru(lru);
                 unsigned long scan;
 
-                scan = zone_nr_lru_pages(mz, lru);
-                if (priority || noswap || !vmscan_swappiness(mz, sc)) {
-                        scan >>= priority;
+                scan = get_lru_size(lruvec, lru);
+                if (sc->priority || noswap || !vmscan_swappiness(sc)) {
+                        scan >>= sc->priority;
                         if (!scan && force_scan)
                                 scan = SWAP_CLUSTER_MAX;
                         scan = div64_u64(scan * fraction[file], denominator);
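A worked example of the per-list target, with illustrative numbers: an lruvec holding 1,048,576 inactive file pages, scanned at the initial priority (DEF_PRIORITY == 12), with fraction[1] == 3 and denominator == 4 coming out of the recent_scanned/recent_rotated feedback above:

        scan = 1048576 >> 12;              /* 256 pages */
        scan = div64_u64(scan * 3, 4);     /* 192 pages to scan this pass */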
@@ -1776,11 +1685,11 @@ out:
 }
 
 /* Use reclaim/compaction for costly allocs or under memory pressure */
-static bool in_reclaim_compaction(int priority, struct scan_control *sc)
+static bool in_reclaim_compaction(struct scan_control *sc)
 {
         if (COMPACTION_BUILD && sc->order &&
                         (sc->order > PAGE_ALLOC_COSTLY_ORDER ||
-                         priority < DEF_PRIORITY - 2))
+                         sc->priority < DEF_PRIORITY - 2))
                 return true;
 
         return false;
@@ -1793,17 +1702,16 @@ static bool in_reclaim_compaction(int priority, struct scan_control *sc)
  * calls try_to_compact_zone() that it will have enough free pages to succeed.
  * It will give up earlier than that if there is difficulty reclaiming pages.
  */
-static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
+static inline bool should_continue_reclaim(struct lruvec *lruvec,
                                            unsigned long nr_reclaimed,
                                            unsigned long nr_scanned,
-                                           int priority,
                                            struct scan_control *sc)
 {
         unsigned long pages_for_compaction;
         unsigned long inactive_lru_pages;
 
         /* If not in reclaim/compaction mode, stop */
-        if (!in_reclaim_compaction(priority, sc))
+        if (!in_reclaim_compaction(sc))
                 return false;
 
         /* Consider stopping depending on scan and reclaim activity */
@@ -1834,15 +1742,15 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
          * inactive lists are large enough, continue reclaiming
          */
         pages_for_compaction = (2UL << sc->order);
-        inactive_lru_pages = zone_nr_lru_pages(mz, LRU_INACTIVE_FILE);
+        inactive_lru_pages = get_lru_size(lruvec, LRU_INACTIVE_FILE);
         if (nr_swap_pages > 0)
-                inactive_lru_pages += zone_nr_lru_pages(mz, LRU_INACTIVE_ANON);
+                inactive_lru_pages += get_lru_size(lruvec, LRU_INACTIVE_ANON);
         if (sc->nr_reclaimed < pages_for_compaction &&
                         inactive_lru_pages > pages_for_compaction)
                 return true;
 
         /* If compaction would go ahead or the allocation would succeed, stop */
-        switch (compaction_suitable(mz->zone, sc->order)) {
+        switch (compaction_suitable(lruvec_zone(lruvec), sc->order)) {
         case COMPACT_PARTIAL:
         case COMPACT_CONTINUE:
                 return false;
@@ -1854,8 +1762,7 @@ static inline bool should_continue_reclaim(struct mem_cgroup_zone *mz,
 /*
  * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
  */
-static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz,
-                                   struct scan_control *sc)
+static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc)
 {
         unsigned long nr[NR_LRU_LISTS];
         unsigned long nr_to_scan;
@@ -1867,7 +1774,7 @@ static void shrink_mem_cgroup_zone(int priority, struct mem_cgroup_zone *mz,
 restart:
         nr_reclaimed = 0;
         nr_scanned = sc->nr_scanned;
-        get_scan_count(mz, sc, nr, priority);
+        get_scan_count(lruvec, sc, nr);
 
         blk_start_plug(&plug);
         while (nr[LRU_INACTIVE_ANON] || nr[LRU_ACTIVE_FILE] ||
@@ -1879,7 +1786,7 @@ restart:
                                 nr[lru] -= nr_to_scan;
 
                                 nr_reclaimed += shrink_list(lru, nr_to_scan,
-                                                            mz, sc, priority);
+                                                            lruvec, sc);
                         }
                 }
                 /*
@@ -1890,7 +1797,8 @@ restart:
                  * with multiple processes reclaiming pages, the total
                  * freeing target can get unreasonably large.
                  */
-                if (nr_reclaimed >= nr_to_reclaim && priority < DEF_PRIORITY)
+                if (nr_reclaimed >= nr_to_reclaim &&
+                    sc->priority < DEF_PRIORITY)
                         break;
         }
         blk_finish_plug(&plug);
@@ -1900,36 +1808,33 @@ restart:
          * Even if we did not try to evict anon pages at all, we want to
          * rebalance the anon lru active/inactive ratio.
          */
-        if (inactive_anon_is_low(mz))
-                shrink_active_list(SWAP_CLUSTER_MAX, mz, sc, priority, 0);
+        if (inactive_anon_is_low(lruvec))
+                shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
+                                   sc, LRU_ACTIVE_ANON);
 
         /* reclaim/compaction might need reclaim to continue */
-        if (should_continue_reclaim(mz, nr_reclaimed,
-                                    sc->nr_scanned - nr_scanned,
-                                    priority, sc))
+        if (should_continue_reclaim(lruvec, nr_reclaimed,
+                                    sc->nr_scanned - nr_scanned, sc))
                 goto restart;
 
         throttle_vm_writeout(sc->gfp_mask);
 }
 
-static void shrink_zone(int priority, struct zone *zone,
-                        struct scan_control *sc)
+static void shrink_zone(struct zone *zone, struct scan_control *sc)
 {
         struct mem_cgroup *root = sc->target_mem_cgroup;
         struct mem_cgroup_reclaim_cookie reclaim = {
                 .zone = zone,
-                .priority = priority,
+                .priority = sc->priority,
         };
         struct mem_cgroup *memcg;
 
         memcg = mem_cgroup_iter(root, NULL, &reclaim);
         do {
-                struct mem_cgroup_zone mz = {
-                        .mem_cgroup = memcg,
-                        .zone = zone,
-                };
+                struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+
+                shrink_lruvec(lruvec, sc);
 
-                shrink_mem_cgroup_zone(priority, &mz, sc);
                 /*
                  * Limit reclaim has historically picked one memcg and
                  * scanned it with decreasing priority levels until
@@ -2005,8 +1910,7 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
  * the caller that it should consider retrying the allocation instead of
  * further reclaim.
  */
-static bool shrink_zones(int priority, struct zonelist *zonelist,
-                         struct scan_control *sc)
+static bool shrink_zones(struct zonelist *zonelist, struct scan_control *sc)
 {
         struct zoneref *z;
         struct zone *zone;
@@ -2033,7 +1937,8 @@ static bool shrink_zones(int priority, struct zonelist *zonelist,
                 if (global_reclaim(sc)) {
                         if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
                                 continue;
-                        if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+                        if (zone->all_unreclaimable &&
+                            sc->priority != DEF_PRIORITY)
                                 continue;       /* Let kswapd poll it */
                         if (COMPACTION_BUILD) {
                                 /*
@@ -2065,7 +1970,7 @@ static bool shrink_zones(int priority, struct zonelist *zonelist,
                         /* need some check for avoid more shrink_zone() */
                 }
 
-                shrink_zone(priority, zone, sc);
+                shrink_zone(zone, sc);
         }
 
         return aborted_reclaim;
@@ -2116,7 +2021,6 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                                         struct scan_control *sc,
                                         struct shrink_control *shrink)
 {
-        int priority;
         unsigned long total_scanned = 0;
         struct reclaim_state *reclaim_state = current->reclaim_state;
         struct zoneref *z;
@@ -2129,9 +2033,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
         if (global_reclaim(sc))
                 count_vm_event(ALLOCSTALL);
 
-        for (priority = DEF_PRIORITY; priority >= 0; priority--) {
+        do {
                 sc->nr_scanned = 0;
-                aborted_reclaim = shrink_zones(priority, zonelist, sc);
+                aborted_reclaim = shrink_zones(zonelist, sc);
 
                 /*
                  * Don't shrink slabs when reclaiming memory from
@@ -2173,7 +2077,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
 
                 /* Take a nap, wait for some writeback to complete */
                 if (!sc->hibernation_mode && sc->nr_scanned &&
-                    priority < DEF_PRIORITY - 2) {
+                    sc->priority < DEF_PRIORITY - 2) {
                         struct zone *preferred_zone;
 
                         first_zones_zonelist(zonelist, gfp_zone(sc->gfp_mask),
@@ -2181,7 +2085,7 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
                                                 &preferred_zone);
                         wait_iff_congested(preferred_zone, BLK_RW_ASYNC, HZ/10);
                 }
-        }
+        } while (--sc->priority >= 0);
 
 out:
         delayacct_freepages_end();
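The for loop over a local priority becomes a do/while over sc->priority; both visit DEF_PRIORITY down to 0, thirteen passes in all. A standalone userspace sketch (hypothetical, just to illustrate the equivalence of the two loop shapes):

        #include <stdio.h>

        #define DEF_PRIORITY 12

        int main(void)
        {
                int old_passes = 0, new_passes = 0;
                int priority;

                for (priority = DEF_PRIORITY; priority >= 0; priority--)
                        old_passes++;                   /* old loop shape */

                priority = DEF_PRIORITY;
                do {
                        new_passes++;                   /* new loop shape */
                } while (--priority >= 0);

                printf("%d %d\n", old_passes, new_passes);  /* prints "13 13" */
                return 0;
        }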
@@ -2219,6 +2123,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
                 .may_unmap = 1,
                 .may_swap = 1,
                 .order = order,
+                .priority = DEF_PRIORITY,
                 .target_mem_cgroup = NULL,
                 .nodemask = nodemask,
         };
@@ -2251,17 +2156,15 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
                 .may_unmap = 1,
                 .may_swap = !noswap,
                 .order = 0,
+                .priority = 0,
                 .target_mem_cgroup = memcg,
         };
-        struct mem_cgroup_zone mz = {
-                .mem_cgroup = memcg,
-                .zone = zone,
-        };
+        struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
 
         sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                         (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
 
-        trace_mm_vmscan_memcg_softlimit_reclaim_begin(0,
+        trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order,
                                                       sc.may_writepage,
                                                       sc.gfp_mask);
 
@@ -2272,7 +2175,7 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg,
          * will pick up pages from other mem cgroup's as well. We hack
          * the priority and make it zero.
          */
-        shrink_mem_cgroup_zone(0, &mz, &sc);
+        shrink_lruvec(lruvec, &sc);
 
         trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
 
@@ -2293,6 +2196,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
                 .may_swap = !noswap,
                 .nr_to_reclaim = SWAP_CLUSTER_MAX,
                 .order = 0,
+                .priority = DEF_PRIORITY,
                 .target_mem_cgroup = memcg,
                 .nodemask = NULL, /* we don't care the placement */
                 .gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
2323 | } | 2227 | } |
2324 | #endif | 2228 | #endif |
2325 | 2229 | ||
2326 | static void age_active_anon(struct zone *zone, struct scan_control *sc, | 2230 | static void age_active_anon(struct zone *zone, struct scan_control *sc) |
2327 | int priority) | ||
2328 | { | 2231 | { |
2329 | struct mem_cgroup *memcg; | 2232 | struct mem_cgroup *memcg; |
2330 | 2233 | ||
@@ -2333,14 +2236,11 @@ static void age_active_anon(struct zone *zone, struct scan_control *sc,
 
         memcg = mem_cgroup_iter(NULL, NULL, NULL);
         do {
-                struct mem_cgroup_zone mz = {
-                        .mem_cgroup = memcg,
-                        .zone = zone,
-                };
+                struct lruvec *lruvec = mem_cgroup_zone_lruvec(zone, memcg);
 
-                if (inactive_anon_is_low(&mz))
-                        shrink_active_list(SWAP_CLUSTER_MAX, &mz,
-                                           sc, priority, 0);
+                if (inactive_anon_is_low(lruvec))
+                        shrink_active_list(SWAP_CLUSTER_MAX, lruvec,
+                                           sc, LRU_ACTIVE_ANON);
 
                 memcg = mem_cgroup_iter(NULL, memcg, NULL);
         } while (memcg);
@@ -2449,7 +2349,6 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
 {
         int all_zones_ok;
         unsigned long balanced;
-        int priority;
         int i;
         int end_zone = 0;       /* Inclusive.  0 = ZONE_DMA */
         unsigned long total_scanned;
@@ -2473,11 +2372,12 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order,
         };
 loop_again:
         total_scanned = 0;
+        sc.priority = DEF_PRIORITY;
         sc.nr_reclaimed = 0;
         sc.may_writepage = !laptop_mode;
         count_vm_event(PAGEOUTRUN);
 
-        for (priority = DEF_PRIORITY; priority >= 0; priority--) {
+        do {
                 unsigned long lru_pages = 0;
                 int has_under_min_watermark_zone = 0;
 
@@ -2494,14 +2394,15 @@ loop_again:
                         if (!populated_zone(zone))
                                 continue;
 
-                        if (zone->all_unreclaimable && priority != DEF_PRIORITY)
+                        if (zone->all_unreclaimable &&
+                            sc.priority != DEF_PRIORITY)
                                 continue;
 
                         /*
                          * Do some background aging of the anon list, to give
                          * pages a chance to be referenced before reclaiming.
                          */
-                        age_active_anon(zone, &sc, priority);
+                        age_active_anon(zone, &sc);
 
                         /*
                          * If the number of buffer_heads in the machine
@@ -2549,7 +2450,8 @@ loop_again: | |||
2549 | if (!populated_zone(zone)) | 2450 | if (!populated_zone(zone)) |
2550 | continue; | 2451 | continue; |
2551 | 2452 | ||
2552 | if (zone->all_unreclaimable && priority != DEF_PRIORITY) | 2453 | if (zone->all_unreclaimable && |
2454 | sc.priority != DEF_PRIORITY) | ||
2553 | continue; | 2455 | continue; |
2554 | 2456 | ||
2555 | sc.nr_scanned = 0; | 2457 | sc.nr_scanned = 0; |
@@ -2593,7 +2495,7 @@ loop_again: | |||
2593 | !zone_watermark_ok_safe(zone, testorder, | 2495 | !zone_watermark_ok_safe(zone, testorder, |
2594 | high_wmark_pages(zone) + balance_gap, | 2496 | high_wmark_pages(zone) + balance_gap, |
2595 | end_zone, 0)) { | 2497 | end_zone, 0)) { |
2596 | shrink_zone(priority, zone, &sc); | 2498 | shrink_zone(zone, &sc); |
2597 | 2499 | ||
2598 | reclaim_state->reclaimed_slab = 0; | 2500 | reclaim_state->reclaimed_slab = 0; |
2599 | nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages); | 2501 | nr_slab = shrink_slab(&shrink, sc.nr_scanned, lru_pages); |
@@ -2650,7 +2552,7 @@ loop_again: | |||
2650 | * OK, kswapd is getting into trouble. Take a nap, then take | 2552 | * OK, kswapd is getting into trouble. Take a nap, then take |
2651 | * another pass across the zones. | 2553 | * another pass across the zones. |
2652 | */ | 2554 | */ |
2653 | if (total_scanned && (priority < DEF_PRIORITY - 2)) { | 2555 | if (total_scanned && (sc.priority < DEF_PRIORITY - 2)) { |
2654 | if (has_under_min_watermark_zone) | 2556 | if (has_under_min_watermark_zone) |
2655 | count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT); | 2557 | count_vm_event(KSWAPD_SKIP_CONGESTION_WAIT); |
2656 | else | 2558 | else |
@@ -2665,7 +2567,7 @@ loop_again: | |||
2665 | */ | 2567 | */ |
2666 | if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX) | 2568 | if (sc.nr_reclaimed >= SWAP_CLUSTER_MAX) |
2667 | break; | 2569 | break; |
2668 | } | 2570 | } while (--sc.priority >= 0); |
2669 | out: | 2571 | out: |
2670 | 2572 | ||
2671 | /* | 2573 | /* |
@@ -2715,7 +2617,8 @@ out: | |||
2715 | if (!populated_zone(zone)) | 2617 | if (!populated_zone(zone)) |
2716 | continue; | 2618 | continue; |
2717 | 2619 | ||
2718 | if (zone->all_unreclaimable && priority != DEF_PRIORITY) | 2620 | if (zone->all_unreclaimable && |
2621 | sc.priority != DEF_PRIORITY) | ||
2719 | continue; | 2622 | continue; |
2720 | 2623 | ||
2721 | /* Would compaction fail due to lack of free memory? */ | 2624 | /* Would compaction fail due to lack of free memory? */ |
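With priority folded into scan_control, balance_pgdat's descent reads do { ... } while (--sc.priority >= 0): each pass scans roughly total >> priority pages, so the scan window doubles as priority falls from DEF_PRIORITY toward 0, and the loop bails early once enough was reclaimed. A runnable sketch of that escalation; the reclaim body and the 1/64 yield are invented for illustration.

	#include <stdio.h>

	#define DEF_PRIORITY 12

	struct scan_control {
		int priority;
		unsigned long nr_reclaimed;
		unsigned long nr_to_reclaim;
	};

	/* Toy reclaim: pretend 1/64 of scanned pages were reclaimable. */
	static void shrink(unsigned long lru_pages, struct scan_control *sc)
	{
		unsigned long scanned = lru_pages >> sc->priority;

		sc->nr_reclaimed += scanned / 64;
		printf("priority %2d: scanned %8lu, reclaimed so far %lu\n",
		       sc->priority, scanned, sc->nr_reclaimed);
	}

	int main(void)
	{
		struct scan_control sc = { .priority = DEF_PRIORITY,
					   .nr_to_reclaim = 1024 };

		do {
			shrink(1UL << 22, &sc);	/* 4M pages on the LRUs */
			if (sc.nr_reclaimed >= sc.nr_to_reclaim)
				break;		/* enough progress: stop early */
		} while (--sc.priority >= 0);
		return 0;
	}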
@@ -2786,7 +2689,10 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx) | |||
2786 | * them before going back to sleep. | 2689 | * them before going back to sleep. |
2787 | */ | 2690 | */ |
2788 | set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold); | 2691 | set_pgdat_percpu_threshold(pgdat, calculate_normal_threshold); |
2789 | schedule(); | 2692 | |
2693 | if (!kthread_should_stop()) | ||
2694 | schedule(); | ||
2695 | |||
2790 | set_pgdat_percpu_threshold(pgdat, calculate_pressure_threshold); | 2696 | set_pgdat_percpu_threshold(pgdat, calculate_pressure_threshold); |
2791 | } else { | 2697 | } else { |
2792 | if (remaining) | 2698 | if (remaining) |
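The kthread_should_stop() check before schedule() closes a window where kswapd, already asked to stop by memory hotplug, would nap anyway and stall the stopper. The kernel mechanism (kthreads, TASK_INTERRUPTIBLE) is not reproducible in userspace; the pthread sketch below only mirrors the ordering rule of re-testing the stop flag immediately before blocking.

	#include <pthread.h>
	#include <stdbool.h>
	#include <stdio.h>

	static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t wake = PTHREAD_COND_INITIALIZER;
	static bool should_stop;

	/* Worker blocks only after re-checking the stop flag under the lock. */
	static void *worker(void *arg)
	{
		(void)arg;
		pthread_mutex_lock(&lock);
		while (!should_stop)		/* the check before sleeping */
			pthread_cond_wait(&wake, &lock);
		pthread_mutex_unlock(&lock);
		printf("worker: stop seen, exiting\n");
		return NULL;
	}

	int main(void)
	{
		pthread_t t;

		pthread_create(&t, NULL, worker, NULL);

		pthread_mutex_lock(&lock);
		should_stop = true;		/* analogue of kthread_stop() */
		pthread_cond_signal(&wake);
		pthread_mutex_unlock(&lock);

		pthread_join(t, NULL);
		return 0;
	}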
@@ -2982,6 +2888,7 @@ unsigned long shrink_all_memory(unsigned long nr_to_reclaim) | |||
2982 | .nr_to_reclaim = nr_to_reclaim, | 2888 | .nr_to_reclaim = nr_to_reclaim, |
2983 | .hibernation_mode = 1, | 2889 | .hibernation_mode = 1, |
2984 | .order = 0, | 2890 | .order = 0, |
2891 | .priority = DEF_PRIORITY, | ||
2985 | }; | 2892 | }; |
2986 | struct shrink_control shrink = { | 2893 | struct shrink_control shrink = { |
2987 | .gfp_mask = sc.gfp_mask, | 2894 | .gfp_mask = sc.gfp_mask, |
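The hibernation path gets the same seeding. One subtlety of moving the knob into the struct, worth noting as an aside: a designated initializer that omitted .priority would silently start at 0, the most aggressive pass, since C zero-fills unnamed members. That is presumably why each converted entry point spells out DEF_PRIORITY. A two-line demonstration of the pitfall:

	#include <stdio.h>

	#define DEF_PRIORITY 12

	struct scan_control {
		unsigned long nr_to_reclaim;
		int priority;
	};

	int main(void)
	{
		/* omitted field: zero-initialized, i.e. priority 0 (full scan) */
		struct scan_control forgot = { .nr_to_reclaim = 32 };
		/* converted call sites seed it explicitly */
		struct scan_control seeded = { .nr_to_reclaim = 32,
					       .priority = DEF_PRIORITY };

		printf("forgot: %d, seeded: %d\n",
		       forgot.priority, seeded.priority);
		return 0;
	}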
@@ -3052,14 +2959,17 @@ int kswapd_run(int nid) | |||
3052 | } | 2959 | } |
3053 | 2960 | ||
3054 | /* | 2961 | /* |
3055 | * Called by memory hotplug when all memory in a node is offlined. | 2962 | * Called by memory hotplug when all memory in a node is offlined. Caller must |
2963 | * hold lock_memory_hotplug(). | ||
3056 | */ | 2964 | */ |
3057 | void kswapd_stop(int nid) | 2965 | void kswapd_stop(int nid) |
3058 | { | 2966 | { |
3059 | struct task_struct *kswapd = NODE_DATA(nid)->kswapd; | 2967 | struct task_struct *kswapd = NODE_DATA(nid)->kswapd; |
3060 | 2968 | ||
3061 | if (kswapd) | 2969 | if (kswapd) { |
3062 | kthread_stop(kswapd); | 2970 | kthread_stop(kswapd); |
2971 | NODE_DATA(nid)->kswapd = NULL; | ||
2972 | } | ||
3063 | } | 2973 | } |
3064 | 2974 | ||
3065 | static int __init kswapd_init(void) | 2975 | static int __init kswapd_init(void) |
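Besides documenting the locking rule, kswapd_stop() now clears NODE_DATA(nid)->kswapd after kthread_stop(), so a later node online sees an empty slot rather than a stale task pointer. The stale-handle hazard in miniature, as a userspace sketch with hypothetical names and malloc/free standing in for thread start/stop:

	#include <stdio.h>
	#include <stdlib.h>

	struct task { int nid; };

	struct node_data {
		struct task *kswapd;	/* per-node worker handle */
	};

	static struct node_data node0;

	static void kswapd_run(int nid)
	{
		if (node0.kswapd)	/* already running: nothing to do */
			return;
		node0.kswapd = malloc(sizeof(*node0.kswapd));
		node0.kswapd->nid = nid;
		printf("started kswapd for node %d\n", nid);
	}

	static void kswapd_stop(void)
	{
		if (node0.kswapd) {
			free(node0.kswapd);	/* analogue of kthread_stop() */
			node0.kswapd = NULL;	/* the fix: drop the stale handle */
		}
	}

	int main(void)
	{
		kswapd_run(0);
		kswapd_stop();	/* memory offlined */
		kswapd_run(0);	/* re-online works: the slot was cleared */
		return 0;
	}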
@@ -3159,7 +3069,6 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | |||
3159 | const unsigned long nr_pages = 1 << order; | 3069 | const unsigned long nr_pages = 1 << order; |
3160 | struct task_struct *p = current; | 3070 | struct task_struct *p = current; |
3161 | struct reclaim_state reclaim_state; | 3071 | struct reclaim_state reclaim_state; |
3162 | int priority; | ||
3163 | struct scan_control sc = { | 3072 | struct scan_control sc = { |
3164 | .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE), | 3073 | .may_writepage = !!(zone_reclaim_mode & RECLAIM_WRITE), |
3165 | .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP), | 3074 | .may_unmap = !!(zone_reclaim_mode & RECLAIM_SWAP), |
@@ -3168,6 +3077,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | |||
3168 | SWAP_CLUSTER_MAX), | 3077 | SWAP_CLUSTER_MAX), |
3169 | .gfp_mask = gfp_mask, | 3078 | .gfp_mask = gfp_mask, |
3170 | .order = order, | 3079 | .order = order, |
3080 | .priority = ZONE_RECLAIM_PRIORITY, | ||
3171 | }; | 3081 | }; |
3172 | struct shrink_control shrink = { | 3082 | struct shrink_control shrink = { |
3173 | .gfp_mask = sc.gfp_mask, | 3083 | .gfp_mask = sc.gfp_mask, |
@@ -3190,11 +3100,9 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) | |||
3190 | * Free memory by calling shrink zone with increasing | 3100 | * Free memory by calling shrink zone with increasing |
3191 | * priorities until we have enough memory freed. | 3101 | * priorities until we have enough memory freed. |
3192 | */ | 3102 | */ |
3193 | priority = ZONE_RECLAIM_PRIORITY; | ||
3194 | do { | 3103 | do { |
3195 | shrink_zone(priority, zone, &sc); | 3104 | shrink_zone(zone, &sc); |
3196 | priority--; | 3105 | } while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0); |
3197 | } while (priority >= 0 && sc.nr_reclaimed < nr_pages); | ||
3198 | } | 3106 | } |
3199 | 3107 | ||
3200 | nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE); | 3108 | nr_slab_pages0 = zone_page_state(zone, NR_SLAB_RECLAIMABLE); |
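The rewritten __zone_reclaim loop leans on short-circuit evaluation: in `sc.nr_reclaimed < nr_pages && --sc.priority >= 0`, the decrement never runs once enough pages came back, and otherwise priority walks down from ZONE_RECLAIM_PRIORITY to 0. A standalone rendering of just that control flow; the reclaim body is a made-up stub.

	#include <stdio.h>

	#define ZONE_RECLAIM_PRIORITY 4

	int main(void)
	{
		const unsigned long nr_pages = 64;
		unsigned long nr_reclaimed = 0;
		int priority = ZONE_RECLAIM_PRIORITY;

		do {
			/* stub for shrink_zone(): reclaim more each pass */
			nr_reclaimed += 8UL << (ZONE_RECLAIM_PRIORITY - priority);
			printf("priority %d: %lu/%lu\n", priority,
			       nr_reclaimed, nr_pages);
			/* && short-circuits: once done, priority is untouched */
		} while (nr_reclaimed < nr_pages && --priority >= 0);

		return 0;
	}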
@@ -3345,6 +3253,7 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages) | |||
3345 | zone = pagezone; | 3253 | zone = pagezone; |
3346 | spin_lock_irq(&zone->lru_lock); | 3254 | spin_lock_irq(&zone->lru_lock); |
3347 | } | 3255 | } |
3256 | lruvec = mem_cgroup_page_lruvec(page, zone); | ||
3348 | 3257 | ||
3349 | if (!PageLRU(page) || !PageUnevictable(page)) | 3258 | if (!PageLRU(page) || !PageUnevictable(page)) |
3350 | continue; | 3259 | continue; |
@@ -3354,11 +3263,8 @@ void check_move_unevictable_pages(struct page **pages, int nr_pages) | |||
3354 | 3263 | ||
3355 | VM_BUG_ON(PageActive(page)); | 3264 | VM_BUG_ON(PageActive(page)); |
3356 | ClearPageUnevictable(page); | 3265 | ClearPageUnevictable(page); |
3357 | __dec_zone_state(zone, NR_UNEVICTABLE); | 3266 | del_page_from_lru_list(page, lruvec, LRU_UNEVICTABLE); |
3358 | lruvec = mem_cgroup_lru_move_lists(zone, page, | 3267 | add_page_to_lru_list(page, lruvec, lru); |
3359 | LRU_UNEVICTABLE, lru); | ||
3360 | list_move(&page->lru, &lruvec->lists[lru]); | ||
3361 | __inc_zone_state(zone, NR_INACTIVE_ANON + lru); | ||
3362 | pgrescued++; | 3268 | pgrescued++; |
3363 | } | 3269 | } |
3364 | } | 3270 | } |
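The final hunk collapses an open-coded three-step sequence, decrement the unevictable counter, move the list entry, bump the target counter, into the del_page_from_lru_list()/add_page_to_lru_list() pair, which keeps list membership and statistics in lockstep. A reduced sketch of why the paired helpers are safer than hand-written steps; all types are illustrative and the counters stand in for the zone page-state updates.

	#include <stdio.h>

	enum lru { LRU_INACTIVE_ANON, LRU_UNEVICTABLE, NR_LRU };

	struct lruvec {
		int nr[NR_LRU];		/* per-list page counts */
	};

	struct page {
		enum lru lru;
	};

	/* Helper pair: the count can never drift from the membership. */
	static void del_page_from_lru_list(struct page *p, struct lruvec *lv,
					   enum lru lru)
	{
		lv->nr[lru]--;
	}

	static void add_page_to_lru_list(struct page *p, struct lruvec *lv,
					 enum lru lru)
	{
		p->lru = lru;
		lv->nr[lru]++;
	}

	int main(void)
	{
		struct lruvec lv = { .nr = { 0, 1 } };
		struct page page = { .lru = LRU_UNEVICTABLE };

		/* rescue: unevictable -> inactive anon, counters balanced */
		del_page_from_lru_list(&page, &lv, LRU_UNEVICTABLE);
		add_page_to_lru_list(&page, &lv, LRU_INACTIVE_ANON);

		printf("inactive_anon=%d unevictable=%d\n",
		       lv.nr[LRU_INACTIVE_ANON], lv.nr[LRU_UNEVICTABLE]);
		return 0;
	}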